2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: Ewald
54 * VdW interaction: LennardJones
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the two positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
108 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
109 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
113 _fjsp_v2r8 dummy_mask,cutoff_mask;
114 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
115 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
116 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
123 jindex = nlist->jindex;
125 shiftidx = nlist->shift;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
130 charge = mdatoms->chargeA;
131 nvdwtype = fr->ntype;
133 vdwtype = mdatoms->typeA;
135 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
136 ewtab = fr->ic->tabq_coul_FDV0;
137 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
138 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
140 /* Setup water-specific parameters */
141 inr = nlist->iinr[0];
142 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
143 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
144 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
145 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
147 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
148 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
149 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
150 vdwjidx0A = 2*vdwtype[inr+0];
151 qq00 = _fjsp_mul_v2r8(iq0,jq0);
152 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
153 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
154 qq01 = _fjsp_mul_v2r8(iq0,jq1);
155 qq02 = _fjsp_mul_v2r8(iq0,jq2);
156 qq10 = _fjsp_mul_v2r8(iq1,jq0);
157 qq11 = _fjsp_mul_v2r8(iq1,jq1);
158 qq12 = _fjsp_mul_v2r8(iq1,jq2);
159 qq20 = _fjsp_mul_v2r8(iq2,jq0);
160 qq21 = _fjsp_mul_v2r8(iq2,jq1);
161 qq22 = _fjsp_mul_v2r8(iq2,jq2);
163 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
164 rcutoff_scalar = fr->rcoulomb;
165 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
166 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
168 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
169 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
171 /* Avoid stupid compiler warnings */
179 /* Start outer loop over neighborlists */
180 for(iidx=0; iidx<nri; iidx++)
182 /* Load shift vector for this list */
183 i_shift_offset = DIM*shiftidx[iidx];
185 /* Load limits for loop over neighbors */
186 j_index_start = jindex[iidx];
187 j_index_end = jindex[iidx+1];
189 /* Get outer coordinate index */
191 i_coord_offset = DIM*inr;
193 /* Load i particle coords and add shift vector */
194 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
195 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
197 fix0 = _fjsp_setzero_v2r8();
198 fiy0 = _fjsp_setzero_v2r8();
199 fiz0 = _fjsp_setzero_v2r8();
200 fix1 = _fjsp_setzero_v2r8();
201 fiy1 = _fjsp_setzero_v2r8();
202 fiz1 = _fjsp_setzero_v2r8();
203 fix2 = _fjsp_setzero_v2r8();
204 fiy2 = _fjsp_setzero_v2r8();
205 fiz2 = _fjsp_setzero_v2r8();
207 /* Reset potential sums */
208 velecsum = _fjsp_setzero_v2r8();
209 vvdwsum = _fjsp_setzero_v2r8();
211 /* Start inner kernel loop */
212 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
215 /* Get j neighbor index, and coordinate index */
218 j_coord_offsetA = DIM*jnrA;
219 j_coord_offsetB = DIM*jnrB;
221 /* load j atom coordinates */
222 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
223 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
225 /* Calculate displacement vector */
226 dx00 = _fjsp_sub_v2r8(ix0,jx0);
227 dy00 = _fjsp_sub_v2r8(iy0,jy0);
228 dz00 = _fjsp_sub_v2r8(iz0,jz0);
229 dx01 = _fjsp_sub_v2r8(ix0,jx1);
230 dy01 = _fjsp_sub_v2r8(iy0,jy1);
231 dz01 = _fjsp_sub_v2r8(iz0,jz1);
232 dx02 = _fjsp_sub_v2r8(ix0,jx2);
233 dy02 = _fjsp_sub_v2r8(iy0,jy2);
234 dz02 = _fjsp_sub_v2r8(iz0,jz2);
235 dx10 = _fjsp_sub_v2r8(ix1,jx0);
236 dy10 = _fjsp_sub_v2r8(iy1,jy0);
237 dz10 = _fjsp_sub_v2r8(iz1,jz0);
238 dx11 = _fjsp_sub_v2r8(ix1,jx1);
239 dy11 = _fjsp_sub_v2r8(iy1,jy1);
240 dz11 = _fjsp_sub_v2r8(iz1,jz1);
241 dx12 = _fjsp_sub_v2r8(ix1,jx2);
242 dy12 = _fjsp_sub_v2r8(iy1,jy2);
243 dz12 = _fjsp_sub_v2r8(iz1,jz2);
244 dx20 = _fjsp_sub_v2r8(ix2,jx0);
245 dy20 = _fjsp_sub_v2r8(iy2,jy0);
246 dz20 = _fjsp_sub_v2r8(iz2,jz0);
247 dx21 = _fjsp_sub_v2r8(ix2,jx1);
248 dy21 = _fjsp_sub_v2r8(iy2,jy1);
249 dz21 = _fjsp_sub_v2r8(iz2,jz1);
250 dx22 = _fjsp_sub_v2r8(ix2,jx2);
251 dy22 = _fjsp_sub_v2r8(iy2,jy2);
252 dz22 = _fjsp_sub_v2r8(iz2,jz2);
254 /* Calculate squared distance and things based on it */
255 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
256 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
257 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
258 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
259 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
260 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
261 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
262 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
263 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
265 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
266 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
267 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
268 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
269 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
270 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
271 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
272 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
273 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
275 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
276 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
277 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
278 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
279 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
280 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
281 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
282 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
283 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
285 fjx0 = _fjsp_setzero_v2r8();
286 fjy0 = _fjsp_setzero_v2r8();
287 fjz0 = _fjsp_setzero_v2r8();
288 fjx1 = _fjsp_setzero_v2r8();
289 fjy1 = _fjsp_setzero_v2r8();
290 fjz1 = _fjsp_setzero_v2r8();
291 fjx2 = _fjsp_setzero_v2r8();
292 fjy2 = _fjsp_setzero_v2r8();
293 fjz2 = _fjsp_setzero_v2r8();
295 /**************************
296 * CALCULATE INTERACTIONS *
297 **************************/
299 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
302 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
304 /* EWALD ELECTROSTATICS */
306 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
307 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
308 itab_tmp = _fjsp_dtox_v2r8(ewrt);
309 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
310 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
312 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
313 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
314 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
315 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
316 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
317 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
318 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
319 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
320 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
321 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
323 /* LENNARD-JONES DISPERSION/REPULSION */
325 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
326 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
327 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
328 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
329 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
330 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
332 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
334 /* Update potential sum for this i atom from the interaction with this j atom. */
335 velec = _fjsp_and_v2r8(velec,cutoff_mask);
336 velecsum = _fjsp_add_v2r8(velecsum,velec);
337 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
338 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
340 fscal = _fjsp_add_v2r8(felec,fvdw);
342 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
344 /* Update vectorial force */
345 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
346 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
347 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
349 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
350 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
351 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
355 /**************************
356 * CALCULATE INTERACTIONS *
357 **************************/
359 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
362 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
364 /* EWALD ELECTROSTATICS */
366 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
367 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
368 itab_tmp = _fjsp_dtox_v2r8(ewrt);
369 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
370 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
372 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
373 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
374 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
375 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
376 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
377 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
378 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
379 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
380 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
381 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
383 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
385 /* Update potential sum for this i atom from the interaction with this j atom. */
386 velec = _fjsp_and_v2r8(velec,cutoff_mask);
387 velecsum = _fjsp_add_v2r8(velecsum,velec);
391 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
393 /* Update vectorial force */
394 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
395 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
396 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
398 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
399 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
400 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
404 /**************************
405 * CALCULATE INTERACTIONS *
406 **************************/
408 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
411 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
413 /* EWALD ELECTROSTATICS */
415 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
416 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
417 itab_tmp = _fjsp_dtox_v2r8(ewrt);
418 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
419 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
421 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
422 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
423 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
424 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
425 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
426 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
427 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
428 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
429 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
430 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
432 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
434 /* Update potential sum for this i atom from the interaction with this j atom. */
435 velec = _fjsp_and_v2r8(velec,cutoff_mask);
436 velecsum = _fjsp_add_v2r8(velecsum,velec);
440 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
442 /* Update vectorial force */
443 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
444 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
445 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
447 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
448 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
449 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
453 /**************************
454 * CALCULATE INTERACTIONS *
455 **************************/
457 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
460 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
462 /* EWALD ELECTROSTATICS */
464 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
465 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
466 itab_tmp = _fjsp_dtox_v2r8(ewrt);
467 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
468 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
470 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
471 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
472 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
473 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
474 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
475 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
476 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
477 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
478 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
479 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
481 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
483 /* Update potential sum for this i atom from the interaction with this j atom. */
484 velec = _fjsp_and_v2r8(velec,cutoff_mask);
485 velecsum = _fjsp_add_v2r8(velecsum,velec);
489 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
491 /* Update vectorial force */
492 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
493 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
494 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
496 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
497 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
498 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
502 /**************************
503 * CALCULATE INTERACTIONS *
504 **************************/
506 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
509 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
511 /* EWALD ELECTROSTATICS */
513 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
514 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
515 itab_tmp = _fjsp_dtox_v2r8(ewrt);
516 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
517 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
519 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
520 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
521 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
522 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
523 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
524 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
525 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
526 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
527 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
528 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
530 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
532 /* Update potential sum for this i atom from the interaction with this j atom. */
533 velec = _fjsp_and_v2r8(velec,cutoff_mask);
534 velecsum = _fjsp_add_v2r8(velecsum,velec);
538 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
540 /* Update vectorial force */
541 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
542 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
543 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
545 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
546 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
547 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
551 /**************************
552 * CALCULATE INTERACTIONS *
553 **************************/
555 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
558 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
560 /* EWALD ELECTROSTATICS */
562 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
563 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
564 itab_tmp = _fjsp_dtox_v2r8(ewrt);
565 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
566 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
568 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
569 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
570 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
571 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
572 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
573 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
574 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
575 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
576 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
577 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
579 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
581 /* Update potential sum for this i atom from the interaction with this j atom. */
582 velec = _fjsp_and_v2r8(velec,cutoff_mask);
583 velecsum = _fjsp_add_v2r8(velecsum,velec);
587 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
589 /* Update vectorial force */
590 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
591 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
592 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
594 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
595 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
596 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
600 /**************************
601 * CALCULATE INTERACTIONS *
602 **************************/
604 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
607 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
609 /* EWALD ELECTROSTATICS */
611 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
612 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
613 itab_tmp = _fjsp_dtox_v2r8(ewrt);
614 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
615 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
617 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
618 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
619 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
620 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
621 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
622 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
623 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
624 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
625 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
626 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
628 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
630 /* Update potential sum for this i atom from the interaction with this j atom. */
631 velec = _fjsp_and_v2r8(velec,cutoff_mask);
632 velecsum = _fjsp_add_v2r8(velecsum,velec);
636 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
638 /* Update vectorial force */
639 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
640 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
641 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
643 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
644 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
645 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
649 /**************************
650 * CALCULATE INTERACTIONS *
651 **************************/
653 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
656 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
658 /* EWALD ELECTROSTATICS */
660 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
661 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
662 itab_tmp = _fjsp_dtox_v2r8(ewrt);
663 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
664 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
666 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
667 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
668 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
669 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
670 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
671 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
672 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
673 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
674 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
675 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
677 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
679 /* Update potential sum for this i atom from the interaction with this j atom. */
680 velec = _fjsp_and_v2r8(velec,cutoff_mask);
681 velecsum = _fjsp_add_v2r8(velecsum,velec);
685 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
687 /* Update vectorial force */
688 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
689 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
690 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
692 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
693 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
694 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
698 /**************************
699 * CALCULATE INTERACTIONS *
700 **************************/
702 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
705 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
707 /* EWALD ELECTROSTATICS */
709 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
710 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
711 itab_tmp = _fjsp_dtox_v2r8(ewrt);
712 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
713 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
715 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
716 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
717 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
718 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
719 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
720 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
721 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
722 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
723 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
724 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
726 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
728 /* Update potential sum for this i atom from the interaction with this j atom. */
729 velec = _fjsp_and_v2r8(velec,cutoff_mask);
730 velecsum = _fjsp_add_v2r8(velecsum,velec);
734 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
736 /* Update vectorial force */
737 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
738 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
739 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
741 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
742 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
743 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
747 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
749 /* Inner loop uses 459 flops */
756 j_coord_offsetA = DIM*jnrA;
758 /* load j atom coordinates */
759 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
760 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
762 /* Calculate displacement vector */
763 dx00 = _fjsp_sub_v2r8(ix0,jx0);
764 dy00 = _fjsp_sub_v2r8(iy0,jy0);
765 dz00 = _fjsp_sub_v2r8(iz0,jz0);
766 dx01 = _fjsp_sub_v2r8(ix0,jx1);
767 dy01 = _fjsp_sub_v2r8(iy0,jy1);
768 dz01 = _fjsp_sub_v2r8(iz0,jz1);
769 dx02 = _fjsp_sub_v2r8(ix0,jx2);
770 dy02 = _fjsp_sub_v2r8(iy0,jy2);
771 dz02 = _fjsp_sub_v2r8(iz0,jz2);
772 dx10 = _fjsp_sub_v2r8(ix1,jx0);
773 dy10 = _fjsp_sub_v2r8(iy1,jy0);
774 dz10 = _fjsp_sub_v2r8(iz1,jz0);
775 dx11 = _fjsp_sub_v2r8(ix1,jx1);
776 dy11 = _fjsp_sub_v2r8(iy1,jy1);
777 dz11 = _fjsp_sub_v2r8(iz1,jz1);
778 dx12 = _fjsp_sub_v2r8(ix1,jx2);
779 dy12 = _fjsp_sub_v2r8(iy1,jy2);
780 dz12 = _fjsp_sub_v2r8(iz1,jz2);
781 dx20 = _fjsp_sub_v2r8(ix2,jx0);
782 dy20 = _fjsp_sub_v2r8(iy2,jy0);
783 dz20 = _fjsp_sub_v2r8(iz2,jz0);
784 dx21 = _fjsp_sub_v2r8(ix2,jx1);
785 dy21 = _fjsp_sub_v2r8(iy2,jy1);
786 dz21 = _fjsp_sub_v2r8(iz2,jz1);
787 dx22 = _fjsp_sub_v2r8(ix2,jx2);
788 dy22 = _fjsp_sub_v2r8(iy2,jy2);
789 dz22 = _fjsp_sub_v2r8(iz2,jz2);
791 /* Calculate squared distance and things based on it */
792 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
793 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
794 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
795 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
796 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
797 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
798 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
799 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
800 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
802 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
803 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
804 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
805 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
806 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
807 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
808 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
809 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
810 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
812 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
813 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
814 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
815 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
816 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
817 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
818 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
819 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
820 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
822 fjx0 = _fjsp_setzero_v2r8();
823 fjy0 = _fjsp_setzero_v2r8();
824 fjz0 = _fjsp_setzero_v2r8();
825 fjx1 = _fjsp_setzero_v2r8();
826 fjy1 = _fjsp_setzero_v2r8();
827 fjz1 = _fjsp_setzero_v2r8();
828 fjx2 = _fjsp_setzero_v2r8();
829 fjy2 = _fjsp_setzero_v2r8();
830 fjz2 = _fjsp_setzero_v2r8();
832 /**************************
833 * CALCULATE INTERACTIONS *
834 **************************/
836 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
839 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
841 /* EWALD ELECTROSTATICS */
843 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
844 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
845 itab_tmp = _fjsp_dtox_v2r8(ewrt);
846 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
847 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
849 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
850 ewtabD = _fjsp_setzero_v2r8();
851 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
852 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
853 ewtabFn = _fjsp_setzero_v2r8();
854 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
855 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
856 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
857 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
858 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
860 /* LENNARD-JONES DISPERSION/REPULSION */
862 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
863 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
864 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
865 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
866 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
867 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
869 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
871 /* Update potential sum for this i atom from the interaction with this j atom. */
872 velec = _fjsp_and_v2r8(velec,cutoff_mask);
873 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
874 velecsum = _fjsp_add_v2r8(velecsum,velec);
875 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
876 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
877 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
879 fscal = _fjsp_add_v2r8(felec,fvdw);
881 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
883 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
885 /* Update vectorial force */
886 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
887 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
888 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
890 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
891 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
892 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
896 /**************************
897 * CALCULATE INTERACTIONS *
898 **************************/
900 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
903 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
905 /* EWALD ELECTROSTATICS */
907 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
908 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
909 itab_tmp = _fjsp_dtox_v2r8(ewrt);
910 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
911 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
913 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
914 ewtabD = _fjsp_setzero_v2r8();
915 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
916 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
917 ewtabFn = _fjsp_setzero_v2r8();
918 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
919 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
920 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
921 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
922 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
924 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
926 /* Update potential sum for this i atom from the interaction with this j atom. */
927 velec = _fjsp_and_v2r8(velec,cutoff_mask);
928 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
929 velecsum = _fjsp_add_v2r8(velecsum,velec);
933 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
935 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
937 /* Update vectorial force */
938 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
939 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
940 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
942 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
943 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
944 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
948 /**************************
949 * CALCULATE INTERACTIONS *
950 **************************/
952 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
955 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
957 /* EWALD ELECTROSTATICS */
959 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
960 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
961 itab_tmp = _fjsp_dtox_v2r8(ewrt);
962 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
963 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
965 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
966 ewtabD = _fjsp_setzero_v2r8();
967 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
968 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
969 ewtabFn = _fjsp_setzero_v2r8();
970 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
971 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
972 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
973 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
974 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
976 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
978 /* Update potential sum for this i atom from the interaction with this j atom. */
979 velec = _fjsp_and_v2r8(velec,cutoff_mask);
980 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
981 velecsum = _fjsp_add_v2r8(velecsum,velec);
985 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
987 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
989 /* Update vectorial force */
990 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
991 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
992 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
994 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
995 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
996 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1000 /**************************
1001 * CALCULATE INTERACTIONS *
1002 **************************/
1004 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1007 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1009 /* EWALD ELECTROSTATICS */
1011 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1012 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1013 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1014 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1015 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1017 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1018 ewtabD = _fjsp_setzero_v2r8();
1019 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1020 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1021 ewtabFn = _fjsp_setzero_v2r8();
1022 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1023 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1024 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1025 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
1026 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1028 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1030 /* Update potential sum for this i atom from the interaction with this j atom. */
1031 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1032 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1033 velecsum = _fjsp_add_v2r8(velecsum,velec);
1037 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1039 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1041 /* Update vectorial force */
1042 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1043 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1044 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1046 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1047 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1048 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1052 /**************************
1053 * CALCULATE INTERACTIONS *
1054 **************************/
1056 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1059 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1061 /* EWALD ELECTROSTATICS */
1063 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1064 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1065 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1066 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1067 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1069 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1070 ewtabD = _fjsp_setzero_v2r8();
1071 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1072 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1073 ewtabFn = _fjsp_setzero_v2r8();
1074 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1075 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1076 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1077 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
1078 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1080 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1082 /* Update potential sum for this i atom from the interaction with this j atom. */
1083 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1084 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1085 velecsum = _fjsp_add_v2r8(velecsum,velec);
1089 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1091 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1093 /* Update vectorial force */
1094 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1095 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1096 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1098 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1099 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1100 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1104 /**************************
1105 * CALCULATE INTERACTIONS *
1106 **************************/
1108 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1111 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1113 /* EWALD ELECTROSTATICS */
1115 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1116 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1117 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1118 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1119 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1121 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1122 ewtabD = _fjsp_setzero_v2r8();
1123 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1124 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1125 ewtabFn = _fjsp_setzero_v2r8();
1126 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1127 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1128 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1129 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
1130 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1132 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1134 /* Update potential sum for this i atom from the interaction with this j atom. */
1135 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1136 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1137 velecsum = _fjsp_add_v2r8(velecsum,velec);
1141 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1143 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1145 /* Update vectorial force */
1146 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1147 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1148 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1150 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1151 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1152 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1156 /**************************
1157 * CALCULATE INTERACTIONS *
1158 **************************/
1160 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1163 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1165 /* EWALD ELECTROSTATICS */
1167 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1168 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1169 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1170 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1171 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1173 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1174 ewtabD = _fjsp_setzero_v2r8();
1175 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1176 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1177 ewtabFn = _fjsp_setzero_v2r8();
1178 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1179 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1180 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1181 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
1182 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1184 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1186 /* Update potential sum for this i atom from the interaction with this j atom. */
1187 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1188 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1189 velecsum = _fjsp_add_v2r8(velecsum,velec);
1193 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1195 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1197 /* Update vectorial force */
1198 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1199 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1200 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1202 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1203 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1204 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1208 /**************************
1209 * CALCULATE INTERACTIONS *
1210 **************************/
1212 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1215 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1217 /* EWALD ELECTROSTATICS */
1219 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1220 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1221 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1222 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1223 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1225 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1226 ewtabD = _fjsp_setzero_v2r8();
1227 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1228 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1229 ewtabFn = _fjsp_setzero_v2r8();
1230 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1231 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1232 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1233 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
1234 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1236 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1238 /* Update potential sum for this i atom from the interaction with this j atom. */
1239 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1240 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1241 velecsum = _fjsp_add_v2r8(velecsum,velec);
1245 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1247 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1249 /* Update vectorial force */
1250 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1251 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1252 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1254 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1255 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1256 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1260 /**************************
1261 * CALCULATE INTERACTIONS *
1262 **************************/
1264 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1267 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1269 /* EWALD ELECTROSTATICS */
1271 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1272 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1273 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1274 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1275 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1277 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1278 ewtabD = _fjsp_setzero_v2r8();
1279 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1280 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1281 ewtabFn = _fjsp_setzero_v2r8();
1282 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1283 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1284 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1285 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
1286 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1288 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1290 /* Update potential sum for this i atom from the interaction with this j atom. */
1291 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1292 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1293 velecsum = _fjsp_add_v2r8(velecsum,velec);
1297 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1299 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1301 /* Update vectorial force */
1302 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1303 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1304 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1306 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1307 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1308 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1312 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1314 /* Inner loop uses 459 flops */
1317 /* End of innermost loop */
1319 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1320 f+i_coord_offset,fshift+i_shift_offset);
1323 /* Update potential energies */
1324 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1325 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1327 /* Increment number of inner iterations */
1328 inneriter += j_index_end - j_index_start;
1330 /* Outer loop uses 20 flops */
1333 /* Increment number of outer iterations */
1336 /* Update outer/inner flops */
1338 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459);
1341 * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1342 * Electrostatics interaction: Ewald
1343 * VdW interaction: LennardJones
1344 * Geometry: Water3-Water3
1345 * Calculate force/pot: Force
1348 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1349 (t_nblist * gmx_restrict nlist,
1350 rvec * gmx_restrict xx,
1351 rvec * gmx_restrict ff,
1352 t_forcerec * gmx_restrict fr,
1353 t_mdatoms * gmx_restrict mdatoms,
1354 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1355 t_nrnb * gmx_restrict nrnb)
1357 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1358 * just 0 for non-waters.
1359 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1360 * jnr indices corresponding to data put in the two positions in the SIMD register.
1362 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1363 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1365 int j_coord_offsetA,j_coord_offsetB;
1366 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1367 real rcutoff_scalar;
1368 real *shiftvec,*fshift,*x,*f;
1369 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1371 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1373 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1375 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1376 int vdwjidx0A,vdwjidx0B;
1377 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1378 int vdwjidx1A,vdwjidx1B;
1379 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1380 int vdwjidx2A,vdwjidx2B;
1381 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1382 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1383 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1384 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1385 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1386 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1387 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1388 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1389 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1390 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1391 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1394 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1397 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1398 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1399 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1401 _fjsp_v2r8 itab_tmp;
1402 _fjsp_v2r8 dummy_mask,cutoff_mask;
1403 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1404 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1405 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1412 jindex = nlist->jindex;
1414 shiftidx = nlist->shift;
1416 shiftvec = fr->shift_vec[0];
1417 fshift = fr->fshift[0];
1418 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1419 charge = mdatoms->chargeA;
1420 nvdwtype = fr->ntype;
1421 vdwparam = fr->nbfp;
1422 vdwtype = mdatoms->typeA;
1424 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1425 ewtab = fr->ic->tabq_coul_F;
1426 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1427 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1429 /* Setup water-specific parameters */
1430 inr = nlist->iinr[0];
1431 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1432 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1433 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1434 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1436 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1437 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1438 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1439 vdwjidx0A = 2*vdwtype[inr+0];
1440 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1441 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1442 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1443 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1444 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1445 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1446 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1447 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1448 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1449 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1450 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1452 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1453 rcutoff_scalar = fr->rcoulomb;
1454 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1455 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1457 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
1458 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
1460 /* Avoid stupid compiler warnings */
1462 j_coord_offsetA = 0;
1463 j_coord_offsetB = 0;
1468 /* Start outer loop over neighborlists */
1469 for(iidx=0; iidx<nri; iidx++)
1471 /* Load shift vector for this list */
1472 i_shift_offset = DIM*shiftidx[iidx];
1474 /* Load limits for loop over neighbors */
1475 j_index_start = jindex[iidx];
1476 j_index_end = jindex[iidx+1];
1478 /* Get outer coordinate index */
1480 i_coord_offset = DIM*inr;
1482 /* Load i particle coords and add shift vector */
1483 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1484 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1486 fix0 = _fjsp_setzero_v2r8();
1487 fiy0 = _fjsp_setzero_v2r8();
1488 fiz0 = _fjsp_setzero_v2r8();
1489 fix1 = _fjsp_setzero_v2r8();
1490 fiy1 = _fjsp_setzero_v2r8();
1491 fiz1 = _fjsp_setzero_v2r8();
1492 fix2 = _fjsp_setzero_v2r8();
1493 fiy2 = _fjsp_setzero_v2r8();
1494 fiz2 = _fjsp_setzero_v2r8();
1496 /* Start inner kernel loop */
1497 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1500 /* Get j neighbor index, and coordinate index */
1502 jnrB = jjnr[jidx+1];
1503 j_coord_offsetA = DIM*jnrA;
1504 j_coord_offsetB = DIM*jnrB;
1506 /* load j atom coordinates */
1507 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1508 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1510 /* Calculate displacement vector */
1511 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1512 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1513 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1514 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1515 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1516 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1517 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1518 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1519 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1520 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1521 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1522 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1523 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1524 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1525 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1526 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1527 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1528 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1529 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1530 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1531 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1532 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1533 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1534 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1535 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1536 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1537 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1539 /* Calculate squared distance and things based on it */
1540 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1541 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1542 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1543 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1544 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1545 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1546 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1547 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1548 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1550 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1551 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1552 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1553 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1554 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1555 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1556 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1557 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1558 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1560 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1561 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1562 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1563 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1564 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1565 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1566 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1567 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1568 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1570 fjx0 = _fjsp_setzero_v2r8();
1571 fjy0 = _fjsp_setzero_v2r8();
1572 fjz0 = _fjsp_setzero_v2r8();
1573 fjx1 = _fjsp_setzero_v2r8();
1574 fjy1 = _fjsp_setzero_v2r8();
1575 fjz1 = _fjsp_setzero_v2r8();
1576 fjx2 = _fjsp_setzero_v2r8();
1577 fjy2 = _fjsp_setzero_v2r8();
1578 fjz2 = _fjsp_setzero_v2r8();
1580 /**************************
1581 * CALCULATE INTERACTIONS *
1582 **************************/
1584 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1587 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1589 /* EWALD ELECTROSTATICS */
1591 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1592 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1593 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1594 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1595 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1597 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1599 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1600 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1602 /* LENNARD-JONES DISPERSION/REPULSION */
1604 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1605 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1607 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1609 fscal = _fjsp_add_v2r8(felec,fvdw);
1611 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1613 /* Update vectorial force */
1614 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1615 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1616 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1618 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1619 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1620 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1624 /**************************
1625 * CALCULATE INTERACTIONS *
1626 **************************/
1628 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1631 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1633 /* EWALD ELECTROSTATICS */
1635 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1636 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1637 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1638 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1639 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1641 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1643 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1644 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1646 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1650 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1652 /* Update vectorial force */
1653 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1654 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1655 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1657 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1658 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1659 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1663 /**************************
1664 * CALCULATE INTERACTIONS *
1665 **************************/
1667 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1670 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1672 /* EWALD ELECTROSTATICS */
1674 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1675 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1676 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1677 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1678 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1680 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1682 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1683 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1685 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1689 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1691 /* Update vectorial force */
1692 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1693 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1694 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1696 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1697 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1698 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1702 /**************************
1703 * CALCULATE INTERACTIONS *
1704 **************************/
1706 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1709 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1711 /* EWALD ELECTROSTATICS */
1713 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1714 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1715 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1716 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1717 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1719 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1721 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1722 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1724 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1728 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1730 /* Update vectorial force */
1731 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1732 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1733 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1735 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1736 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1737 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1741 /**************************
1742 * CALCULATE INTERACTIONS *
1743 **************************/
1745 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1748 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1750 /* EWALD ELECTROSTATICS */
1752 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1753 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1754 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1755 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1756 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1758 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1760 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1761 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1763 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1767 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1769 /* Update vectorial force */
1770 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1771 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1772 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1774 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1775 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1776 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1780 /**************************
1781 * CALCULATE INTERACTIONS *
1782 **************************/
1784 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1787 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1789 /* EWALD ELECTROSTATICS */
1791 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1792 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1793 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1794 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1795 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1797 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1799 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1800 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1802 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1806 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1808 /* Update vectorial force */
1809 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1810 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1811 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1813 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1814 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1815 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1819 /**************************
1820 * CALCULATE INTERACTIONS *
1821 **************************/
1823 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1826 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1828 /* EWALD ELECTROSTATICS */
1830 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1831 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1832 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1833 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1834 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1836 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1838 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1839 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1841 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1845 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1847 /* Update vectorial force */
1848 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1849 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1850 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1852 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1853 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1854 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1858 /**************************
1859 * CALCULATE INTERACTIONS *
1860 **************************/
1862 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1865 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1867 /* EWALD ELECTROSTATICS */
1869 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1870 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1871 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1872 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1873 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1875 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1877 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1878 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1880 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1884 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1886 /* Update vectorial force */
1887 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1888 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1889 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1891 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1892 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1893 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1897 /**************************
1898 * CALCULATE INTERACTIONS *
1899 **************************/
1901 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1904 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1906 /* EWALD ELECTROSTATICS */
1908 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1909 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1910 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1911 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1912 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1914 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1916 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1917 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1919 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1923 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1925 /* Update vectorial force */
1926 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1927 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1928 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1930 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1931 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1932 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1936 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1938 /* Inner loop uses 385 flops */
1941 if(jidx<j_index_end)
1945 j_coord_offsetA = DIM*jnrA;
1947 /* load j atom coordinates */
1948 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1949 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1951 /* Calculate displacement vector */
1952 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1953 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1954 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1955 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1956 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1957 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1958 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1959 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1960 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1961 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1962 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1963 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1964 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1965 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1966 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1967 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1968 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1969 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1970 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1971 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1972 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1973 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1974 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1975 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1976 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1977 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1978 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1980 /* Calculate squared distance and things based on it */
1981 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1982 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1983 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1984 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1985 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1986 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1987 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1988 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1989 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1991 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1992 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1993 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1994 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1995 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1996 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1997 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1998 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1999 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
2001 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
2002 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
2003 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
2004 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
2005 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
2006 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
2007 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
2008 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
2009 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
2011 fjx0 = _fjsp_setzero_v2r8();
2012 fjy0 = _fjsp_setzero_v2r8();
2013 fjz0 = _fjsp_setzero_v2r8();
2014 fjx1 = _fjsp_setzero_v2r8();
2015 fjy1 = _fjsp_setzero_v2r8();
2016 fjz1 = _fjsp_setzero_v2r8();
2017 fjx2 = _fjsp_setzero_v2r8();
2018 fjy2 = _fjsp_setzero_v2r8();
2019 fjz2 = _fjsp_setzero_v2r8();
2021 /**************************
2022 * CALCULATE INTERACTIONS *
2023 **************************/
2025 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
2028 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
2030 /* EWALD ELECTROSTATICS */
2032 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2033 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
2034 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2035 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2036 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2038 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2039 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2040 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
2042 /* LENNARD-JONES DISPERSION/REPULSION */
2044 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
2045 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
2047 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
2049 fscal = _fjsp_add_v2r8(felec,fvdw);
2051 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2053 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2055 /* Update vectorial force */
2056 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
2057 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
2058 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
2060 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
2061 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
2062 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
2066 /**************************
2067 * CALCULATE INTERACTIONS *
2068 **************************/
2070 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
2073 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
2075 /* EWALD ELECTROSTATICS */
2077 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2078 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
2079 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2080 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2081 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2083 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2084 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2085 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
2087 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
2091 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2093 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2095 /* Update vectorial force */
2096 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
2097 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2098 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2100 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2101 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2102 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2106 /**************************
2107 * CALCULATE INTERACTIONS *
2108 **************************/
2110 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
2113 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
2115 /* EWALD ELECTROSTATICS */
2117 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2118 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
2119 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2120 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2121 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2123 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2124 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2125 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
2127 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
2131 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2133 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2135 /* Update vectorial force */
2136 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
2137 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2138 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2140 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2141 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2142 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2146 /**************************
2147 * CALCULATE INTERACTIONS *
2148 **************************/
2150 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
2153 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
2155 /* EWALD ELECTROSTATICS */
2157 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2158 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
2159 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2160 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2161 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2163 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2164 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2165 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
2167 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
2171 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2173 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2175 /* Update vectorial force */
2176 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
2177 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2178 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2180 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2181 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2182 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2186 /**************************
2187 * CALCULATE INTERACTIONS *
2188 **************************/
2190 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
2193 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2195 /* EWALD ELECTROSTATICS */
2197 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2198 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
2199 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2200 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2201 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2203 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2204 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2205 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
2207 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
2211 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2213 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2215 /* Update vectorial force */
2216 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2217 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2218 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2220 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2221 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2222 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2226 /**************************
2227 * CALCULATE INTERACTIONS *
2228 **************************/
2230 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
2233 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2235 /* EWALD ELECTROSTATICS */
2237 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2238 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
2239 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2240 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2241 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2243 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2244 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2245 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2247 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
2251 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2253 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2255 /* Update vectorial force */
2256 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2257 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2258 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2260 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2261 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2262 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2266 /**************************
2267 * CALCULATE INTERACTIONS *
2268 **************************/
2270 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
2273 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2275 /* EWALD ELECTROSTATICS */
2277 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2278 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
2279 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2280 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2281 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2283 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2284 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2285 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
2287 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
2291 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2293 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2295 /* Update vectorial force */
2296 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2297 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2298 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2300 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2301 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2302 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2306 /**************************
2307 * CALCULATE INTERACTIONS *
2308 **************************/
2310 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
2313 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2315 /* EWALD ELECTROSTATICS */
2317 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2318 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
2319 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2320 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2321 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2323 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2324 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2325 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2327 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
2331 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2333 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2335 /* Update vectorial force */
2336 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2337 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2338 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2340 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2341 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2342 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2346 /**************************
2347 * CALCULATE INTERACTIONS *
2348 **************************/
2350 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
2353 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2355 /* EWALD ELECTROSTATICS */
2357 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2358 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
2359 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2360 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2361 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2363 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2364 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2365 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2367 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
2371 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2373 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2375 /* Update vectorial force */
2376 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2377 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2378 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2380 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2381 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2382 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2386 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2388 /* Inner loop uses 385 flops */
2391 /* End of innermost loop */
2393 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2394 f+i_coord_offset,fshift+i_shift_offset);
2396 /* Increment number of inner iterations */
2397 inneriter += j_index_end - j_index_start;
2399 /* Outer loop uses 18 flops */
2402 /* Increment number of outer iterations */
2405 /* Update outer/inner flops */
2407 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*385);