2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: Ewald
52 * VdW interaction: LennardJones
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist * gmx_restrict nlist,
59 rvec * gmx_restrict xx,
60 rvec * gmx_restrict ff,
61 t_forcerec * gmx_restrict fr,
62 t_mdatoms * gmx_restrict mdatoms,
63 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64 t_nrnb * gmx_restrict nrnb)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the two positions in the SIMD register.
71 int i_shift_offset,i_coord_offset,outeriter,inneriter;
72 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
74 int j_coord_offsetA,j_coord_offsetB;
75 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
77 real *shiftvec,*fshift,*x,*f;
78 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
80 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
82 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
84 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
85 int vdwjidx0A,vdwjidx0B;
86 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
87 int vdwjidx1A,vdwjidx1B;
88 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
89 int vdwjidx2A,vdwjidx2B;
90 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
91 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
92 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
93 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
94 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
95 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
96 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
97 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
98 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
99 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
100 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
103 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
106 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
107 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
108 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
111 _fjsp_v2r8 dummy_mask,cutoff_mask;
112 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
113 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
114 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
121 jindex = nlist->jindex;
123 shiftidx = nlist->shift;
125 shiftvec = fr->shift_vec[0];
126 fshift = fr->fshift[0];
127 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
128 charge = mdatoms->chargeA;
129 nvdwtype = fr->ntype;
131 vdwtype = mdatoms->typeA;
133 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
134 ewtab = fr->ic->tabq_coul_FDV0;
135 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
136 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
138 /* Setup water-specific parameters */
139 inr = nlist->iinr[0];
140 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
141 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
142 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
143 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
145 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
146 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
147 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
148 vdwjidx0A = 2*vdwtype[inr+0];
149 qq00 = _fjsp_mul_v2r8(iq0,jq0);
150 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
151 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
152 qq01 = _fjsp_mul_v2r8(iq0,jq1);
153 qq02 = _fjsp_mul_v2r8(iq0,jq2);
154 qq10 = _fjsp_mul_v2r8(iq1,jq0);
155 qq11 = _fjsp_mul_v2r8(iq1,jq1);
156 qq12 = _fjsp_mul_v2r8(iq1,jq2);
157 qq20 = _fjsp_mul_v2r8(iq2,jq0);
158 qq21 = _fjsp_mul_v2r8(iq2,jq1);
159 qq22 = _fjsp_mul_v2r8(iq2,jq2);
161 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
162 rcutoff_scalar = fr->rcoulomb;
163 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
164 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
166 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
167 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
169 /* Avoid stupid compiler warnings */
177 /* Start outer loop over neighborlists */
178 for(iidx=0; iidx<nri; iidx++)
180 /* Load shift vector for this list */
181 i_shift_offset = DIM*shiftidx[iidx];
183 /* Load limits for loop over neighbors */
184 j_index_start = jindex[iidx];
185 j_index_end = jindex[iidx+1];
187 /* Get outer coordinate index */
189 i_coord_offset = DIM*inr;
191 /* Load i particle coords and add shift vector */
192 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
193 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
195 fix0 = _fjsp_setzero_v2r8();
196 fiy0 = _fjsp_setzero_v2r8();
197 fiz0 = _fjsp_setzero_v2r8();
198 fix1 = _fjsp_setzero_v2r8();
199 fiy1 = _fjsp_setzero_v2r8();
200 fiz1 = _fjsp_setzero_v2r8();
201 fix2 = _fjsp_setzero_v2r8();
202 fiy2 = _fjsp_setzero_v2r8();
203 fiz2 = _fjsp_setzero_v2r8();
205 /* Reset potential sums */
206 velecsum = _fjsp_setzero_v2r8();
207 vvdwsum = _fjsp_setzero_v2r8();
209 /* Start inner kernel loop */
210 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
213 /* Get j neighbor index, and coordinate index */
216 j_coord_offsetA = DIM*jnrA;
217 j_coord_offsetB = DIM*jnrB;
219 /* load j atom coordinates */
220 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
221 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
223 /* Calculate displacement vector */
224 dx00 = _fjsp_sub_v2r8(ix0,jx0);
225 dy00 = _fjsp_sub_v2r8(iy0,jy0);
226 dz00 = _fjsp_sub_v2r8(iz0,jz0);
227 dx01 = _fjsp_sub_v2r8(ix0,jx1);
228 dy01 = _fjsp_sub_v2r8(iy0,jy1);
229 dz01 = _fjsp_sub_v2r8(iz0,jz1);
230 dx02 = _fjsp_sub_v2r8(ix0,jx2);
231 dy02 = _fjsp_sub_v2r8(iy0,jy2);
232 dz02 = _fjsp_sub_v2r8(iz0,jz2);
233 dx10 = _fjsp_sub_v2r8(ix1,jx0);
234 dy10 = _fjsp_sub_v2r8(iy1,jy0);
235 dz10 = _fjsp_sub_v2r8(iz1,jz0);
236 dx11 = _fjsp_sub_v2r8(ix1,jx1);
237 dy11 = _fjsp_sub_v2r8(iy1,jy1);
238 dz11 = _fjsp_sub_v2r8(iz1,jz1);
239 dx12 = _fjsp_sub_v2r8(ix1,jx2);
240 dy12 = _fjsp_sub_v2r8(iy1,jy2);
241 dz12 = _fjsp_sub_v2r8(iz1,jz2);
242 dx20 = _fjsp_sub_v2r8(ix2,jx0);
243 dy20 = _fjsp_sub_v2r8(iy2,jy0);
244 dz20 = _fjsp_sub_v2r8(iz2,jz0);
245 dx21 = _fjsp_sub_v2r8(ix2,jx1);
246 dy21 = _fjsp_sub_v2r8(iy2,jy1);
247 dz21 = _fjsp_sub_v2r8(iz2,jz1);
248 dx22 = _fjsp_sub_v2r8(ix2,jx2);
249 dy22 = _fjsp_sub_v2r8(iy2,jy2);
250 dz22 = _fjsp_sub_v2r8(iz2,jz2);
252 /* Calculate squared distance and things based on it */
253 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
254 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
255 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
256 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
257 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
258 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
259 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
260 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
261 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
263 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
264 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
265 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
266 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
267 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
268 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
269 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
270 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
271 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
273 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
274 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
275 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
276 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
277 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
278 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
279 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
280 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
281 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
283 fjx0 = _fjsp_setzero_v2r8();
284 fjy0 = _fjsp_setzero_v2r8();
285 fjz0 = _fjsp_setzero_v2r8();
286 fjx1 = _fjsp_setzero_v2r8();
287 fjy1 = _fjsp_setzero_v2r8();
288 fjz1 = _fjsp_setzero_v2r8();
289 fjx2 = _fjsp_setzero_v2r8();
290 fjy2 = _fjsp_setzero_v2r8();
291 fjz2 = _fjsp_setzero_v2r8();
293 /**************************
294 * CALCULATE INTERACTIONS *
295 **************************/
297 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
300 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
302 /* EWALD ELECTROSTATICS */
304 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
305 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
306 itab_tmp = _fjsp_dtox_v2r8(ewrt);
307 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
308 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
310 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
311 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
312 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
313 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
314 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
315 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
316 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
317 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
318 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
319 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
321 /* LENNARD-JONES DISPERSION/REPULSION */
323 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
324 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
325 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
326 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
327 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
328 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
330 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
332 /* Update potential sum for this i atom from the interaction with this j atom. */
333 velec = _fjsp_and_v2r8(velec,cutoff_mask);
334 velecsum = _fjsp_add_v2r8(velecsum,velec);
335 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
336 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
338 fscal = _fjsp_add_v2r8(felec,fvdw);
340 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
342 /* Update vectorial force */
343 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
344 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
345 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
347 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
348 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
349 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
353 /**************************
354 * CALCULATE INTERACTIONS *
355 **************************/
357 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
360 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
362 /* EWALD ELECTROSTATICS */
364 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
365 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
366 itab_tmp = _fjsp_dtox_v2r8(ewrt);
367 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
368 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
370 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
371 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
372 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
373 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
374 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
375 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
376 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
377 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
378 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
379 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
381 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
383 /* Update potential sum for this i atom from the interaction with this j atom. */
384 velec = _fjsp_and_v2r8(velec,cutoff_mask);
385 velecsum = _fjsp_add_v2r8(velecsum,velec);
389 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
391 /* Update vectorial force */
392 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
393 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
394 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
396 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
397 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
398 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
402 /**************************
403 * CALCULATE INTERACTIONS *
404 **************************/
406 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
409 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
411 /* EWALD ELECTROSTATICS */
413 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
414 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
415 itab_tmp = _fjsp_dtox_v2r8(ewrt);
416 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
417 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
419 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
420 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
421 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
422 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
423 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
424 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
425 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
426 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
427 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
428 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
430 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
432 /* Update potential sum for this i atom from the interaction with this j atom. */
433 velec = _fjsp_and_v2r8(velec,cutoff_mask);
434 velecsum = _fjsp_add_v2r8(velecsum,velec);
438 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
440 /* Update vectorial force */
441 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
442 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
443 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
445 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
446 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
447 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
451 /**************************
452 * CALCULATE INTERACTIONS *
453 **************************/
455 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
458 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
460 /* EWALD ELECTROSTATICS */
462 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
463 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
464 itab_tmp = _fjsp_dtox_v2r8(ewrt);
465 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
466 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
468 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
469 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
470 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
471 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
472 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
473 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
474 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
475 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
476 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
477 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
479 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
481 /* Update potential sum for this i atom from the interaction with this j atom. */
482 velec = _fjsp_and_v2r8(velec,cutoff_mask);
483 velecsum = _fjsp_add_v2r8(velecsum,velec);
487 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
489 /* Update vectorial force */
490 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
491 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
492 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
494 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
495 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
496 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
500 /**************************
501 * CALCULATE INTERACTIONS *
502 **************************/
504 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
507 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
509 /* EWALD ELECTROSTATICS */
511 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
512 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
513 itab_tmp = _fjsp_dtox_v2r8(ewrt);
514 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
515 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
517 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
518 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
519 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
520 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
521 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
522 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
523 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
524 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
525 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
526 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
528 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
530 /* Update potential sum for this i atom from the interaction with this j atom. */
531 velec = _fjsp_and_v2r8(velec,cutoff_mask);
532 velecsum = _fjsp_add_v2r8(velecsum,velec);
536 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
538 /* Update vectorial force */
539 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
540 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
541 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
543 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
544 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
545 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
549 /**************************
550 * CALCULATE INTERACTIONS *
551 **************************/
553 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
556 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
558 /* EWALD ELECTROSTATICS */
560 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
561 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
562 itab_tmp = _fjsp_dtox_v2r8(ewrt);
563 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
564 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
566 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
567 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
568 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
569 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
570 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
571 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
572 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
573 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
574 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
575 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
577 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
579 /* Update potential sum for this i atom from the interaction with this j atom. */
580 velec = _fjsp_and_v2r8(velec,cutoff_mask);
581 velecsum = _fjsp_add_v2r8(velecsum,velec);
585 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
587 /* Update vectorial force */
588 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
589 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
590 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
592 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
593 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
594 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
598 /**************************
599 * CALCULATE INTERACTIONS *
600 **************************/
602 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
605 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
607 /* EWALD ELECTROSTATICS */
609 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
610 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
611 itab_tmp = _fjsp_dtox_v2r8(ewrt);
612 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
613 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
615 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
616 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
617 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
618 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
619 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
620 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
621 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
622 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
623 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
624 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
626 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
628 /* Update potential sum for this i atom from the interaction with this j atom. */
629 velec = _fjsp_and_v2r8(velec,cutoff_mask);
630 velecsum = _fjsp_add_v2r8(velecsum,velec);
634 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
636 /* Update vectorial force */
637 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
638 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
639 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
641 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
642 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
643 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
647 /**************************
648 * CALCULATE INTERACTIONS *
649 **************************/
651 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
654 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
656 /* EWALD ELECTROSTATICS */
658 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
659 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
660 itab_tmp = _fjsp_dtox_v2r8(ewrt);
661 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
662 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
664 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
665 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
666 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
667 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
668 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
669 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
670 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
671 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
672 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
673 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
675 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
677 /* Update potential sum for this i atom from the interaction with this j atom. */
678 velec = _fjsp_and_v2r8(velec,cutoff_mask);
679 velecsum = _fjsp_add_v2r8(velecsum,velec);
683 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
685 /* Update vectorial force */
686 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
687 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
688 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
690 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
691 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
692 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
696 /**************************
697 * CALCULATE INTERACTIONS *
698 **************************/
700 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
703 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
705 /* EWALD ELECTROSTATICS */
707 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
708 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
709 itab_tmp = _fjsp_dtox_v2r8(ewrt);
710 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
711 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
713 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
714 ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
715 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
716 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
717 ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
718 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
719 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
720 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
721 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
722 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
724 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
726 /* Update potential sum for this i atom from the interaction with this j atom. */
727 velec = _fjsp_and_v2r8(velec,cutoff_mask);
728 velecsum = _fjsp_add_v2r8(velecsum,velec);
732 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
734 /* Update vectorial force */
735 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
736 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
737 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
739 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
740 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
741 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
745 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
747 /* Inner loop uses 459 flops */
754 j_coord_offsetA = DIM*jnrA;
756 /* load j atom coordinates */
757 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
758 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
760 /* Calculate displacement vector */
761 dx00 = _fjsp_sub_v2r8(ix0,jx0);
762 dy00 = _fjsp_sub_v2r8(iy0,jy0);
763 dz00 = _fjsp_sub_v2r8(iz0,jz0);
764 dx01 = _fjsp_sub_v2r8(ix0,jx1);
765 dy01 = _fjsp_sub_v2r8(iy0,jy1);
766 dz01 = _fjsp_sub_v2r8(iz0,jz1);
767 dx02 = _fjsp_sub_v2r8(ix0,jx2);
768 dy02 = _fjsp_sub_v2r8(iy0,jy2);
769 dz02 = _fjsp_sub_v2r8(iz0,jz2);
770 dx10 = _fjsp_sub_v2r8(ix1,jx0);
771 dy10 = _fjsp_sub_v2r8(iy1,jy0);
772 dz10 = _fjsp_sub_v2r8(iz1,jz0);
773 dx11 = _fjsp_sub_v2r8(ix1,jx1);
774 dy11 = _fjsp_sub_v2r8(iy1,jy1);
775 dz11 = _fjsp_sub_v2r8(iz1,jz1);
776 dx12 = _fjsp_sub_v2r8(ix1,jx2);
777 dy12 = _fjsp_sub_v2r8(iy1,jy2);
778 dz12 = _fjsp_sub_v2r8(iz1,jz2);
779 dx20 = _fjsp_sub_v2r8(ix2,jx0);
780 dy20 = _fjsp_sub_v2r8(iy2,jy0);
781 dz20 = _fjsp_sub_v2r8(iz2,jz0);
782 dx21 = _fjsp_sub_v2r8(ix2,jx1);
783 dy21 = _fjsp_sub_v2r8(iy2,jy1);
784 dz21 = _fjsp_sub_v2r8(iz2,jz1);
785 dx22 = _fjsp_sub_v2r8(ix2,jx2);
786 dy22 = _fjsp_sub_v2r8(iy2,jy2);
787 dz22 = _fjsp_sub_v2r8(iz2,jz2);
789 /* Calculate squared distance and things based on it */
790 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
791 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
792 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
793 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
794 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
795 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
796 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
797 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
798 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
800 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
801 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
802 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
803 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
804 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
805 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
806 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
807 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
808 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
810 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
811 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
812 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
813 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
814 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
815 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
816 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
817 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
818 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
820 fjx0 = _fjsp_setzero_v2r8();
821 fjy0 = _fjsp_setzero_v2r8();
822 fjz0 = _fjsp_setzero_v2r8();
823 fjx1 = _fjsp_setzero_v2r8();
824 fjy1 = _fjsp_setzero_v2r8();
825 fjz1 = _fjsp_setzero_v2r8();
826 fjx2 = _fjsp_setzero_v2r8();
827 fjy2 = _fjsp_setzero_v2r8();
828 fjz2 = _fjsp_setzero_v2r8();
830 /**************************
831 * CALCULATE INTERACTIONS *
832 **************************/
834 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
837 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
839 /* EWALD ELECTROSTATICS */
841 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
842 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
843 itab_tmp = _fjsp_dtox_v2r8(ewrt);
844 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
845 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
847 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
848 ewtabD = _fjsp_setzero_v2r8();
849 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
850 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
851 ewtabFn = _fjsp_setzero_v2r8();
852 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
853 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
854 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
855 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
856 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
858 /* LENNARD-JONES DISPERSION/REPULSION */
860 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
861 vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
862 vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
863 vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
864 _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
865 fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
867 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
869 /* Update potential sum for this i atom from the interaction with this j atom. */
870 velec = _fjsp_and_v2r8(velec,cutoff_mask);
871 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
872 velecsum = _fjsp_add_v2r8(velecsum,velec);
873 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
874 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
875 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
877 fscal = _fjsp_add_v2r8(felec,fvdw);
879 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
881 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
883 /* Update vectorial force */
884 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
885 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
886 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
888 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
889 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
890 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
894 /**************************
895 * CALCULATE INTERACTIONS *
896 **************************/
898 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
901 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
903 /* EWALD ELECTROSTATICS */
905 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
906 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
907 itab_tmp = _fjsp_dtox_v2r8(ewrt);
908 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
909 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
911 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
912 ewtabD = _fjsp_setzero_v2r8();
913 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
914 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
915 ewtabFn = _fjsp_setzero_v2r8();
916 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
917 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
918 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
919 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
920 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
922 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
924 /* Update potential sum for this i atom from the interaction with this j atom. */
925 velec = _fjsp_and_v2r8(velec,cutoff_mask);
926 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
927 velecsum = _fjsp_add_v2r8(velecsum,velec);
931 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
933 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
935 /* Update vectorial force */
936 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
937 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
938 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
940 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
941 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
942 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
946 /**************************
947 * CALCULATE INTERACTIONS *
948 **************************/
950 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
953 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
955 /* EWALD ELECTROSTATICS */
957 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
958 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
959 itab_tmp = _fjsp_dtox_v2r8(ewrt);
960 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
961 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
963 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
964 ewtabD = _fjsp_setzero_v2r8();
965 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
966 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
967 ewtabFn = _fjsp_setzero_v2r8();
968 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
969 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
970 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
971 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
972 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
974 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
976 /* Update potential sum for this i atom from the interaction with this j atom. */
977 velec = _fjsp_and_v2r8(velec,cutoff_mask);
978 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
979 velecsum = _fjsp_add_v2r8(velecsum,velec);
983 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
985 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
987 /* Update vectorial force */
988 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
989 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
990 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
992 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
993 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
994 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
998 /**************************
999 * CALCULATE INTERACTIONS *
1000 **************************/
1002 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1005 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1007 /* EWALD ELECTROSTATICS */
1009 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1010 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1011 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1012 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1013 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1015 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1016 ewtabD = _fjsp_setzero_v2r8();
1017 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1018 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1019 ewtabFn = _fjsp_setzero_v2r8();
1020 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1021 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1022 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1023 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
1024 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1026 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1028 /* Update potential sum for this i atom from the interaction with this j atom. */
1029 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1030 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1031 velecsum = _fjsp_add_v2r8(velecsum,velec);
1035 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1037 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1039 /* Update vectorial force */
1040 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1041 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1042 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1044 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1045 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1046 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1050 /**************************
1051 * CALCULATE INTERACTIONS *
1052 **************************/
1054 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1057 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1059 /* EWALD ELECTROSTATICS */
1061 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1062 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1063 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1064 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1065 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1067 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1068 ewtabD = _fjsp_setzero_v2r8();
1069 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1070 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1071 ewtabFn = _fjsp_setzero_v2r8();
1072 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1073 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1074 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1075 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
1076 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1078 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1080 /* Update potential sum for this i atom from the interaction with this j atom. */
1081 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1082 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1083 velecsum = _fjsp_add_v2r8(velecsum,velec);
1087 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1089 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1091 /* Update vectorial force */
1092 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1093 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1094 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1096 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1097 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1098 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1102 /**************************
1103 * CALCULATE INTERACTIONS *
1104 **************************/
1106 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1109 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1111 /* EWALD ELECTROSTATICS */
1113 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1114 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1115 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1116 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1117 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1119 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1120 ewtabD = _fjsp_setzero_v2r8();
1121 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1122 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1123 ewtabFn = _fjsp_setzero_v2r8();
1124 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1125 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1126 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1127 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
1128 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1130 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1132 /* Update potential sum for this i atom from the interaction with this j atom. */
1133 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1134 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1135 velecsum = _fjsp_add_v2r8(velecsum,velec);
1139 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1141 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1143 /* Update vectorial force */
1144 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1145 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1146 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1148 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1149 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1150 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1154 /**************************
1155 * CALCULATE INTERACTIONS *
1156 **************************/
1158 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1161 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1163 /* EWALD ELECTROSTATICS */
1165 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1166 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1167 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1168 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1169 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1171 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1172 ewtabD = _fjsp_setzero_v2r8();
1173 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1174 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1175 ewtabFn = _fjsp_setzero_v2r8();
1176 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1177 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1178 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1179 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
1180 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1182 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1184 /* Update potential sum for this i atom from the interaction with this j atom. */
1185 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1186 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1187 velecsum = _fjsp_add_v2r8(velecsum,velec);
1191 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1193 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1195 /* Update vectorial force */
1196 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1197 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1198 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1200 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1201 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1202 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1206 /**************************
1207 * CALCULATE INTERACTIONS *
1208 **************************/
1210 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1213 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1215 /* EWALD ELECTROSTATICS */
1217 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1218 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1219 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1220 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1221 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1223 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1224 ewtabD = _fjsp_setzero_v2r8();
1225 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1226 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1227 ewtabFn = _fjsp_setzero_v2r8();
1228 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1229 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1230 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1231 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
1232 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1234 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1236 /* Update potential sum for this i atom from the interaction with this j atom. */
1237 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1238 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1239 velecsum = _fjsp_add_v2r8(velecsum,velec);
1243 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1245 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1247 /* Update vectorial force */
1248 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1249 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1250 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1252 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1253 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1254 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1258 /**************************
1259 * CALCULATE INTERACTIONS *
1260 **************************/
1262 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1265 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1267 /* EWALD ELECTROSTATICS */
1269 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1270 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1271 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1272 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1273 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1275 ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
1276 ewtabD = _fjsp_setzero_v2r8();
1277 GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
1278 ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
1279 ewtabFn = _fjsp_setzero_v2r8();
1280 GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
1281 felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
1282 velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
1283 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
1284 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1286 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1288 /* Update potential sum for this i atom from the interaction with this j atom. */
1289 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1290 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1291 velecsum = _fjsp_add_v2r8(velecsum,velec);
1295 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1297 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1299 /* Update vectorial force */
1300 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1301 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1302 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1304 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1305 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1306 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1310 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1312 /* Inner loop uses 459 flops */
1315 /* End of innermost loop */
1317 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1318 f+i_coord_offset,fshift+i_shift_offset);
1321 /* Update potential energies */
1322 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1323 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1325 /* Increment number of inner iterations */
1326 inneriter += j_index_end - j_index_start;
1328 /* Outer loop uses 20 flops */
1331 /* Increment number of outer iterations */
1334 /* Update outer/inner flops */
1336 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459);
1339 * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1340 * Electrostatics interaction: Ewald
1341 * VdW interaction: LennardJones
1342 * Geometry: Water3-Water3
1343 * Calculate force/pot: Force
1346 nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
1347 (t_nblist * gmx_restrict nlist,
1348 rvec * gmx_restrict xx,
1349 rvec * gmx_restrict ff,
1350 t_forcerec * gmx_restrict fr,
1351 t_mdatoms * gmx_restrict mdatoms,
1352 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1353 t_nrnb * gmx_restrict nrnb)
1355 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1356 * just 0 for non-waters.
1357 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1358 * jnr indices corresponding to data put in the two positions in the SIMD register.
1360 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1361 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1363 int j_coord_offsetA,j_coord_offsetB;
1364 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1365 real rcutoff_scalar;
1366 real *shiftvec,*fshift,*x,*f;
1367 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1369 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1371 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1373 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1374 int vdwjidx0A,vdwjidx0B;
1375 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1376 int vdwjidx1A,vdwjidx1B;
1377 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1378 int vdwjidx2A,vdwjidx2B;
1379 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1380 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1381 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1382 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1383 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1384 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1385 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1386 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1387 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1388 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1389 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1392 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1395 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1396 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1397 _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
1399 _fjsp_v2r8 itab_tmp;
1400 _fjsp_v2r8 dummy_mask,cutoff_mask;
1401 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1402 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1403 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1410 jindex = nlist->jindex;
1412 shiftidx = nlist->shift;
1414 shiftvec = fr->shift_vec[0];
1415 fshift = fr->fshift[0];
1416 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1417 charge = mdatoms->chargeA;
1418 nvdwtype = fr->ntype;
1419 vdwparam = fr->nbfp;
1420 vdwtype = mdatoms->typeA;
1422 sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
1423 ewtab = fr->ic->tabq_coul_F;
1424 ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
1425 ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
1427 /* Setup water-specific parameters */
1428 inr = nlist->iinr[0];
1429 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1430 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1431 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1432 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1434 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1435 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1436 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1437 vdwjidx0A = 2*vdwtype[inr+0];
1438 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1439 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1440 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1441 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1442 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1443 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1444 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1445 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1446 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1447 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1448 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1450 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1451 rcutoff_scalar = fr->rcoulomb;
1452 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1453 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1455 sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
1456 rvdw = gmx_fjsp_set1_v2r8(fr->rvdw);
1458 /* Avoid stupid compiler warnings */
1460 j_coord_offsetA = 0;
1461 j_coord_offsetB = 0;
1466 /* Start outer loop over neighborlists */
1467 for(iidx=0; iidx<nri; iidx++)
1469 /* Load shift vector for this list */
1470 i_shift_offset = DIM*shiftidx[iidx];
1472 /* Load limits for loop over neighbors */
1473 j_index_start = jindex[iidx];
1474 j_index_end = jindex[iidx+1];
1476 /* Get outer coordinate index */
1478 i_coord_offset = DIM*inr;
1480 /* Load i particle coords and add shift vector */
1481 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1482 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1484 fix0 = _fjsp_setzero_v2r8();
1485 fiy0 = _fjsp_setzero_v2r8();
1486 fiz0 = _fjsp_setzero_v2r8();
1487 fix1 = _fjsp_setzero_v2r8();
1488 fiy1 = _fjsp_setzero_v2r8();
1489 fiz1 = _fjsp_setzero_v2r8();
1490 fix2 = _fjsp_setzero_v2r8();
1491 fiy2 = _fjsp_setzero_v2r8();
1492 fiz2 = _fjsp_setzero_v2r8();
1494 /* Start inner kernel loop */
1495 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1498 /* Get j neighbor index, and coordinate index */
1500 jnrB = jjnr[jidx+1];
1501 j_coord_offsetA = DIM*jnrA;
1502 j_coord_offsetB = DIM*jnrB;
1504 /* load j atom coordinates */
1505 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1506 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1508 /* Calculate displacement vector */
1509 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1510 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1511 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1512 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1513 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1514 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1515 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1516 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1517 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1518 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1519 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1520 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1521 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1522 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1523 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1524 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1525 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1526 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1527 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1528 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1529 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1530 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1531 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1532 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1533 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1534 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1535 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1537 /* Calculate squared distance and things based on it */
1538 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1539 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1540 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1541 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1542 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1543 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1544 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1545 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1546 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1548 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1549 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1550 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1551 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1552 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1553 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1554 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1555 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1556 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1558 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1559 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1560 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1561 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1562 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1563 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1564 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1565 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1566 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1568 fjx0 = _fjsp_setzero_v2r8();
1569 fjy0 = _fjsp_setzero_v2r8();
1570 fjz0 = _fjsp_setzero_v2r8();
1571 fjx1 = _fjsp_setzero_v2r8();
1572 fjy1 = _fjsp_setzero_v2r8();
1573 fjz1 = _fjsp_setzero_v2r8();
1574 fjx2 = _fjsp_setzero_v2r8();
1575 fjy2 = _fjsp_setzero_v2r8();
1576 fjz2 = _fjsp_setzero_v2r8();
1578 /**************************
1579 * CALCULATE INTERACTIONS *
1580 **************************/
1582 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1585 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1587 /* EWALD ELECTROSTATICS */
1589 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1590 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
1591 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1592 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1593 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1595 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1597 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1598 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
1600 /* LENNARD-JONES DISPERSION/REPULSION */
1602 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
1603 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
1605 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1607 fscal = _fjsp_add_v2r8(felec,fvdw);
1609 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1611 /* Update vectorial force */
1612 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1613 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1614 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1616 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1617 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1618 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1622 /**************************
1623 * CALCULATE INTERACTIONS *
1624 **************************/
1626 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1629 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
1631 /* EWALD ELECTROSTATICS */
1633 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1634 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
1635 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1636 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1637 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1639 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1641 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1642 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
1644 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1648 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1650 /* Update vectorial force */
1651 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1652 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1653 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1655 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1656 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1657 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1661 /**************************
1662 * CALCULATE INTERACTIONS *
1663 **************************/
1665 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1668 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
1670 /* EWALD ELECTROSTATICS */
1672 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1673 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
1674 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1675 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1676 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1678 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1680 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1681 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
1683 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1687 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1689 /* Update vectorial force */
1690 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1691 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1692 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1694 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1695 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1696 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1700 /**************************
1701 * CALCULATE INTERACTIONS *
1702 **************************/
1704 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1707 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
1709 /* EWALD ELECTROSTATICS */
1711 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1712 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
1713 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1714 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1715 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1717 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1719 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1720 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
1722 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1726 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1728 /* Update vectorial force */
1729 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1730 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1731 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1733 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1734 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1735 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1739 /**************************
1740 * CALCULATE INTERACTIONS *
1741 **************************/
1743 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1746 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
1748 /* EWALD ELECTROSTATICS */
1750 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1751 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
1752 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1753 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1754 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1756 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1758 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1759 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
1761 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1765 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1767 /* Update vectorial force */
1768 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1769 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1770 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1772 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1773 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1774 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1778 /**************************
1779 * CALCULATE INTERACTIONS *
1780 **************************/
1782 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1785 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
1787 /* EWALD ELECTROSTATICS */
1789 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1790 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
1791 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1792 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1793 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1795 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1797 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1798 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
1800 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1804 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1806 /* Update vectorial force */
1807 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1808 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1809 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1811 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1812 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1813 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1817 /**************************
1818 * CALCULATE INTERACTIONS *
1819 **************************/
1821 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1824 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
1826 /* EWALD ELECTROSTATICS */
1828 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1829 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
1830 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1831 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1832 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1834 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1836 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1837 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
1839 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1843 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1845 /* Update vectorial force */
1846 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1847 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1848 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1850 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1851 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1852 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1856 /**************************
1857 * CALCULATE INTERACTIONS *
1858 **************************/
1860 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1863 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
1865 /* EWALD ELECTROSTATICS */
1867 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1868 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
1869 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1870 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1871 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1873 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1875 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1876 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
1878 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1882 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1884 /* Update vectorial force */
1885 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1886 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1887 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1889 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1890 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1891 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1895 /**************************
1896 * CALCULATE INTERACTIONS *
1897 **************************/
1899 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1902 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
1904 /* EWALD ELECTROSTATICS */
1906 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
1907 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
1908 itab_tmp = _fjsp_dtox_v2r8(ewrt);
1909 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
1910 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
1912 gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
1914 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
1915 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
1917 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1921 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1923 /* Update vectorial force */
1924 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1925 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1926 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1928 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1929 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1930 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1934 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1936 /* Inner loop uses 385 flops */
1939 if(jidx<j_index_end)
1943 j_coord_offsetA = DIM*jnrA;
1945 /* load j atom coordinates */
1946 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1947 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1949 /* Calculate displacement vector */
1950 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1951 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1952 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1953 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1954 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1955 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1956 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1957 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1958 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1959 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1960 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1961 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1962 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1963 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1964 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1965 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1966 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1967 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1968 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1969 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1970 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1971 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1972 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1973 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1974 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1975 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1976 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1978 /* Calculate squared distance and things based on it */
1979 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1980 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1981 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1982 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1983 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1984 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1985 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1986 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1987 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1989 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1990 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1991 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1992 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1993 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1994 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1995 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1996 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1997 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1999 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
2000 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
2001 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
2002 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
2003 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
2004 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
2005 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
2006 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
2007 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
2009 fjx0 = _fjsp_setzero_v2r8();
2010 fjy0 = _fjsp_setzero_v2r8();
2011 fjz0 = _fjsp_setzero_v2r8();
2012 fjx1 = _fjsp_setzero_v2r8();
2013 fjy1 = _fjsp_setzero_v2r8();
2014 fjz1 = _fjsp_setzero_v2r8();
2015 fjx2 = _fjsp_setzero_v2r8();
2016 fjy2 = _fjsp_setzero_v2r8();
2017 fjz2 = _fjsp_setzero_v2r8();
2019 /**************************
2020 * CALCULATE INTERACTIONS *
2021 **************************/
2023 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
2026 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
2028 /* EWALD ELECTROSTATICS */
2030 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2031 ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
2032 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2033 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2034 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2036 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2037 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2038 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
2040 /* LENNARD-JONES DISPERSION/REPULSION */
2042 rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
2043 fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
2045 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
2047 fscal = _fjsp_add_v2r8(felec,fvdw);
2049 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2051 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2053 /* Update vectorial force */
2054 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
2055 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
2056 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
2058 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
2059 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
2060 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
2064 /**************************
2065 * CALCULATE INTERACTIONS *
2066 **************************/
2068 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
2071 r01 = _fjsp_mul_v2r8(rsq01,rinv01);
2073 /* EWALD ELECTROSTATICS */
2075 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2076 ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
2077 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2078 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2079 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2081 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2082 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2083 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
2085 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
2089 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2091 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2093 /* Update vectorial force */
2094 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
2095 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
2096 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
2098 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
2099 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
2100 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
2104 /**************************
2105 * CALCULATE INTERACTIONS *
2106 **************************/
2108 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
2111 r02 = _fjsp_mul_v2r8(rsq02,rinv02);
2113 /* EWALD ELECTROSTATICS */
2115 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2116 ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
2117 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2118 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2119 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2121 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2122 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2123 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
2125 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
2129 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2131 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2133 /* Update vectorial force */
2134 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
2135 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
2136 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
2138 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
2139 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
2140 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
2144 /**************************
2145 * CALCULATE INTERACTIONS *
2146 **************************/
2148 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
2151 r10 = _fjsp_mul_v2r8(rsq10,rinv10);
2153 /* EWALD ELECTROSTATICS */
2155 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2156 ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
2157 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2158 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2159 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2161 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2162 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2163 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
2165 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
2169 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2171 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2173 /* Update vectorial force */
2174 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
2175 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
2176 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
2178 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
2179 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
2180 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
2184 /**************************
2185 * CALCULATE INTERACTIONS *
2186 **************************/
2188 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
2191 r11 = _fjsp_mul_v2r8(rsq11,rinv11);
2193 /* EWALD ELECTROSTATICS */
2195 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2196 ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
2197 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2198 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2199 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2201 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2202 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2203 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
2205 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
2209 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2211 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2213 /* Update vectorial force */
2214 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
2215 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
2216 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
2218 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
2219 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
2220 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
2224 /**************************
2225 * CALCULATE INTERACTIONS *
2226 **************************/
2228 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
2231 r12 = _fjsp_mul_v2r8(rsq12,rinv12);
2233 /* EWALD ELECTROSTATICS */
2235 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2236 ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
2237 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2238 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2239 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2241 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2242 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2243 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
2245 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
2249 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2251 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2253 /* Update vectorial force */
2254 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
2255 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
2256 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
2258 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
2259 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
2260 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
2264 /**************************
2265 * CALCULATE INTERACTIONS *
2266 **************************/
2268 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
2271 r20 = _fjsp_mul_v2r8(rsq20,rinv20);
2273 /* EWALD ELECTROSTATICS */
2275 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2276 ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
2277 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2278 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2279 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2281 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2282 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2283 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
2285 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
2289 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2291 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2293 /* Update vectorial force */
2294 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
2295 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
2296 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
2298 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
2299 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
2300 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
2304 /**************************
2305 * CALCULATE INTERACTIONS *
2306 **************************/
2308 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
2311 r21 = _fjsp_mul_v2r8(rsq21,rinv21);
2313 /* EWALD ELECTROSTATICS */
2315 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2316 ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
2317 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2318 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2319 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2321 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2322 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2323 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
2325 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
2329 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2331 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2333 /* Update vectorial force */
2334 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
2335 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
2336 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
2338 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
2339 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
2340 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
2344 /**************************
2345 * CALCULATE INTERACTIONS *
2346 **************************/
2348 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
2351 r22 = _fjsp_mul_v2r8(rsq22,rinv22);
2353 /* EWALD ELECTROSTATICS */
2355 /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
2356 ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
2357 itab_tmp = _fjsp_dtox_v2r8(ewrt);
2358 eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
2359 _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
2361 gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
2362 felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
2363 felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
2365 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
2369 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
2371 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
2373 /* Update vectorial force */
2374 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
2375 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
2376 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
2378 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
2379 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
2380 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
2384 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
2386 /* Inner loop uses 385 flops */
2389 /* End of innermost loop */
2391 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
2392 f+i_coord_offset,fshift+i_shift_offset);
2394 /* Increment number of inner iterations */
2395 inneriter += j_index_end - j_index_start;
2397 /* Outer loop uses 18 flops */
2400 /* Increment number of outer iterations */
2403 /* Update outer/inner flops */
2405 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*385);