2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
49 #include "kernelutil_sparc64_hpc_ace_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
53 * Electrostatics interaction: ReactionField
54 * VdW interaction: CubicSplineTable
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
60 (t_nblist * gmx_restrict nlist,
61 rvec * gmx_restrict xx,
62 rvec * gmx_restrict ff,
63 t_forcerec * gmx_restrict fr,
64 t_mdatoms * gmx_restrict mdatoms,
65 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
66 t_nrnb * gmx_restrict nrnb)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
71 * jnr indices corresponding to data put in the four positions in the SIMD register.
73 int i_shift_offset,i_coord_offset,outeriter,inneriter;
74 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
76 int j_coord_offsetA,j_coord_offsetB;
77 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
79 real *shiftvec,*fshift,*x,*f;
80 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
82 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
84 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
86 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
87 int vdwjidx0A,vdwjidx0B;
88 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
89 int vdwjidx1A,vdwjidx1B;
90 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
91 int vdwjidx2A,vdwjidx2B;
92 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
93 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
94 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
95 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
96 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
97 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
98 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
99 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
100 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
101 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
102 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
105 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
108 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
109 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
110 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
113 _fjsp_v2r8 dummy_mask,cutoff_mask;
114 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
115 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
116 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
123 jindex = nlist->jindex;
125 shiftidx = nlist->shift;
127 shiftvec = fr->shift_vec[0];
128 fshift = fr->fshift[0];
129 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
130 charge = mdatoms->chargeA;
131 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
132 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
133 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
134 nvdwtype = fr->ntype;
136 vdwtype = mdatoms->typeA;
138 vftab = kernel_data->table_vdw->data;
139 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
141 /* Setup water-specific parameters */
142 inr = nlist->iinr[0];
143 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
144 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
145 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
146 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
148 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
149 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
150 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
151 vdwjidx0A = 2*vdwtype[inr+0];
152 qq00 = _fjsp_mul_v2r8(iq0,jq0);
153 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
154 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
155 qq01 = _fjsp_mul_v2r8(iq0,jq1);
156 qq02 = _fjsp_mul_v2r8(iq0,jq2);
157 qq10 = _fjsp_mul_v2r8(iq1,jq0);
158 qq11 = _fjsp_mul_v2r8(iq1,jq1);
159 qq12 = _fjsp_mul_v2r8(iq1,jq2);
160 qq20 = _fjsp_mul_v2r8(iq2,jq0);
161 qq21 = _fjsp_mul_v2r8(iq2,jq1);
162 qq22 = _fjsp_mul_v2r8(iq2,jq2);
164 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
165 rcutoff_scalar = fr->rcoulomb;
166 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
167 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
169 /* Avoid stupid compiler warnings */
177 /* Start outer loop over neighborlists */
178 for(iidx=0; iidx<nri; iidx++)
180 /* Load shift vector for this list */
181 i_shift_offset = DIM*shiftidx[iidx];
183 /* Load limits for loop over neighbors */
184 j_index_start = jindex[iidx];
185 j_index_end = jindex[iidx+1];
187 /* Get outer coordinate index */
189 i_coord_offset = DIM*inr;
191 /* Load i particle coords and add shift vector */
192 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
193 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
195 fix0 = _fjsp_setzero_v2r8();
196 fiy0 = _fjsp_setzero_v2r8();
197 fiz0 = _fjsp_setzero_v2r8();
198 fix1 = _fjsp_setzero_v2r8();
199 fiy1 = _fjsp_setzero_v2r8();
200 fiz1 = _fjsp_setzero_v2r8();
201 fix2 = _fjsp_setzero_v2r8();
202 fiy2 = _fjsp_setzero_v2r8();
203 fiz2 = _fjsp_setzero_v2r8();
205 /* Reset potential sums */
206 velecsum = _fjsp_setzero_v2r8();
207 vvdwsum = _fjsp_setzero_v2r8();
209 /* Start inner kernel loop */
210 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
213 /* Get j neighbor index, and coordinate index */
216 j_coord_offsetA = DIM*jnrA;
217 j_coord_offsetB = DIM*jnrB;
219 /* load j atom coordinates */
220 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
221 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
223 /* Calculate displacement vector */
224 dx00 = _fjsp_sub_v2r8(ix0,jx0);
225 dy00 = _fjsp_sub_v2r8(iy0,jy0);
226 dz00 = _fjsp_sub_v2r8(iz0,jz0);
227 dx01 = _fjsp_sub_v2r8(ix0,jx1);
228 dy01 = _fjsp_sub_v2r8(iy0,jy1);
229 dz01 = _fjsp_sub_v2r8(iz0,jz1);
230 dx02 = _fjsp_sub_v2r8(ix0,jx2);
231 dy02 = _fjsp_sub_v2r8(iy0,jy2);
232 dz02 = _fjsp_sub_v2r8(iz0,jz2);
233 dx10 = _fjsp_sub_v2r8(ix1,jx0);
234 dy10 = _fjsp_sub_v2r8(iy1,jy0);
235 dz10 = _fjsp_sub_v2r8(iz1,jz0);
236 dx11 = _fjsp_sub_v2r8(ix1,jx1);
237 dy11 = _fjsp_sub_v2r8(iy1,jy1);
238 dz11 = _fjsp_sub_v2r8(iz1,jz1);
239 dx12 = _fjsp_sub_v2r8(ix1,jx2);
240 dy12 = _fjsp_sub_v2r8(iy1,jy2);
241 dz12 = _fjsp_sub_v2r8(iz1,jz2);
242 dx20 = _fjsp_sub_v2r8(ix2,jx0);
243 dy20 = _fjsp_sub_v2r8(iy2,jy0);
244 dz20 = _fjsp_sub_v2r8(iz2,jz0);
245 dx21 = _fjsp_sub_v2r8(ix2,jx1);
246 dy21 = _fjsp_sub_v2r8(iy2,jy1);
247 dz21 = _fjsp_sub_v2r8(iz2,jz1);
248 dx22 = _fjsp_sub_v2r8(ix2,jx2);
249 dy22 = _fjsp_sub_v2r8(iy2,jy2);
250 dz22 = _fjsp_sub_v2r8(iz2,jz2);
252 /* Calculate squared distance and things based on it */
253 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
254 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
255 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
256 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
257 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
258 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
259 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
260 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
261 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
263 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
264 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
265 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
266 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
267 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
268 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
269 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
270 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
271 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
273 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
274 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
275 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
276 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
277 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
278 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
279 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
280 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
281 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
283 fjx0 = _fjsp_setzero_v2r8();
284 fjy0 = _fjsp_setzero_v2r8();
285 fjz0 = _fjsp_setzero_v2r8();
286 fjx1 = _fjsp_setzero_v2r8();
287 fjy1 = _fjsp_setzero_v2r8();
288 fjz1 = _fjsp_setzero_v2r8();
289 fjx2 = _fjsp_setzero_v2r8();
290 fjy2 = _fjsp_setzero_v2r8();
291 fjz2 = _fjsp_setzero_v2r8();
293 /**************************
294 * CALCULATE INTERACTIONS *
295 **************************/
297 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
300 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
302 /* Calculate table index by multiplying r with table scale and truncate to integer */
303 rt = _fjsp_mul_v2r8(r00,vftabscale);
304 itab_tmp = _fjsp_dtox_v2r8(rt);
305 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
306 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
307 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
312 /* REACTION-FIELD ELECTROSTATICS */
313 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
314 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
316 /* CUBIC SPLINE TABLE DISPERSION */
317 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
318 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
319 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
320 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
321 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
322 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
323 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
324 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
325 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
326 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
327 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
329 /* CUBIC SPLINE TABLE REPULSION */
330 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
331 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
332 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
333 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
334 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
335 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
336 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
337 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
338 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
339 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
340 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
341 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
342 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
344 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
346 /* Update potential sum for this i atom from the interaction with this j atom. */
347 velec = _fjsp_and_v2r8(velec,cutoff_mask);
348 velecsum = _fjsp_add_v2r8(velecsum,velec);
349 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
350 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
352 fscal = _fjsp_add_v2r8(felec,fvdw);
354 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
356 /* Update vectorial force */
357 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
358 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
359 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
361 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
362 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
363 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
367 /**************************
368 * CALCULATE INTERACTIONS *
369 **************************/
371 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
374 /* REACTION-FIELD ELECTROSTATICS */
375 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
376 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
378 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
380 /* Update potential sum for this i atom from the interaction with this j atom. */
381 velec = _fjsp_and_v2r8(velec,cutoff_mask);
382 velecsum = _fjsp_add_v2r8(velecsum,velec);
386 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
388 /* Update vectorial force */
389 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
390 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
391 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
393 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
394 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
395 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
399 /**************************
400 * CALCULATE INTERACTIONS *
401 **************************/
403 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
406 /* REACTION-FIELD ELECTROSTATICS */
407 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
408 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
410 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
412 /* Update potential sum for this i atom from the interaction with this j atom. */
413 velec = _fjsp_and_v2r8(velec,cutoff_mask);
414 velecsum = _fjsp_add_v2r8(velecsum,velec);
418 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
420 /* Update vectorial force */
421 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
422 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
423 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
425 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
426 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
427 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
431 /**************************
432 * CALCULATE INTERACTIONS *
433 **************************/
435 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
438 /* REACTION-FIELD ELECTROSTATICS */
439 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
440 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
442 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
444 /* Update potential sum for this i atom from the interaction with this j atom. */
445 velec = _fjsp_and_v2r8(velec,cutoff_mask);
446 velecsum = _fjsp_add_v2r8(velecsum,velec);
450 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
452 /* Update vectorial force */
453 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
454 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
455 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
457 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
458 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
459 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
463 /**************************
464 * CALCULATE INTERACTIONS *
465 **************************/
467 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
470 /* REACTION-FIELD ELECTROSTATICS */
471 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
472 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
474 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
476 /* Update potential sum for this i atom from the interaction with this j atom. */
477 velec = _fjsp_and_v2r8(velec,cutoff_mask);
478 velecsum = _fjsp_add_v2r8(velecsum,velec);
482 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
484 /* Update vectorial force */
485 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
486 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
487 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
489 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
490 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
491 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
495 /**************************
496 * CALCULATE INTERACTIONS *
497 **************************/
499 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
502 /* REACTION-FIELD ELECTROSTATICS */
503 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
504 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
506 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
508 /* Update potential sum for this i atom from the interaction with this j atom. */
509 velec = _fjsp_and_v2r8(velec,cutoff_mask);
510 velecsum = _fjsp_add_v2r8(velecsum,velec);
514 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
516 /* Update vectorial force */
517 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
518 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
519 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
521 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
522 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
523 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
527 /**************************
528 * CALCULATE INTERACTIONS *
529 **************************/
531 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
534 /* REACTION-FIELD ELECTROSTATICS */
535 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
536 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
538 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
540 /* Update potential sum for this i atom from the interaction with this j atom. */
541 velec = _fjsp_and_v2r8(velec,cutoff_mask);
542 velecsum = _fjsp_add_v2r8(velecsum,velec);
546 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
548 /* Update vectorial force */
549 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
550 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
551 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
553 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
554 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
555 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
559 /**************************
560 * CALCULATE INTERACTIONS *
561 **************************/
563 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
566 /* REACTION-FIELD ELECTROSTATICS */
567 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
568 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
570 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
572 /* Update potential sum for this i atom from the interaction with this j atom. */
573 velec = _fjsp_and_v2r8(velec,cutoff_mask);
574 velecsum = _fjsp_add_v2r8(velecsum,velec);
578 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
580 /* Update vectorial force */
581 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
582 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
583 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
585 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
586 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
587 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
591 /**************************
592 * CALCULATE INTERACTIONS *
593 **************************/
595 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
598 /* REACTION-FIELD ELECTROSTATICS */
599 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
600 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
602 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
604 /* Update potential sum for this i atom from the interaction with this j atom. */
605 velec = _fjsp_and_v2r8(velec,cutoff_mask);
606 velecsum = _fjsp_add_v2r8(velecsum,velec);
610 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
612 /* Update vectorial force */
613 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
614 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
615 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
617 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
618 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
619 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
623 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
625 /* Inner loop uses 387 flops */
632 j_coord_offsetA = DIM*jnrA;
634 /* load j atom coordinates */
635 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
636 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
638 /* Calculate displacement vector */
639 dx00 = _fjsp_sub_v2r8(ix0,jx0);
640 dy00 = _fjsp_sub_v2r8(iy0,jy0);
641 dz00 = _fjsp_sub_v2r8(iz0,jz0);
642 dx01 = _fjsp_sub_v2r8(ix0,jx1);
643 dy01 = _fjsp_sub_v2r8(iy0,jy1);
644 dz01 = _fjsp_sub_v2r8(iz0,jz1);
645 dx02 = _fjsp_sub_v2r8(ix0,jx2);
646 dy02 = _fjsp_sub_v2r8(iy0,jy2);
647 dz02 = _fjsp_sub_v2r8(iz0,jz2);
648 dx10 = _fjsp_sub_v2r8(ix1,jx0);
649 dy10 = _fjsp_sub_v2r8(iy1,jy0);
650 dz10 = _fjsp_sub_v2r8(iz1,jz0);
651 dx11 = _fjsp_sub_v2r8(ix1,jx1);
652 dy11 = _fjsp_sub_v2r8(iy1,jy1);
653 dz11 = _fjsp_sub_v2r8(iz1,jz1);
654 dx12 = _fjsp_sub_v2r8(ix1,jx2);
655 dy12 = _fjsp_sub_v2r8(iy1,jy2);
656 dz12 = _fjsp_sub_v2r8(iz1,jz2);
657 dx20 = _fjsp_sub_v2r8(ix2,jx0);
658 dy20 = _fjsp_sub_v2r8(iy2,jy0);
659 dz20 = _fjsp_sub_v2r8(iz2,jz0);
660 dx21 = _fjsp_sub_v2r8(ix2,jx1);
661 dy21 = _fjsp_sub_v2r8(iy2,jy1);
662 dz21 = _fjsp_sub_v2r8(iz2,jz1);
663 dx22 = _fjsp_sub_v2r8(ix2,jx2);
664 dy22 = _fjsp_sub_v2r8(iy2,jy2);
665 dz22 = _fjsp_sub_v2r8(iz2,jz2);
667 /* Calculate squared distance and things based on it */
668 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
669 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
670 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
671 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
672 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
673 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
674 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
675 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
676 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
678 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
679 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
680 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
681 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
682 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
683 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
684 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
685 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
686 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
688 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
689 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
690 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
691 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
692 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
693 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
694 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
695 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
696 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
698 fjx0 = _fjsp_setzero_v2r8();
699 fjy0 = _fjsp_setzero_v2r8();
700 fjz0 = _fjsp_setzero_v2r8();
701 fjx1 = _fjsp_setzero_v2r8();
702 fjy1 = _fjsp_setzero_v2r8();
703 fjz1 = _fjsp_setzero_v2r8();
704 fjx2 = _fjsp_setzero_v2r8();
705 fjy2 = _fjsp_setzero_v2r8();
706 fjz2 = _fjsp_setzero_v2r8();
708 /**************************
709 * CALCULATE INTERACTIONS *
710 **************************/
712 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
715 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
717 /* Calculate table index by multiplying r with table scale and truncate to integer */
718 rt = _fjsp_mul_v2r8(r00,vftabscale);
719 itab_tmp = _fjsp_dtox_v2r8(rt);
720 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
721 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
722 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
727 /* REACTION-FIELD ELECTROSTATICS */
728 velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
729 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
731 /* CUBIC SPLINE TABLE DISPERSION */
732 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
733 F = _fjsp_setzero_v2r8();
734 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
735 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
736 H = _fjsp_setzero_v2r8();
737 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
738 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
739 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
740 vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
741 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
742 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
744 /* CUBIC SPLINE TABLE REPULSION */
745 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
746 F = _fjsp_setzero_v2r8();
747 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
748 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
749 H = _fjsp_setzero_v2r8();
750 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
751 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
752 VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
753 vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
754 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
755 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
756 vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
757 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
759 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
761 /* Update potential sum for this i atom from the interaction with this j atom. */
762 velec = _fjsp_and_v2r8(velec,cutoff_mask);
763 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
764 velecsum = _fjsp_add_v2r8(velecsum,velec);
765 vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
766 vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
767 vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
769 fscal = _fjsp_add_v2r8(felec,fvdw);
771 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
773 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
775 /* Update vectorial force */
776 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
777 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
778 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
780 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
781 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
782 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
786 /**************************
787 * CALCULATE INTERACTIONS *
788 **************************/
790 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
793 /* REACTION-FIELD ELECTROSTATICS */
794 velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
795 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
797 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
799 /* Update potential sum for this i atom from the interaction with this j atom. */
800 velec = _fjsp_and_v2r8(velec,cutoff_mask);
801 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
802 velecsum = _fjsp_add_v2r8(velecsum,velec);
806 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
808 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
810 /* Update vectorial force */
811 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
812 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
813 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
815 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
816 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
817 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
821 /**************************
822 * CALCULATE INTERACTIONS *
823 **************************/
825 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
828 /* REACTION-FIELD ELECTROSTATICS */
829 velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
830 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
832 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
834 /* Update potential sum for this i atom from the interaction with this j atom. */
835 velec = _fjsp_and_v2r8(velec,cutoff_mask);
836 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
837 velecsum = _fjsp_add_v2r8(velecsum,velec);
841 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
843 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
845 /* Update vectorial force */
846 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
847 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
848 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
850 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
851 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
852 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
856 /**************************
857 * CALCULATE INTERACTIONS *
858 **************************/
860 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
863 /* REACTION-FIELD ELECTROSTATICS */
864 velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
865 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
867 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
869 /* Update potential sum for this i atom from the interaction with this j atom. */
870 velec = _fjsp_and_v2r8(velec,cutoff_mask);
871 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
872 velecsum = _fjsp_add_v2r8(velecsum,velec);
876 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
878 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
880 /* Update vectorial force */
881 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
882 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
883 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
885 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
886 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
887 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
891 /**************************
892 * CALCULATE INTERACTIONS *
893 **************************/
895 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
898 /* REACTION-FIELD ELECTROSTATICS */
899 velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
900 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
902 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
904 /* Update potential sum for this i atom from the interaction with this j atom. */
905 velec = _fjsp_and_v2r8(velec,cutoff_mask);
906 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
907 velecsum = _fjsp_add_v2r8(velecsum,velec);
911 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
913 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
915 /* Update vectorial force */
916 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
917 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
918 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
920 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
921 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
922 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
926 /**************************
927 * CALCULATE INTERACTIONS *
928 **************************/
930 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
933 /* REACTION-FIELD ELECTROSTATICS */
934 velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
935 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
937 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
939 /* Update potential sum for this i atom from the interaction with this j atom. */
940 velec = _fjsp_and_v2r8(velec,cutoff_mask);
941 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
942 velecsum = _fjsp_add_v2r8(velecsum,velec);
946 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
948 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
950 /* Update vectorial force */
951 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
952 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
953 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
955 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
956 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
957 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
961 /**************************
962 * CALCULATE INTERACTIONS *
963 **************************/
965 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
968 /* REACTION-FIELD ELECTROSTATICS */
969 velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
970 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
972 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
974 /* Update potential sum for this i atom from the interaction with this j atom. */
975 velec = _fjsp_and_v2r8(velec,cutoff_mask);
976 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
977 velecsum = _fjsp_add_v2r8(velecsum,velec);
981 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
983 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
985 /* Update vectorial force */
986 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
987 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
988 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
990 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
991 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
992 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
996 /**************************
997 * CALCULATE INTERACTIONS *
998 **************************/
1000 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1003 /* REACTION-FIELD ELECTROSTATICS */
1004 velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
1005 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1007 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1009 /* Update potential sum for this i atom from the interaction with this j atom. */
1010 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1011 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1012 velecsum = _fjsp_add_v2r8(velecsum,velec);
1016 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1018 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1020 /* Update vectorial force */
1021 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1022 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1023 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1025 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1026 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1027 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1031 /**************************
1032 * CALCULATE INTERACTIONS *
1033 **************************/
1035 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1038 /* REACTION-FIELD ELECTROSTATICS */
1039 velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
1040 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1042 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1044 /* Update potential sum for this i atom from the interaction with this j atom. */
1045 velec = _fjsp_and_v2r8(velec,cutoff_mask);
1046 velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
1047 velecsum = _fjsp_add_v2r8(velecsum,velec);
1051 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1053 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1055 /* Update vectorial force */
1056 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1057 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1058 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1060 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1061 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1062 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1066 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1068 /* Inner loop uses 387 flops */
1071 /* End of innermost loop */
1073 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1074 f+i_coord_offset,fshift+i_shift_offset);
1077 /* Update potential energies */
1078 gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
1079 gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
1081 /* Increment number of inner iterations */
1082 inneriter += j_index_end - j_index_start;
1084 /* Outer loop uses 20 flops */
1087 /* Increment number of outer iterations */
1090 /* Update outer/inner flops */
1092 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387);
1095 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1096 * Electrostatics interaction: ReactionField
1097 * VdW interaction: CubicSplineTable
1098 * Geometry: Water3-Water3
1099 * Calculate force/pot: Force
1102 nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
1103 (t_nblist * gmx_restrict nlist,
1104 rvec * gmx_restrict xx,
1105 rvec * gmx_restrict ff,
1106 t_forcerec * gmx_restrict fr,
1107 t_mdatoms * gmx_restrict mdatoms,
1108 nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
1109 t_nrnb * gmx_restrict nrnb)
1111 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1112 * just 0 for non-waters.
1113 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1114 * jnr indices corresponding to data put in the four positions in the SIMD register.
1116 int i_shift_offset,i_coord_offset,outeriter,inneriter;
1117 int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
1119 int j_coord_offsetA,j_coord_offsetB;
1120 int *iinr,*jindex,*jjnr,*shiftidx,*gid;
1121 real rcutoff_scalar;
1122 real *shiftvec,*fshift,*x,*f;
1123 _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
1125 _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
1127 _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
1129 _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
1130 int vdwjidx0A,vdwjidx0B;
1131 _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
1132 int vdwjidx1A,vdwjidx1B;
1133 _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
1134 int vdwjidx2A,vdwjidx2B;
1135 _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
1136 _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
1137 _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
1138 _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
1139 _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
1140 _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
1141 _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
1142 _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
1143 _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
1144 _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
1145 _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
1148 _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
1151 _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
1152 _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
1153 _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
1155 _fjsp_v2r8 itab_tmp;
1156 _fjsp_v2r8 dummy_mask,cutoff_mask;
1157 _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
1158 _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
1159 union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
1166 jindex = nlist->jindex;
1168 shiftidx = nlist->shift;
1170 shiftvec = fr->shift_vec[0];
1171 fshift = fr->fshift[0];
1172 facel = gmx_fjsp_set1_v2r8(fr->epsfac);
1173 charge = mdatoms->chargeA;
1174 krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
1175 krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
1176 crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
1177 nvdwtype = fr->ntype;
1178 vdwparam = fr->nbfp;
1179 vdwtype = mdatoms->typeA;
1181 vftab = kernel_data->table_vdw->data;
1182 vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
1184 /* Setup water-specific parameters */
1185 inr = nlist->iinr[0];
1186 iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
1187 iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
1188 iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
1189 vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
1191 jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
1192 jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
1193 jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
1194 vdwjidx0A = 2*vdwtype[inr+0];
1195 qq00 = _fjsp_mul_v2r8(iq0,jq0);
1196 c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
1197 c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
1198 qq01 = _fjsp_mul_v2r8(iq0,jq1);
1199 qq02 = _fjsp_mul_v2r8(iq0,jq2);
1200 qq10 = _fjsp_mul_v2r8(iq1,jq0);
1201 qq11 = _fjsp_mul_v2r8(iq1,jq1);
1202 qq12 = _fjsp_mul_v2r8(iq1,jq2);
1203 qq20 = _fjsp_mul_v2r8(iq2,jq0);
1204 qq21 = _fjsp_mul_v2r8(iq2,jq1);
1205 qq22 = _fjsp_mul_v2r8(iq2,jq2);
1207 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1208 rcutoff_scalar = fr->rcoulomb;
1209 rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
1210 rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
1212 /* Avoid stupid compiler warnings */
1214 j_coord_offsetA = 0;
1215 j_coord_offsetB = 0;
1220 /* Start outer loop over neighborlists */
1221 for(iidx=0; iidx<nri; iidx++)
1223 /* Load shift vector for this list */
1224 i_shift_offset = DIM*shiftidx[iidx];
1226 /* Load limits for loop over neighbors */
1227 j_index_start = jindex[iidx];
1228 j_index_end = jindex[iidx+1];
1230 /* Get outer coordinate index */
1232 i_coord_offset = DIM*inr;
1234 /* Load i particle coords and add shift vector */
1235 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
1236 &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
1238 fix0 = _fjsp_setzero_v2r8();
1239 fiy0 = _fjsp_setzero_v2r8();
1240 fiz0 = _fjsp_setzero_v2r8();
1241 fix1 = _fjsp_setzero_v2r8();
1242 fiy1 = _fjsp_setzero_v2r8();
1243 fiz1 = _fjsp_setzero_v2r8();
1244 fix2 = _fjsp_setzero_v2r8();
1245 fiy2 = _fjsp_setzero_v2r8();
1246 fiz2 = _fjsp_setzero_v2r8();
1248 /* Start inner kernel loop */
1249 for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
1252 /* Get j neighbor index, and coordinate index */
1254 jnrB = jjnr[jidx+1];
1255 j_coord_offsetA = DIM*jnrA;
1256 j_coord_offsetB = DIM*jnrB;
1258 /* load j atom coordinates */
1259 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
1260 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1262 /* Calculate displacement vector */
1263 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1264 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1265 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1266 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1267 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1268 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1269 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1270 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1271 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1272 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1273 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1274 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1275 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1276 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1277 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1278 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1279 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1280 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1281 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1282 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1283 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1284 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1285 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1286 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1287 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1288 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1289 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1291 /* Calculate squared distance and things based on it */
1292 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1293 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1294 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1295 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1296 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1297 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1298 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1299 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1300 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1302 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1303 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1304 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1305 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1306 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1307 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1308 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1309 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1310 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1312 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1313 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1314 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1315 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1316 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1317 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1318 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1319 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1320 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1322 fjx0 = _fjsp_setzero_v2r8();
1323 fjy0 = _fjsp_setzero_v2r8();
1324 fjz0 = _fjsp_setzero_v2r8();
1325 fjx1 = _fjsp_setzero_v2r8();
1326 fjy1 = _fjsp_setzero_v2r8();
1327 fjz1 = _fjsp_setzero_v2r8();
1328 fjx2 = _fjsp_setzero_v2r8();
1329 fjy2 = _fjsp_setzero_v2r8();
1330 fjz2 = _fjsp_setzero_v2r8();
1332 /**************************
1333 * CALCULATE INTERACTIONS *
1334 **************************/
1336 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1339 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1341 /* Calculate table index by multiplying r with table scale and truncate to integer */
1342 rt = _fjsp_mul_v2r8(r00,vftabscale);
1343 itab_tmp = _fjsp_dtox_v2r8(rt);
1344 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1345 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1346 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1351 /* REACTION-FIELD ELECTROSTATICS */
1352 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1354 /* CUBIC SPLINE TABLE DISPERSION */
1355 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1356 F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
1357 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1358 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1359 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
1360 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1361 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1362 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1363 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1365 /* CUBIC SPLINE TABLE REPULSION */
1366 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1367 F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
1368 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1369 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1370 H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
1371 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1372 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1373 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1374 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1375 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1377 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1379 fscal = _fjsp_add_v2r8(felec,fvdw);
1381 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1383 /* Update vectorial force */
1384 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1385 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1386 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1388 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1389 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1390 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1394 /**************************
1395 * CALCULATE INTERACTIONS *
1396 **************************/
1398 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1401 /* REACTION-FIELD ELECTROSTATICS */
1402 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1404 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1408 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1410 /* Update vectorial force */
1411 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1412 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1413 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1415 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1416 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1417 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1421 /**************************
1422 * CALCULATE INTERACTIONS *
1423 **************************/
1425 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1428 /* REACTION-FIELD ELECTROSTATICS */
1429 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1431 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1435 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1437 /* Update vectorial force */
1438 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1439 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1440 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1442 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1443 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1444 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1448 /**************************
1449 * CALCULATE INTERACTIONS *
1450 **************************/
1452 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1455 /* REACTION-FIELD ELECTROSTATICS */
1456 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1458 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1462 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1464 /* Update vectorial force */
1465 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1466 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1467 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1469 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1470 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1471 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1475 /**************************
1476 * CALCULATE INTERACTIONS *
1477 **************************/
1479 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1482 /* REACTION-FIELD ELECTROSTATICS */
1483 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1485 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1489 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1491 /* Update vectorial force */
1492 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1493 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1494 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1496 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1497 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1498 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1502 /**************************
1503 * CALCULATE INTERACTIONS *
1504 **************************/
1506 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1509 /* REACTION-FIELD ELECTROSTATICS */
1510 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1512 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1516 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1518 /* Update vectorial force */
1519 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1520 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1521 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1523 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1524 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1525 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1529 /**************************
1530 * CALCULATE INTERACTIONS *
1531 **************************/
1533 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1536 /* REACTION-FIELD ELECTROSTATICS */
1537 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1539 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1543 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1545 /* Update vectorial force */
1546 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1547 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1548 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1550 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1551 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1552 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1556 /**************************
1557 * CALCULATE INTERACTIONS *
1558 **************************/
1560 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1563 /* REACTION-FIELD ELECTROSTATICS */
1564 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1566 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1570 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1572 /* Update vectorial force */
1573 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1574 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1575 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1577 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1578 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1579 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1583 /**************************
1584 * CALCULATE INTERACTIONS *
1585 **************************/
1587 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1590 /* REACTION-FIELD ELECTROSTATICS */
1591 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1593 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1597 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1599 /* Update vectorial force */
1600 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1601 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1602 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1604 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1605 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1606 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1610 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1612 /* Inner loop uses 324 flops */
1615 if(jidx<j_index_end)
1619 j_coord_offsetA = DIM*jnrA;
1621 /* load j atom coordinates */
1622 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
1623 &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
1625 /* Calculate displacement vector */
1626 dx00 = _fjsp_sub_v2r8(ix0,jx0);
1627 dy00 = _fjsp_sub_v2r8(iy0,jy0);
1628 dz00 = _fjsp_sub_v2r8(iz0,jz0);
1629 dx01 = _fjsp_sub_v2r8(ix0,jx1);
1630 dy01 = _fjsp_sub_v2r8(iy0,jy1);
1631 dz01 = _fjsp_sub_v2r8(iz0,jz1);
1632 dx02 = _fjsp_sub_v2r8(ix0,jx2);
1633 dy02 = _fjsp_sub_v2r8(iy0,jy2);
1634 dz02 = _fjsp_sub_v2r8(iz0,jz2);
1635 dx10 = _fjsp_sub_v2r8(ix1,jx0);
1636 dy10 = _fjsp_sub_v2r8(iy1,jy0);
1637 dz10 = _fjsp_sub_v2r8(iz1,jz0);
1638 dx11 = _fjsp_sub_v2r8(ix1,jx1);
1639 dy11 = _fjsp_sub_v2r8(iy1,jy1);
1640 dz11 = _fjsp_sub_v2r8(iz1,jz1);
1641 dx12 = _fjsp_sub_v2r8(ix1,jx2);
1642 dy12 = _fjsp_sub_v2r8(iy1,jy2);
1643 dz12 = _fjsp_sub_v2r8(iz1,jz2);
1644 dx20 = _fjsp_sub_v2r8(ix2,jx0);
1645 dy20 = _fjsp_sub_v2r8(iy2,jy0);
1646 dz20 = _fjsp_sub_v2r8(iz2,jz0);
1647 dx21 = _fjsp_sub_v2r8(ix2,jx1);
1648 dy21 = _fjsp_sub_v2r8(iy2,jy1);
1649 dz21 = _fjsp_sub_v2r8(iz2,jz1);
1650 dx22 = _fjsp_sub_v2r8(ix2,jx2);
1651 dy22 = _fjsp_sub_v2r8(iy2,jy2);
1652 dz22 = _fjsp_sub_v2r8(iz2,jz2);
1654 /* Calculate squared distance and things based on it */
1655 rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
1656 rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
1657 rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
1658 rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
1659 rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
1660 rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
1661 rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
1662 rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
1663 rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
1665 rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
1666 rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
1667 rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
1668 rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
1669 rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
1670 rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
1671 rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
1672 rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
1673 rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
1675 rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
1676 rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
1677 rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
1678 rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
1679 rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
1680 rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
1681 rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
1682 rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
1683 rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
1685 fjx0 = _fjsp_setzero_v2r8();
1686 fjy0 = _fjsp_setzero_v2r8();
1687 fjz0 = _fjsp_setzero_v2r8();
1688 fjx1 = _fjsp_setzero_v2r8();
1689 fjy1 = _fjsp_setzero_v2r8();
1690 fjz1 = _fjsp_setzero_v2r8();
1691 fjx2 = _fjsp_setzero_v2r8();
1692 fjy2 = _fjsp_setzero_v2r8();
1693 fjz2 = _fjsp_setzero_v2r8();
1695 /**************************
1696 * CALCULATE INTERACTIONS *
1697 **************************/
1699 if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
1702 r00 = _fjsp_mul_v2r8(rsq00,rinv00);
1704 /* Calculate table index by multiplying r with table scale and truncate to integer */
1705 rt = _fjsp_mul_v2r8(r00,vftabscale);
1706 itab_tmp = _fjsp_dtox_v2r8(rt);
1707 vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
1708 twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
1709 _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
1714 /* REACTION-FIELD ELECTROSTATICS */
1715 felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
1717 /* CUBIC SPLINE TABLE DISPERSION */
1718 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
1719 F = _fjsp_setzero_v2r8();
1720 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1721 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
1722 H = _fjsp_setzero_v2r8();
1723 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1724 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1725 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1726 fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
1728 /* CUBIC SPLINE TABLE REPULSION */
1729 Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
1730 F = _fjsp_setzero_v2r8();
1731 GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
1732 G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
1733 H = _fjsp_setzero_v2r8();
1734 GMX_FJSP_TRANSPOSE2_V2R8(G,H);
1735 Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
1736 FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
1737 fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
1738 fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
1740 cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
1742 fscal = _fjsp_add_v2r8(felec,fvdw);
1744 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1746 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1748 /* Update vectorial force */
1749 fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
1750 fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
1751 fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
1753 fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
1754 fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
1755 fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
1759 /**************************
1760 * CALCULATE INTERACTIONS *
1761 **************************/
1763 if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
1766 /* REACTION-FIELD ELECTROSTATICS */
1767 felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
1769 cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
1773 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1775 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1777 /* Update vectorial force */
1778 fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
1779 fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
1780 fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
1782 fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
1783 fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
1784 fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
1788 /**************************
1789 * CALCULATE INTERACTIONS *
1790 **************************/
1792 if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
1795 /* REACTION-FIELD ELECTROSTATICS */
1796 felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
1798 cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
1802 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1804 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1806 /* Update vectorial force */
1807 fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
1808 fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
1809 fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
1811 fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
1812 fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
1813 fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
1817 /**************************
1818 * CALCULATE INTERACTIONS *
1819 **************************/
1821 if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
1824 /* REACTION-FIELD ELECTROSTATICS */
1825 felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
1827 cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
1831 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1833 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1835 /* Update vectorial force */
1836 fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
1837 fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
1838 fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
1840 fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
1841 fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
1842 fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
1846 /**************************
1847 * CALCULATE INTERACTIONS *
1848 **************************/
1850 if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
1853 /* REACTION-FIELD ELECTROSTATICS */
1854 felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
1856 cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
1860 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1862 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1864 /* Update vectorial force */
1865 fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
1866 fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
1867 fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
1869 fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
1870 fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
1871 fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
1875 /**************************
1876 * CALCULATE INTERACTIONS *
1877 **************************/
1879 if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
1882 /* REACTION-FIELD ELECTROSTATICS */
1883 felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
1885 cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
1889 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1891 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1893 /* Update vectorial force */
1894 fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
1895 fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
1896 fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
1898 fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
1899 fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
1900 fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
1904 /**************************
1905 * CALCULATE INTERACTIONS *
1906 **************************/
1908 if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
1911 /* REACTION-FIELD ELECTROSTATICS */
1912 felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
1914 cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
1918 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1920 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1922 /* Update vectorial force */
1923 fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
1924 fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
1925 fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
1927 fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
1928 fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
1929 fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
1933 /**************************
1934 * CALCULATE INTERACTIONS *
1935 **************************/
1937 if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
1940 /* REACTION-FIELD ELECTROSTATICS */
1941 felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
1943 cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
1947 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1949 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1951 /* Update vectorial force */
1952 fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
1953 fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
1954 fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
1956 fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
1957 fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
1958 fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
1962 /**************************
1963 * CALCULATE INTERACTIONS *
1964 **************************/
1966 if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
1969 /* REACTION-FIELD ELECTROSTATICS */
1970 felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
1972 cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
1976 fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
1978 fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
1980 /* Update vectorial force */
1981 fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
1982 fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
1983 fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
1985 fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
1986 fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
1987 fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
1991 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
1993 /* Inner loop uses 324 flops */
1996 /* End of innermost loop */
1998 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
1999 f+i_coord_offset,fshift+i_shift_offset);
2001 /* Increment number of inner iterations */
2002 inneriter += j_index_end - j_index_start;
2004 /* Outer loop uses 18 flops */
2007 /* Increment number of outer iterations */
2010 /* Update outer/inner flops */
2012 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*324);