__m128 * gmx_restrict c6, __m128 * gmx_restrict c12)
{
__m128 t1,t2,t3,t4;
- /* NOTE: This might be faster with _mm_loadu_ps() */
t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p1);
t2 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p2);
t3 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p3);
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 168 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 171 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 148 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 151 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 197 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 201 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 177 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 181 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 151 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 154 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 134 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 137 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 173 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 176 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 156 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 152 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 100 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 103 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 143 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 146 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 102 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 102 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 93 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 177 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 180 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 168 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 221 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 225 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 209 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 213 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 162 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 153 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 162 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 121 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 124 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 110 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 113 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 146 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 139 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 102 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 91 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 94 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 122 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 125 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 114 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 87 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 87 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 87 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 87 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 154 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 127 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 176 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 177 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 151 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 106 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 106 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 161 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 161 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 132 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 132 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 151 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 152 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 130 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 131 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 179 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 180 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 158 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
}
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
}
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
}
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 99 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 140 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 164 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 140 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 140 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_macc_ps(dy00,fscal,fiy0);
fiz0 = _mm_macc_ps(dz00,fscal,fiz0);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx00,fscal),
- _mm_mul_ps(dy00,fscal),
- _mm_mul_ps(dz00,fscal));
+ fjx0 = _mm_macc_ps(dx00,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy00,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz00,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 105 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_macc_ps(dy10,fscal,fiy1);
fiz1 = _mm_macc_ps(dz10,fscal,fiz1);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx10,fscal),
- _mm_mul_ps(dy10,fscal),
- _mm_mul_ps(dz10,fscal));
+ fjx0 = _mm_macc_ps(dx10,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy10,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz10,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_macc_ps(dy20,fscal,fiy2);
fiz2 = _mm_macc_ps(dz20,fscal,fiz2);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx20,fscal),
- _mm_mul_ps(dy20,fscal),
- _mm_mul_ps(dz20,fscal));
+ fjx0 = _mm_macc_ps(dx20,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy20,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz20,fscal,fjz0);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_macc_ps(dy30,fscal,fiy3);
fiz3 = _mm_macc_ps(dz30,fscal,fiz3);
+ fjx0 = _mm_macc_ps(dx30,fscal,fjx0);
+ fjy0 = _mm_macc_ps(dy30,fscal,fjy0);
+ fjz0 = _mm_macc_ps(dz30,fscal,fjz0);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
- _mm_mul_ps(dx30,fscal),
- _mm_mul_ps(dy30,fscal),
- _mm_mul_ps(dz30,fscal));
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
/* #endfor */
/* #endif */
- /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_J */
+ /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_I */
/* #for J in PARTICLES_J */
fjx{J} = _mm_setzero_ps();
fjy{J} = _mm_setzero_ps();
fiz{I} = _mm_macc_ps(dz{I}{J},fscal,fiz{I});
/* #define INNERFLOPS INNERFLOPS+6 */
- /* #if GEOMETRY_J == 'Particle' */
+ /* #if GEOMETRY_I == 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endfor */
/* ## End of loop over i-j interaction pairs */
- /* #if GEOMETRY_J != 'Particle' */
+ /* #if GEOMETRY_I != 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endif */
/* #endif */
- /* #if GEOMETRY_J == 'Water3' */
+ /* #if 'Water' in GEOMETRY_I and GEOMETRY_J == 'Particle' */
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+ /* #elif GEOMETRY_J == 'Water3' */
gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
/* #define INNERFLOPS INNERFLOPS+9 */
{
__m128 t1,t2,t3,t4;
- t1 = _mm_castpd_ps(_mm_load_sd((const double *)p1));
- t2 = _mm_castpd_ps(_mm_load_sd((const double *)p2));
- t3 = _mm_castpd_ps(_mm_load_sd((const double *)p3));
- t4 = _mm_castpd_ps(_mm_load_sd((const double *)p4));
+ t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p1); /* - - c12a c6a */
+ t2 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p2); /* - - c12b c6b */
+ t3 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p3); /* - - c12c c6c */
+ t4 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p4); /* - - c12d c6d */
t1 = _mm_unpacklo_ps(t1,t2);
t2 = _mm_unpacklo_ps(t3,t4);
*c6 = _mm_movelh_ps(t1,t2);
{
__m128 t1,t2,t3,t4;
- t1 = _mm_castpd_ps(_mm_load_sd((const double *)xyz_shift));
- t2 = _mm_castpd_ps(_mm_load_sd((const double *)xyz));
+ t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz_shift);
+ t2 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz);
t3 = _mm_load_ss(xyz_shift+2);
t4 = _mm_load_ss(xyz+2);
t1 = _mm_add_ps(t1,t2);
__m128 tA,tB;
__m128 t1,t2,t3,t4,t5,t6;
- tA = _mm_castpd_ps(_mm_load_sd((const double *)xyz_shift));
+ tA = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz_shift);
tB = _mm_load_ss(xyz_shift+2);
t1 = _mm_loadu_ps(xyz);
_mm_storeh_pi((__m64 *)(fshiftptr+1),t3);
}
-static gmx_inline void
-gmx_mm_update_iforce_2atom_swizzle_ps(__m128 fix1, __m128 fiy1, __m128 fiz1,
- __m128 fix2, __m128 fiy2, __m128 fiz2,
- float * gmx_restrict fptr,
- float * gmx_restrict fshiftptr)
-{
- __m128 t1,t2,t4;
-
- /* transpose data */
- _MM_TRANSPOSE4_PS(fix1,fiy1,fiz1,fix2);
- t1 = _mm_unpacklo_ps(fiy2,fiz2);
- t2 = _mm_unpackhi_ps(fiy2,fiz2);
-
- fix1 = _mm_add_ps(_mm_add_ps(fix1,fiy1), _mm_add_ps(fiz1,fix2));
- t1 = _mm_add_ps(t1,t2);
- t2 = _mm_movehl_ps(t2,t1);
- fiy2 = _mm_add_ps(t1,t2);
-
- _mm_storeu_ps(fptr, _mm_add_ps(fix1,_mm_loadu_ps(fptr) ));
- t1 = _mm_loadl_pi(t1,(__m64 *)(fptr+4));
- _mm_storel_pi((__m64 *)(fptr+4), _mm_add_ps(fiy2,t1));
-
- t4 = _mm_load_ss(fshiftptr+2);
- t4 = _mm_loadh_pi(t4,(__m64 *)(fshiftptr));
-
- t1 = _mm_shuffle_ps(fix1,fiy2,_MM_SHUFFLE(0,0,3,2)); /* fiy2 - fix2 fiz1 */
- t1 = _mm_shuffle_ps(t1,t1,_MM_SHUFFLE(3,1,0,0)); /* fiy2 fix2 - fiz1 */
- t2 = _mm_shuffle_ps(fiy2,fix1,_MM_SHUFFLE(1,0,0,1)); /* fiy1 fix1 - fiz2 */
-
- t1 = _mm_add_ps(t1,t2);
- t1 = _mm_add_ps(t1,t4); /* y x - z */
-
- _mm_store_ss(fshiftptr+2,t1);
- _mm_storeh_pi((__m64 *)(fshiftptr),t1);
-}
-
-
-
static gmx_inline void
gmx_mm_update_iforce_3atom_swizzle_ps(__m128 fix1, __m128 fiy1, __m128 fiz1,
__m128 fix2, __m128 fiy2, __m128 fiz2,
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 159 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 162 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 139 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 185 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 189 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 169 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 145 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 125 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 161 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 164 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 144 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 147 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 132 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 132 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 119 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 109 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 140 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 141 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 130 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 116 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 116 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 156 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 124 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 127 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 179 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 182 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 147 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 213 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 216 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 201 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 204 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 254 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 258 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 242 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 246 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 195 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 198 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 186 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 189 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 195 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 198 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 186 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 189 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 157 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 160 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 134 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 137 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 179 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 183 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 156 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 160 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 138 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 115 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 155 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 158 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 123 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 111 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 123 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 111 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 144 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 145 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 164 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 139 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 149 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 149 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 143 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 121 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 122 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 167 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 168 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 146 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 147 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
- }
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 131 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 132 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 109 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 152 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 130 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 81 flops */
}
/* #endfor */
/* #endif */
- /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_J */
+ /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_I */
/* #for J in PARTICLES_J */
fjx{J} = _mm_setzero_ps();
fjy{J} = _mm_setzero_ps();
fiz{I} = _mm_add_ps(fiz{I},tz);
/* #define INNERFLOPS INNERFLOPS+6 */
- /* #if GEOMETRY_J == 'Particle' */
+ /* #if GEOMETRY_I == 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endfor */
/* ## End of loop over i-j interaction pairs */
- /* #if GEOMETRY_J != 'Particle' */
+ /* #if GEOMETRY_I != 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endif */
/* #endif */
- /* #if GEOMETRY_J == 'Water3' */
+ /* #if 'Water' in GEOMETRY_I and GEOMETRY_J == 'Particle' */
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+ /* #elif GEOMETRY_J == 'Water3' */
gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
/* #define INNERFLOPS INNERFLOPS+9 */
{
__m128 t1,t2,t3,t4;
- t1 = _mm_castpd_ps(_mm_load_sd((const double *)p1));
- t2 = _mm_castpd_ps(_mm_load_sd((const double *)p2));
- t3 = _mm_castpd_ps(_mm_load_sd((const double *)p3));
- t4 = _mm_castpd_ps(_mm_load_sd((const double *)p4));
+ t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p1); /* - - c12a c6a */
+ t2 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p2); /* - - c12b c6b */
+ t3 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p3); /* - - c12c c6c */
+ t4 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)p4); /* - - c12d c6d */
t1 = _mm_unpacklo_ps(t1,t2);
t2 = _mm_unpacklo_ps(t3,t4);
*c6 = _mm_movelh_ps(t1,t2);
{
__m128 t1,t2,t3,t4;
- t1 = _mm_castpd_ps(_mm_load_sd((const double *)xyz_shift));
- t2 = _mm_castpd_ps(_mm_load_sd((const double *)xyz));
+ t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz_shift);
+ t2 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz);
t3 = _mm_load_ss(xyz_shift+2);
t4 = _mm_load_ss(xyz+2);
t1 = _mm_add_ps(t1,t2);
__m128 tA,tB;
__m128 t1,t2,t3,t4,t5,t6;
- tA = _mm_castpd_ps(_mm_load_sd((const double *)xyz_shift));
+ tA = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)xyz_shift);
tB = _mm_load_ss(xyz_shift+2);
t1 = _mm_loadu_ps(xyz);
_mm_storeh_pi((__m64 *)(fshiftptr+1),t3);
}
-static gmx_inline void
-gmx_mm_update_iforce_2atom_swizzle_ps(__m128 fix1, __m128 fiy1, __m128 fiz1,
- __m128 fix2, __m128 fiy2, __m128 fiz2,
- float * gmx_restrict fptr,
- float * gmx_restrict fshiftptr)
-{
- __m128 t1,t2,t4;
-
- fix1 = _mm_hadd_ps(fix1,fiy1);
- fiz1 = _mm_hadd_ps(fiz1,fix2);
- fiy2 = _mm_hadd_ps(fiy2,fiz2);
-
- fix1 = _mm_hadd_ps(fix1,fiz1); /* fix2 fiz1 fiy1 fix1 */
- fiy2 = _mm_hadd_ps(fiy2,fiy2); /* - - fiz2 fiy2 */
-
- _mm_storeu_ps(fptr, _mm_add_ps(fix1,_mm_loadu_ps(fptr) ));
- t1 = _mm_loadl_pi(_mm_setzero_ps(),(__m64 *)(fptr+4));
- _mm_storel_pi((__m64 *)(fptr+4), _mm_add_ps(fiy2,t1));
-
- t4 = _mm_load_ss(fshiftptr+2);
- t4 = _mm_loadh_pi(t4,(__m64 *)(fshiftptr));
-
- t1 = _mm_shuffle_ps(fix1,fiy2,_MM_SHUFFLE(0,0,3,2)); /* fiy2 - fix2 fiz1 */
- t1 = _mm_shuffle_ps(t1,t1,_MM_SHUFFLE(3,1,0,0)); /* fiy2 fix2 - fiz1 */
- t2 = _mm_shuffle_ps(fiy2,fix1,_MM_SHUFFLE(1,0,0,1)); /* fiy1 fix1 - fiz2 */
-
- t1 = _mm_add_ps(t1,t2);
- t1 = _mm_add_ps(t1,t4); /* y x - z */
-
- _mm_store_ss(fshiftptr+2,t1);
- _mm_storeh_pi((__m64 *)(fshiftptr),t1);
-}
-
-
-
static gmx_inline void
gmx_mm_update_iforce_3atom_swizzle_ps(__m128 fix1, __m128 fiy1, __m128 fiz1,
__m128 fix2, __m128 fiy2, __m128 fiz2,
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 162 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 139 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 185 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 189 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 169 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 145 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 125 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 161 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 164 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 144 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 147 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 132 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 132 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 119 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 109 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 140 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 130 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 116 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 116 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 84 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 156 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 159 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 124 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 127 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 179 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 182 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 147 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 150 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 141 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 213 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 216 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 201 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 204 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 254 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 258 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 242 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 246 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 195 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 198 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 186 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 189 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 195 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 198 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 186 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 189 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 157 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 160 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 134 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 137 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 179 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 183 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 156 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 160 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 115 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 155 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 158 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 135 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 123 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 111 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 123 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 111 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 144 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 145 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 117 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 118 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 164 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 165 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 138 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 139 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 126 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 97 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 149 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 149 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 120 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 142 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 143 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 121 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 122 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 167 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 168 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 146 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 147 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 131 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 132 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 109 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 152 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 130 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 88 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 128 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 96 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 81 flops */
}
/* #endfor */
/* #endif */
- /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_J */
+ /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_I */
/* #for J in PARTICLES_J */
fjx{J} = _mm_setzero_ps();
fjy{J} = _mm_setzero_ps();
fiz{I} = _mm_add_ps(fiz{I},tz);
/* #define INNERFLOPS INNERFLOPS+6 */
- /* #if GEOMETRY_J == 'Particle' */
+ /* #if GEOMETRY_I == 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endfor */
/* ## End of loop over i-j interaction pairs */
- /* #if GEOMETRY_J != 'Particle' */
+ /* #if GEOMETRY_I != 'Particle' */
/* #if ROUND == 'Loop' */
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
/* #endif */
/* #endif */
- /* #if GEOMETRY_J == 'Water3' */
+ /* #if 'Water' in GEOMETRY_I and GEOMETRY_J == 'Particle' */
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+ /* #elif GEOMETRY_J == 'Water3' */
gmx_mm_decrement_3rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,
fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
/* #define INNERFLOPS INNERFLOPS+9 */