vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 152 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 153 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 129 flops */
}
vdwjidx0C = 2*vdwtype[jnrC+0];
vdwjidx0D = 2*vdwtype[jnrD+0];
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy0 = _mm_add_ps(fiy0,ty);
fiz0 = _mm_add_ps(fiz0,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
/**************************
* CALCULATE INTERACTIONS *
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
-
+
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
+
/* Inner loop uses 130 flops */
}