jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 108 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = f+j_coord_offsetA;
- fjptrB = f+j_coord_offsetB;
- fjptrC = f+j_coord_offsetC;
- fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = f+j_coord_offsetA;
fjptrB = f+j_coord_offsetB;
fjptrC = f+j_coord_offsetC;
fjptrD = f+j_coord_offsetD;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}
jq0 = gmx_mm_load_4real_swizzle_ps(charge+jnrA+0,charge+jnrB+0,
charge+jnrC+0,charge+jnrD+0);
+ fjx0 = _mm_setzero_ps();
+ fjy0 = _mm_setzero_ps();
+ fjz0 = _mm_setzero_ps();
+
/**************************
* CALCULATE INTERACTIONS *
**************************/
fiy1 = _mm_add_ps(fiy1,ty);
fiz1 = _mm_add_ps(fiz1,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy2 = _mm_add_ps(fiy2,ty);
fiz2 = _mm_add_ps(fiz2,tz);
- fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
- fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
- fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
- fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
}
fiy3 = _mm_add_ps(fiy3,ty);
fiz3 = _mm_add_ps(fiz3,tz);
+ fjx0 = _mm_add_ps(fjx0,tx);
+ fjy0 = _mm_add_ps(fjy0,ty);
+ fjz0 = _mm_add_ps(fjz0,tz);
+
+ }
+
fjptrA = (jnrlistA>=0) ? f+j_coord_offsetA : scratch;
fjptrB = (jnrlistB>=0) ? f+j_coord_offsetB : scratch;
fjptrC = (jnrlistC>=0) ? f+j_coord_offsetC : scratch;
fjptrD = (jnrlistD>=0) ? f+j_coord_offsetD : scratch;
- gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,tx,ty,tz);
- }
+ gmx_mm_decrement_1rvec_4ptr_swizzle_ps(fjptrA,fjptrB,fjptrC,fjptrD,fjx0,fjy0,fjz0);
/* Inner loop uses 90 flops */
}