Fixed AVX256 double-precision kernels for gcc
author Erik Lindahl <erik@kth.se>
Thu, 20 Dec 2012 14:05:42 +0000 (15:05 +0100)
committer Erik Lindahl <erik@kth.se>
Thu, 20 Dec 2012 14:05:42 +0000 (15:05 +0100)
GCC is pickier about memory alignment and implicit conversions than
either clang or icc. This patch makes sure we align tables to 32-byte
boundaries (previously 16-byte), and uses explicit casts to make gcc happy.
Fixes redmine #1074.

Change-Id: I613d49dad74597ce6167e18fd3a048254bc1b859

111 files changed:
src/gmxlib/nonbonded/nb_kernel_avx_256_double/kernelutil_x86_avx_256_double.h
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_256_double.c
src/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_template_avx_256_double.pre
src/mdlib/forcerec.c
src/mdlib/tables.c

index e915536c5f95844860c9899bd318a187455d4ce2..242260ac86374d045f508a8955cf061856c0049a 100644 (file)
@@ -77,7 +77,7 @@ gmx_mm256_load_4real_swizzle_pd(const double * gmx_restrict ptrA, const double *
 
     t1 = _mm_unpacklo_pd(_mm_load_sd(ptrA),_mm_load_sd(ptrB));
     t2 = _mm_unpacklo_pd(_mm_load_sd(ptrC),_mm_load_sd(ptrD));
-    return gmx_mm256_set_m128(t2,t1);
+    return gmx_mm256_set_m128d(t2,t1);
 }
 
 
@@ -201,8 +201,8 @@ gmx_mm256_load_4pair_swizzle_pd(const double * gmx_restrict p1, const double * g
 {
     __m256d t1,t2;
 
-    t1   = gmx_mm256_set_m128(_mm_loadu_pd(p3),_mm_loadu_pd(p1)); /* c12c  c6c | c12a  c6a */
-    t2   = gmx_mm256_set_m128(_mm_loadu_pd(p4),_mm_loadu_pd(p2)); /* c12d  c6d | c12b  c6b */
+    t1   = gmx_mm256_set_m128d(_mm_loadu_pd(p3),_mm_loadu_pd(p1)); /* c12c  c6c | c12a  c6a */
+    t2   = gmx_mm256_set_m128d(_mm_loadu_pd(p4),_mm_loadu_pd(p2)); /* c12d  c6d | c12b  c6b */
 
     *c6  = _mm256_unpacklo_pd(t1,t2); /* c6d c6c | c6b c6a */
     *c12 = _mm256_unpackhi_pd(t1,t2); /* c12d c12c | c12b c12a */
@@ -230,9 +230,9 @@ gmx_mm256_load_shift_and_1rvec_broadcast_pd(const double * gmx_restrict xyz_shif
     ty  = _mm_shuffle_pd(mem_xy,mem_xy,_MM_SHUFFLE2(1,1));
     tz  = _mm_shuffle_pd(mem_z,mem_z,_MM_SHUFFLE2(0,0));
 
-    *x1 = gmx_mm256_set_m128(tx,tx);
-    *y1 = gmx_mm256_set_m128(ty,ty);
-    *z1 = gmx_mm256_set_m128(tz,tz);
+    *x1 = gmx_mm256_set_m128d(tx,tx);
+    *y1 = gmx_mm256_set_m128d(ty,ty);
+    *z1 = gmx_mm256_set_m128d(tz,tz);
 }
 
 
@@ -265,21 +265,21 @@ gmx_mm256_load_shift_and_3rvec_broadcast_pd(const double * gmx_restrict xyz_shif
     tx   = _mm_shuffle_pd(t1,t1,_MM_SHUFFLE2(0,0));
     ty   = _mm_shuffle_pd(t1,t1,_MM_SHUFFLE2(1,1));
     tz   = _mm_shuffle_pd(t2,t2,_MM_SHUFFLE2(0,0));
-    *x1 = gmx_mm256_set_m128(tx,tx);
-    *y1 = gmx_mm256_set_m128(ty,ty);
-    *z1 = gmx_mm256_set_m128(tz,tz);
+    *x1 = gmx_mm256_set_m128d(tx,tx);
+    *y1 = gmx_mm256_set_m128d(ty,ty);
+    *z1 = gmx_mm256_set_m128d(tz,tz);
     tx   = _mm_shuffle_pd(t2,t2,_MM_SHUFFLE2(1,1));
     ty   = _mm_shuffle_pd(t3,t3,_MM_SHUFFLE2(0,0));
     tz   = _mm_shuffle_pd(t3,t3,_MM_SHUFFLE2(1,1));
-    *x2 = gmx_mm256_set_m128(tx,tx);
-    *y2 = gmx_mm256_set_m128(ty,ty);
-    *z2 = gmx_mm256_set_m128(tz,tz);
+    *x2 = gmx_mm256_set_m128d(tx,tx);
+    *y2 = gmx_mm256_set_m128d(ty,ty);
+    *z2 = gmx_mm256_set_m128d(tz,tz);
     tx   = _mm_shuffle_pd(t4,t4,_MM_SHUFFLE2(0,0));
     ty   = _mm_shuffle_pd(t4,t4,_MM_SHUFFLE2(1,1));
     tz   = _mm_shuffle_pd(t5,t5,_MM_SHUFFLE2(0,0));
-    *x3 = gmx_mm256_set_m128(tx,tx);
-    *y3 = gmx_mm256_set_m128(ty,ty);
-    *z3 = gmx_mm256_set_m128(tz,tz);
+    *x3 = gmx_mm256_set_m128d(tx,tx);
+    *y3 = gmx_mm256_set_m128d(ty,ty);
+    *z3 = gmx_mm256_set_m128d(tz,tz);
 }
 
 
@@ -315,27 +315,27 @@ gmx_mm256_load_shift_and_4rvec_broadcast_pd(const double * gmx_restrict xyz_shif
     tx   = _mm_shuffle_pd(t1,t1,_MM_SHUFFLE2(0,0));
     ty   = _mm_shuffle_pd(t1,t1,_MM_SHUFFLE2(1,1));
     tz   = _mm_shuffle_pd(t2,t2,_MM_SHUFFLE2(0,0));
-    *x1 = gmx_mm256_set_m128(tx,tx);
-    *y1 = gmx_mm256_set_m128(ty,ty);
-    *z1 = gmx_mm256_set_m128(tz,tz);
+    *x1 = gmx_mm256_set_m128d(tx,tx);
+    *y1 = gmx_mm256_set_m128d(ty,ty);
+    *z1 = gmx_mm256_set_m128d(tz,tz);
     tx   = _mm_shuffle_pd(t2,t2,_MM_SHUFFLE2(1,1));
     ty   = _mm_shuffle_pd(t3,t3,_MM_SHUFFLE2(0,0));
     tz   = _mm_shuffle_pd(t3,t3,_MM_SHUFFLE2(1,1));
-    *x2 = gmx_mm256_set_m128(tx,tx);
-    *y2 = gmx_mm256_set_m128(ty,ty);
-    *z2 = gmx_mm256_set_m128(tz,tz);
+    *x2 = gmx_mm256_set_m128d(tx,tx);
+    *y2 = gmx_mm256_set_m128d(ty,ty);
+    *z2 = gmx_mm256_set_m128d(tz,tz);
     tx   = _mm_shuffle_pd(t4,t4,_MM_SHUFFLE2(0,0));
     ty   = _mm_shuffle_pd(t4,t4,_MM_SHUFFLE2(1,1));
     tz   = _mm_shuffle_pd(t5,t5,_MM_SHUFFLE2(0,0));
-    *x3 = gmx_mm256_set_m128(tx,tx);
-    *y3 = gmx_mm256_set_m128(ty,ty);
-    *z3 = gmx_mm256_set_m128(tz,tz);
+    *x3 = gmx_mm256_set_m128d(tx,tx);
+    *y3 = gmx_mm256_set_m128d(ty,ty);
+    *z3 = gmx_mm256_set_m128d(tz,tz);
     tx   = _mm_shuffle_pd(t5,t5,_MM_SHUFFLE2(1,1));
     ty   = _mm_shuffle_pd(t6,t6,_MM_SHUFFLE2(0,0));
     tz   = _mm_shuffle_pd(t6,t6,_MM_SHUFFLE2(1,1));
-    *x4 = gmx_mm256_set_m128(tx,tx);
-    *y4 = gmx_mm256_set_m128(ty,ty);
-    *z4 = gmx_mm256_set_m128(tz,tz);
+    *x4 = gmx_mm256_set_m128d(tx,tx);
+    *y4 = gmx_mm256_set_m128d(ty,ty);
+    *z4 = gmx_mm256_set_m128d(tz,tz);
 }
 
 
@@ -1333,7 +1333,7 @@ gmx_mm256_update_iforce_1atom_swizzle_pd(__m256d fix1, __m256d fiy1, __m256d fiz
     tA   = _mm_add_pd(_mm256_castpd256_pd128(fix1),_mm256_extractf128_pd(fix1,0x1));
     tB   = _mm_add_pd(_mm256_castpd256_pd128(fiz1),_mm256_extractf128_pd(fiz1,0x1));
 
-    fix1 = gmx_mm256_set_m128(tB,tA); /* 0 fiz fiy fix */
+    fix1 = gmx_mm256_set_m128d(tB,tA); /* 0 fiz fiy fix */
 
     t1   = _mm256_loadu_pd(fptr);
     t2   = _mm256_loadu_pd(fshiftptr);
@@ -1363,7 +1363,7 @@ gmx_mm256_update_iforce_2atom_swizzle_pd(__m256d fix1, __m256d fiy1, __m256d fiz
     tB   = _mm_add_pd(_mm256_castpd256_pd128(fiz1),_mm256_extractf128_pd(fiz1,0x1)); /* fix2 fiz1 */
     tC   = _mm_add_pd(_mm256_castpd256_pd128(fiy2),_mm256_extractf128_pd(fiy2,0x1)); /* fiz2 fiy2 */
     
-    t1   = gmx_mm256_set_m128(tB,tA); /* fix2 fiz1 | fiy1 fix1 */
+    t1   = gmx_mm256_set_m128d(tB,tA); /* fix2 fiz1 | fiy1 fix1 */
 
     t2   = _mm256_loadu_pd(fptr);
     tD   = _mm_loadu_pd(fptr+4);
index 5e579d1f618c687e3f23df2ebc81b29279cc3abb..4e471c3f7c91a613afb70e3e4d6d8ece9547b52e 100644 (file)
@@ -306,7 +306,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -702,7 +702,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 6f57d4038d5ce09ece063217f78968c0daf9ec95..2999542943fa28f89f0e1bacca86581fd777d7ae 100644 (file)
@@ -435,7 +435,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1071,7 +1071,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ad08857305bf9d76f83296bfdeaf27ebb03de497..6de88eae5b1c0c839d98bb7812d7439649b51221 100644 (file)
@@ -740,7 +740,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1954,7 +1954,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 4b005303769bd90e4cc025cf4e362f195529c61d..c1a36e5ec7ff1a2024e691c105959f21b843dcc3 100644 (file)
@@ -478,7 +478,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1195,7 +1195,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c7f3e88822ad6a852a6f03722ab968e1b8da7e23..f15087d8c2d6d81d6ae09ebe5d40d455973cc30f 100644 (file)
@@ -788,7 +788,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2091,7 +2091,7 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 4c3835b2cff175b9931c1e61ec2ea4c5ab57d1b4..e0fe9b7eb5068950827caf58d121953af94d234c 100644 (file)
@@ -286,7 +286,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -644,7 +644,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b8eaf5bb463eb7b46a2e0d9f6f66007dff5c253b..b0242ced74d0a3820d4d1646235ee4cbc59ed77a 100644 (file)
@@ -415,7 +415,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1013,7 +1013,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 04d6e3566b2371d3d72cb0a13754d77a59dd514a..075e7448a1448e37051e79faf6191a0b461f9d96 100644 (file)
@@ -720,7 +720,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1896,7 +1896,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index d8ceaeda18ba2fd9a5c26ac93f46582d2ab14809..adb9e10230bfe767b298984cc0c64b2cc067467d 100644 (file)
@@ -449,7 +449,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1109,7 +1109,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 26291eb122534ef0d2ea6e82bfc8ed7c7b63414f..36c78bb7b34544f6a59fafc1a274a461d49ad43d 100644 (file)
@@ -759,7 +759,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2005,7 +2005,7 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 0d0d6fba50ff82f816d89517b67645b72f21dcb5..ef9f48193e6e7f128cf5f66a2033738f07501012 100644 (file)
@@ -255,7 +255,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -565,7 +565,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 27ef79745ced7ffec86757b06c50d3515e27ac85..1e8c7123fb3440c808cd4fc36b3c1175db06e687 100644 (file)
@@ -384,7 +384,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -934,7 +934,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 0c923102df6696f634dbb8b6feb7cff84dad0da9..be53c0da10b1de5635fd76809a8641589df4226a 100644 (file)
@@ -695,7 +695,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1838,7 +1838,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 7dc8f6e2f94344cc05313779799f264a71c2c622..2e0cffc04fb10dbaa71e93e24fff4760bf7ad2a6 100644 (file)
@@ -384,7 +384,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -934,7 +934,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b21dbd4acf2fdeebb26afc665cd272afe68b73d8..a74295dea453eb39ee8ccf8d8dac74ff8dc29b40 100644 (file)
@@ -695,7 +695,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1838,7 +1838,7 @@ nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 9422be871bb8004b57055f499c02a1df62028dce..5d2ea1a29e0a0abe427eea9a8f21ada987998658 100644 (file)
@@ -298,7 +298,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -680,7 +680,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 15d009fab4d7cd0dd69325cc6f3447a0c3de159c..73911c879c2b015671e07f6082312d4a8925975c 100644 (file)
@@ -395,7 +395,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -955,7 +955,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 64d198b7b8420689ddaafa429e8e2c1ff5361952..f7daf7473fdcda7f9e9eaf7cc9e675365447708d 100644 (file)
@@ -604,7 +604,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1556,7 +1556,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 9653ff01e89d6fdb2fef3caefbfb51fcf5819a8b..208650a52f1136ccd1ddfedc0e2be013ff7aedee 100644 (file)
@@ -430,7 +430,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1054,7 +1054,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 94a19d8aecc647d63dc47162d6bc7b2a0f2b7c38..98226b9f9ff9d9552baafbf9426764e8a1cb3318 100644 (file)
@@ -644,7 +644,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1668,7 +1668,7 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 693980c7776c72dabdfdeb75164800747e3d4000..d67174bec1f1d0f02facc05de4b00e03441bc7b3 100644 (file)
@@ -262,7 +262,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -580,7 +580,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 57000e7b1eee96aca86001ca24d18bfbf882e31e..a528385eb0e73e8bed5304785a21b1fedb9a4e2b 100644 (file)
@@ -359,7 +359,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -855,7 +855,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 2d71191330e58d3ed67372ce7f60a353c6d90da8..58a632e437234f6fd3c5bf6dabf9176f98c1c26b 100644 (file)
@@ -568,7 +568,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1456,7 +1456,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b4708dd5fc9b8b0bbd25c4860b7888bf6801f930..701d71ffef27a0ed4c9148876d27f8674ef83b5f 100644 (file)
@@ -394,7 +394,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -954,7 +954,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ddb6425c2e029ee2cc8dca6dcbe4ed6876ad6ca4..0fd01e6507d24236295853d80132434c4d295a58 100644 (file)
@@ -608,7 +608,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1568,7 +1568,7 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c57f5c239c47f827ca41eb67da69b968b2ad0bfa..934ea7dd51f9781f0316d35753e6a96636b4abbc 100644 (file)
@@ -233,7 +233,7 @@ nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -507,7 +507,7 @@ nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 162245432564888cfed2dd91cc193558b58b71a0..e63d4842f67441a4044312048895da4f6ff10c39 100644 (file)
@@ -330,7 +330,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -782,7 +782,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 8462f8620996a675325394652eb60b83fba95b3a..8db5f4bc1bac98cfe93ce8a2e7d345bc41ac1aa7 100644 (file)
@@ -545,7 +545,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1404,7 +1404,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index fcfafbc61a42a60eabdb02cc2aea0c01f63b80cb..a02c629844c1a5ae5dd4a63e31761a2f563fffb7 100644 (file)
@@ -330,7 +330,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -782,7 +782,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 31c3c94014f8258c1fc9f5faa62627cb8a6d9a98..299a85acc61db1a43c019aca48fa56beafc1163f 100644 (file)
@@ -545,7 +545,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1404,7 +1404,7 @@ nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index e9d4cd5d25c3f14f04a2fd7ff351ddd1045e0430..afd626dd74a9b4365c04b7fa90207047b7712e9f 100644 (file)
@@ -310,7 +310,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -696,7 +696,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 6de6a74a28a314929c1e681dc3336a57d2d4a464..79c9f51a36e9112b5c9022b9a7a99d63def804d8 100644 (file)
@@ -457,7 +457,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1111,7 +1111,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ac5428beaebc93538aa8031df7fa78c67ba02bb7..a8b8f82c08eb57745055d741893936f15ab89330 100644 (file)
@@ -816,7 +816,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2132,7 +2132,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b16c6db54b4cc0ff835a6d72c9d5e8e4edfa9e2b..27d672d3c120aeb4fe40524a3fb515987a9d8ed8 100644 (file)
@@ -501,7 +501,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1237,7 +1237,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index d0351ef6aa9aed92644098cb93390326ad34f587..4915511e113b950e1f8980ace10a13cd7827ad7a 100644 (file)
@@ -865,7 +865,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2271,7 +2271,7 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 18cdb9503382f34c788b57ca0fcab7bb47f321eb..5db2b6dd21c66d75207622b47f86991783be22b3 100644 (file)
@@ -276,7 +276,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -613,7 +613,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 850fd9fef92d74a8e225d677e6688d92b0cbc68f..aefb72e463fc920a07e8089e95cc3a1e1f7ab233 100644 (file)
@@ -423,7 +423,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1028,7 +1028,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 737096d54596ce312cf45b0d68d72374ea9f98b2..9fd59160c388b5d5ff19596ffa551221b35cabc9 100644 (file)
@@ -788,7 +788,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2070,7 +2070,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 5a3e04a34d1bed1715d67cc3460a1c2b305a8822..f167fc0a70114361ca114cb2537c7ad265eb5684 100644 (file)
@@ -423,7 +423,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1028,7 +1028,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 6fc666c14b484394057ec46f254a6584f87ec374..018c05a12e4a9b06f20cfcbc11b8c68ef0c9ba58 100644 (file)
@@ -788,7 +788,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2070,7 +2070,7 @@ nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 0e4c619dc6980563262f1b72a8d6167dfa8b8ab4..bc79ecfa1c41da09c7385e5fadeca0e254c30931 100644 (file)
@@ -333,7 +333,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -761,7 +761,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c8ac07b56d7c0dc7b7bed23f973561223e438d8b..33ee7f245701b5dc7bc05b476e59bbf53108e2df 100644 (file)
@@ -502,7 +502,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1250,7 +1250,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1f4487647a8297e67c17afec83ce5be6990dc82d..93c18ee9bee9ad8f6d5b2db85d4088c68cb19e99 100644 (file)
@@ -927,7 +927,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2493,7 +2493,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c4d109ba4f9099057bc44187e9064593e5a1eca0..e971ddc79ab6e6835c22ed6bf2c25d1b60a3d1e7 100644 (file)
@@ -558,7 +558,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1413,7 +1413,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c1074761b4b357b9893dc226372e9d0c5ec76909..6a8b0c4f40d62513045f7e5db8088ca6c0de408a 100644 (file)
@@ -988,7 +988,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2669,7 +2669,7 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 096a790eba0bf23a32fd9fe3a152c4dcb8991b76..79dcfe0d62d1e54b997347731698fcf6b34c7669 100644 (file)
@@ -301,7 +301,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -678,7 +678,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 5137d85711daf465ef12746bd5351fe3625b027c..c75cbb7390342f624f9d7449f9315af8be93705b 100644 (file)
@@ -470,7 +470,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1167,7 +1167,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 6951a9d6562ba59421aed5eb68a970731dc6c42b..cfa8e445e324695e63829240d597cd410087135d 100644 (file)
@@ -901,7 +901,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2431,7 +2431,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 03b5f5fc34b38ccdb03b74a46d0011b59b166045..fcb44ac1f5e3a3876e42133b81b852580e2e768d 100644 (file)
@@ -470,7 +470,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1167,7 +1167,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 9cc88e70259ff71d81bfcd2aeb1e70eae984cf68..4ace2e87a2731e2a4c3cf9d9ab6e35456d972af3 100644 (file)
@@ -901,7 +901,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2431,7 +2431,7 @@ nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 287fb2eef64364f2e406bdaed59124fd6decaedd..6eb8d61bc9fcf3674a5b2cf9c887f4ef00697df9 100644 (file)
@@ -324,7 +324,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -740,7 +740,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 4b3078ad9cdb9b11accb513018b2c695c317e490..0cf578d4a80ff808c55ac401cf40a4385a473039 100644 (file)
@@ -451,7 +451,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1097,7 +1097,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 91e8d98a978d8c15c660b9023231254e5998a368..5457ed1e947ab47f82c4685108efed3f99dfd0b6 100644 (file)
@@ -750,7 +750,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1944,7 +1944,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 82b44f266786a4f361275d29a2a5b5728f939863..54bd0d940aeb45847a1138416ffd2c949d8d707c 100644 (file)
@@ -488,7 +488,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1203,7 +1203,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index e853d2fc4e719d73b436552987e370f351fd3df7..e885cf9585f471afeef8533ff5a198976ba5c2c8 100644 (file)
@@ -792,7 +792,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -2063,7 +2063,7 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index d110386d842f24eb7eac481621016cdccb46d2be..6f249032d2b3fa138720a9f9140d47104b0bfa7d 100644 (file)
@@ -290,7 +290,7 @@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -647,7 +647,7 @@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1adcedfddbe0b642ab74218d40ef02dffab1b98d..1ec1fd6b247c38f5f6476f481fbd9e7d3d059437 100644 (file)
@@ -417,7 +417,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1004,7 +1004,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b0ee8bd719e05c8f97f6ad3f1e0373c423b7b7da..25563d326e3926146ef14bde87cb304d1fee8f69 100644 (file)
@@ -716,7 +716,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1851,7 +1851,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index f8ac075100951d39221ddb3a1e1c0156acd68ea3..3424c8438e7b1e1424a6eba937b2c1a24925727f 100644 (file)
@@ -452,7 +452,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1103,7 +1103,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 8d250c4c2035de05b29c0dca4e4ad4bd6da0a489..2c6cb48220f334a4955265e39a0ae5a744f198fa 100644 (file)
@@ -756,7 +756,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1963,7 +1963,7 @@ nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index cf1fb2f66d7d0a02d4a5ffc6bb6ae696371c2f86..07252ae3fba0d411e5467553a5974614390db56b 100644 (file)
@@ -261,7 +261,7 @@ nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -574,7 +574,7 @@ nb_kernel_ElecEw_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index cb3127b33a28b5931e430c8ba62ab20e2860e31d..fa76a525c47ab12d9ba925b9bda1ad1adae6dd29 100644 (file)
@@ -388,7 +388,7 @@ nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -931,7 +931,7 @@ nb_kernel_ElecEw_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 5b47b24cf2098ab6affde0a27e3d66ee39fc0345..478a0d621ab5a814271a557b3a88fd83adcb8fff 100644 (file)
@@ -693,7 +693,7 @@ nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1799,7 +1799,7 @@ nb_kernel_ElecEw_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ecc8c675f3163a51c754ca640202dc54e0b19e90..d171dc653a75d544b9d3bd74636c036b206fd29a 100644 (file)
@@ -388,7 +388,7 @@ nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -931,7 +931,7 @@ nb_kernel_ElecEw_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 15457ea0f0c7e9c4dc55d90f18ec5b8925b20ffe..47c16d4dc26865b44ebaacc8024bcf3ea5cb3597 100644 (file)
@@ -693,7 +693,7 @@ nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1799,7 +1799,7 @@ nb_kernel_ElecEw_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1ef26a7877f2b2d2c52e44dd427bdc578f978697..93dddf903f5196b121cc8b5741e7473d1dd8d239 100644 (file)
@@ -343,7 +343,7 @@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -806,7 +806,7 @@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 246f8642309d50126bfedf5576e27e51918940fc..ceac3dcc4f6f8b3bae5afd71033211796cbe0a5b 100644 (file)
@@ -315,7 +315,7 @@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -727,7 +727,7 @@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 9d4318fdc32b344f29140b3f589f755d2545348d..d240932c4eb824932d4b25954ca6655130729614 100644 (file)
@@ -284,7 +284,7 @@ nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -648,7 +648,7 @@ nb_kernel_ElecGB_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index fb1d4115c07cf2b3c513fed0107737ee48b2caf9..47b713daead1ac800a6f247d0cc73f1d8be1c075 100644 (file)
@@ -282,7 +282,7 @@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -638,7 +638,7 @@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 559493a0220755b0417fb8581bffdb4545658772..c5b30162fd2ee24706ed6d382acd739b708c6926 100644 (file)
@@ -264,7 +264,7 @@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -583,7 +583,7 @@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index bbbe149beb962fb692ae6daf8d66d601a2e15c24..ae2c7df49769039f803f6452d57ce888d4fbef4d 100644 (file)
@@ -289,7 +289,7 @@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -651,7 +651,7 @@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index a2781a5f164a8c8296fbd00f4ea8615d38fb705b..f81b14eef12b06e825c3e061021a41cdd60509a4 100644 (file)
@@ -246,7 +246,7 @@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -538,7 +538,7 @@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 45659fc29abf21b13b28796895e91c4db15075ae..3cccb1deb8e662675941b15a9b84bed3d911d23f 100644 (file)
@@ -317,7 +317,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -726,7 +726,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 96c0b08e882b467b3e37ef5d9302a382b2239c16..47dca61994b374b9c27127bd155165e943f648a9 100644 (file)
@@ -434,7 +434,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1057,7 +1057,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 94a92b8f9c2be6cf0a3a3ce36628ca0f6d0fa850..1d9c763f4c8596317fc1a78821ac79a7c7aa5093 100644 (file)
@@ -703,7 +703,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1826,7 +1826,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ad9ffc4366d45b834d5f8223a2e9767a01870540..2c3cede629c5b4686a5562d1d1619f0be9364446 100644 (file)
@@ -468,7 +468,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1154,7 +1154,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 72695842ac4e7b8118c3054615a63fe78b341900..2444bd284deff6e4cf55146910368d0384de1e63 100644 (file)
@@ -752,7 +752,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1965,7 +1965,7 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 2702549656289db8b9a4652270c9f07b8377071e..2c3b78c50697c282ad216dcfa9452c53ddac4325 100644 (file)
@@ -285,7 +285,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -634,7 +634,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1e48649445a96bdb609647c48f01421d0e21b57e..6a232d552f63f5b316772210f8b1b8a158a917ec 100644 (file)
@@ -402,7 +402,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -965,7 +965,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 16950b998f490433cb9ee5c45216157ca5f8c799..1b7dcc4049d85567882ce42c589a2c70730934e9 100644 (file)
@@ -671,7 +671,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1734,7 +1734,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 802f3ec8e7303c446773e7c5b95fc747995e3b2b..1bb9ff2b316db062f16fc7efe3e462dcfe6ed358 100644 (file)
@@ -446,7 +446,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1091,7 +1091,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index f75304f6501f106e9edc4919b8560092b229f577..9be2260c7717bae25f1c05be10c348d8aaf27666 100644 (file)
@@ -720,7 +720,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1873,7 +1873,7 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 4b053cf3bf8cc3901fa964aed73cf338a92d7ae8..4ac2fdb4161c0300c99f771d22344bfb2daae822 100644 (file)
@@ -308,7 +308,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -696,7 +696,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 40902802b6304e8a2392d3fee1c6d8658a987e87..4d60c4858ae39419366e4ad0fec536945c517e4b 100644 (file)
@@ -425,7 +425,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1027,7 +1027,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index a74e5e0c0cb0007047d60324ec232dd247c46d2b..7f0c7a0bf855f34a5b2cb79e1221600015e65067 100644 (file)
@@ -694,7 +694,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1796,7 +1796,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 25a4f01d20e607d9ba33e142eb7ee618879af439..f53b08af1479aee0613e16ce8711f3cd0105b94b 100644 (file)
@@ -470,7 +470,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1156,7 +1156,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 3b0ebc549c962c710841956b1e6b16aa4b530f76..228039714e4be11d0c06365005805eb86d24a137 100644 (file)
@@ -744,7 +744,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1938,7 +1938,7 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index a1e0ec347dfc98371b1d6ab45e3369038f24743b..9cb4eb85e6b147ae25772d4215da7f6df1d55642 100644 (file)
@@ -251,7 +251,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -551,7 +551,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1184d658d16ee0c9ead4809bff5db1b8e9a70c5c..78fd9c887070dfa27fecd1c5eaf6b393fefd5e24 100644 (file)
@@ -368,7 +368,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -882,7 +882,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ac455d1f6fbefee07df99def12057ea1b9332276..5d3f1f0da216d29c50044d49bd944f91b8406ee1 100644 (file)
@@ -643,7 +643,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1672,7 +1672,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index e944f527abae9827556a4035ac0f9faf7e1f7de9..34e6cf2f22724bf179767496611297cf2ca7ca94 100644 (file)
@@ -368,7 +368,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -882,7 +882,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 5fea2fef74229e298efe1fe4b9278f19be80687f..5004f92d110fca3421b15157696d791d5cb4bd32 100644 (file)
@@ -643,7 +643,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1672,7 +1672,7 @@ nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index b5c44bc276934f9f0d50271873b5fd0f876a4cf5..e0e67b31f30a49c5d0e3d7d8389e329fc0d46499 100644 (file)
@@ -301,7 +301,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -685,7 +685,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c8131930dd166da6dc0f970eaa0d5ad331cb2f42..1269d37a7f77444d50bbe1023f02cb20be33d9e9 100644 (file)
@@ -398,7 +398,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -958,7 +958,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 321b5cdc32aa55e7c061d07253577ba9df7e95e5..dcc278e90491bca03aa1acf6dfecc15ece40be24 100644 (file)
@@ -607,7 +607,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1553,7 +1553,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index d2770f18bc37fadca33422f6597b12687cccde72..a0f4bed4ecd411de0688f11356e53a3636e4fbb7 100644 (file)
@@ -433,7 +433,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1057,7 +1057,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 3276052f6fa52e896be3da3c468e989b317e443c..c708b941ecb71350bac438d7a5089b3517e1e9bc 100644 (file)
@@ -647,7 +647,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1665,7 +1665,7 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index c98761fa0832a18ee90e2fc5ac43a1f0b19be866..01781d9b23577ddc42d2d94072aac62a8f3bf3db 100644 (file)
@@ -265,7 +265,7 @@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -585,7 +585,7 @@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 17679c4f35dcd1b78034e91935dc45976ffd8ed4..92032419089e26b0f36a8ed3b7baee20fdd6b14f 100644 (file)
@@ -362,7 +362,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -858,7 +858,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 1d7e534fa63b4e2cf33a03948ad1ba1bc7d1cc50..aae5b22fcb509785f559a5981663286a6712814a 100644 (file)
@@ -571,7 +571,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1453,7 +1453,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 86ea4775f6a2f97bb1a4f79af63abdd77e6f43fb..98bde4f40bf91130a0c1ec764d5646b2a321bd5d 100644 (file)
@@ -397,7 +397,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -957,7 +957,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index efca627a5346011d0490b0ed2eb7fa21acd04323..7d3ecf9a6425ca0db4faecc8df722d766ccaf584 100644 (file)
@@ -611,7 +611,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1565,7 +1565,7 @@ nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 623a01f5364209cb177d16a831fe3645ca0212d4..21d1c4bc47a922e927e99451a34722b758996f0b 100644 (file)
@@ -236,7 +236,7 @@ nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -512,7 +512,7 @@ nb_kernel_ElecRF_VdwNone_GeomP1P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 2e46f91e0299748ed405ceebd133b2d93af24bb8..92a6ddc8dea11d6d6b487c355763f9d0dd14efec 100644 (file)
@@ -333,7 +333,7 @@ nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -785,7 +785,7 @@ nb_kernel_ElecRF_VdwNone_GeomW3P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index d9f54eaec18a6ffb808bd49a2ec4b3d5d5d28ecd..66fcb4274744eb88168679da0fac618f69d7822f 100644 (file)
@@ -548,7 +548,7 @@ nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1401,7 +1401,7 @@ nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 97ea6f238852cea8c93062e7959be6e827120468..ae6785d7837866b5c48e7f08ee0506f7a341bf42 100644 (file)
@@ -333,7 +333,7 @@ nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -785,7 +785,7 @@ nb_kernel_ElecRF_VdwNone_GeomW4P1_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 82196f0425d7b988e30e3d81993ccb620ba5220d..778fbf763bff8d05e5e723633f61c98781a43747 100644 (file)
@@ -548,7 +548,7 @@ nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
@@ -1401,7 +1401,7 @@ nb_kernel_ElecRF_VdwNone_GeomW4W4_F_avx_256_double
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index 9b69cf2102f73722005550e2765af80099378316..81f76e0f160386180aff7f4d3ba34c3092d82e7b 100644 (file)
@@ -416,7 +416,7 @@ void
              * This mask will be 0xFFFFFFFF for dummy entries and 0x0 for real ones,
              * so use it as val = _mm_andnot_pd(mask,val) to clear dummy entries.
              */
-            tmpmask0 = gmx_mm_castsi128_pd(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
+            tmpmask0 = gmx_mm_castsi128_ps(_mm_cmplt_epi32(_mm_loadu_si128((const __m128i *)(jjnr+jidx)),_mm_setzero_si128()));
 
             tmpmask1 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(3,3,2,2));
             tmpmask0 = _mm_permute_ps(tmpmask0,_GMX_MM_PERMUTE(1,1,0,0));
index ce27dda2b7fdbae5fe1a67b6a648cb4711e2ba6b..dc0758a3fccc3ecd0592fee4a932c8e4af81a476 100644 (file)
@@ -1166,7 +1166,7 @@ static void make_nbf_tables(FILE *fp,const output_env_t oenv,
     nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
     nbl->table_elec.ninteractions = 1;
     nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
-    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
+    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),32);
 
     nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
     nbl->table_vdw.format = nbl->table_elec_vdw.format;
@@ -1177,7 +1177,7 @@ static void make_nbf_tables(FILE *fp,const output_env_t oenv,
     nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
     nbl->table_vdw.ninteractions = 2;
     nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
-    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
+    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),32);
 
     for(i=0; i<=nbl->table_elec_vdw.n; i++)
     {
@@ -1615,9 +1615,9 @@ static void init_ewald_f_table(interaction_const_t *ic,
     sfree_aligned(ic->tabq_coul_V);
 
     /* Create the original table data in FDV0 */
-    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
-    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
-    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
+    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,32);
+    snew_aligned(ic->tabq_coul_F,ic->tabq_size,32);
+    snew_aligned(ic->tabq_coul_V,ic->tabq_size,32);
     table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
                                 ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
 }
@@ -1652,9 +1652,9 @@ void init_interaction_const(FILE *fp,
     snew(ic, 1);
 
     /* Just allocate something so we can free it */
-    snew_aligned(ic->tabq_coul_FDV0,16,16);
-    snew_aligned(ic->tabq_coul_F,16,16);
-    snew_aligned(ic->tabq_coul_V,16,16);
+    snew_aligned(ic->tabq_coul_FDV0,16,32);
+    snew_aligned(ic->tabq_coul_F,16,32);
+    snew_aligned(ic->tabq_coul_V,16,32);
 
     ic->rlist       = fr->rlist;
     ic->rlistlong   = fr->rlistlong;
index 202a1a5b2ccd2a73cbb37a1e8c8aba00f0103567..fafcf008da510e49ab692fc1bff758bcf64b4779 100644 (file)
@@ -1085,7 +1085,7 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
    * numbers per nx+1 data points. For performance reasons we want
   * the table data to be aligned to a 32-byte boundary.
    */
-  snew_aligned(table.data, 12*(nx+1)*sizeof(real),16);
+  snew_aligned(table.data, 12*(nx+1)*sizeof(real),32);
 
   for(k=0; (k<etiNR); k++) {
     if (tabsel[k] != etabUSER) {
@@ -1203,7 +1203,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
         * to do this :-)
         */
        
-       snew_aligned(table.data,4*nx,16);
+       snew_aligned(table.data,4*nx,32);
        
        init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
        
@@ -1363,7 +1363,7 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
         * to do this :-)
         */
        
-    snew_aligned(table.data,4*nx,16);
+    snew_aligned(table.data,4*nx,32);
 
        copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);