Use reduce in pme
author Roland Schulz <roland@rschulz.eu>
Mon, 10 Feb 2014 21:30:50 +0000 (16:30 -0500)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 26 Feb 2014 12:57:34 +0000 (13:57 +0100)
Use the reduce operation that was added to the SIMD module. This also
fixes a bug where an aligned store was used on a possibly unaligned
stack buffer.

Change-Id: I5e08b0a0998fe5371148d1af9668fd6f2ecd5934

src/gromacs/mdlib/pme_simd4.h

index c49e54245a6e024c8b756287a4181cb1c7d5a847..862afe75bf13110a47cd7c81838b70fb59011d96 100644 (file)
@@ -94,8 +94,6 @@
  * This code does not assume any memory alignment for the grid.
  */
 {
-    real             fx_tmp[4], fy_tmp[4], fz_tmp[4];
-
     gmx_simd4_real_t fx_S, fy_S, fz_S;
 
     gmx_simd4_real_t tx_S, ty_S, tz_S;
         }
     }
 
-    gmx_simd4_storeu_r(fx_tmp, fx_S);
-    gmx_simd4_storeu_r(fy_tmp, fy_S);
-    gmx_simd4_storeu_r(fz_tmp, fz_S);
-
-    fx += fx_tmp[0]+fx_tmp[1]+fx_tmp[2]+fx_tmp[3];
-    fy += fy_tmp[0]+fy_tmp[1]+fy_tmp[2]+fy_tmp[3];
-    fz += fz_tmp[0]+fz_tmp[1]+fz_tmp[2]+fz_tmp[3];
+    fx += gmx_simd4_reduce_r(fx_S);
+    fy += gmx_simd4_reduce_r(fy_S);
+    fz += gmx_simd4_reduce_r(fz_S);
 }
 #undef PME_GATHER_F_SIMD4_ORDER4
 #endif
 {
     int              offset;
 
-    real             fx_tmp[4], fy_tmp[4], fz_tmp[4];
-
     gmx_simd4_real_t fx_S, fy_S, fz_S;
 
     gmx_simd4_real_t tx_S, ty_S, tz_S0, tz_S1;
         }
     }
 
-    gmx_simd4_store_r(fx_tmp, fx_S);
-    gmx_simd4_store_r(fy_tmp, fy_S);
-    gmx_simd4_store_r(fz_tmp, fz_S);
-
-    fx += fx_tmp[0]+fx_tmp[1]+fx_tmp[2]+fx_tmp[3];
-    fy += fy_tmp[0]+fy_tmp[1]+fy_tmp[2]+fy_tmp[3];
-    fz += fz_tmp[0]+fz_tmp[1]+fz_tmp[2]+fz_tmp[3];
+    fx += gmx_simd4_reduce_r(fx_S);
+    fy += gmx_simd4_reduce_r(fy_S);
+    fz += gmx_simd4_reduce_r(fz_S);
 }
 #undef PME_ORDER
 #undef PME_GATHER_F_SIMD4_ALIGNED