Fixes for Windows/ICC--typecasting

author Peter Kasson <kasson@Macintosh-175.local>

Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)

committer Peter Kasson <kasson@Macintosh-175.local>

Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)
author Peter Kasson <kasson@Macintosh-175.local>
Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)
committer Peter Kasson <kasson@Macintosh-175.local>
Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)
diff --git a/include/gmx_sse2_single.h b/include/gmx_sse2_single.h

index 0f1d1b10147d6eee5806fbd3177d32095defcf14..88dbd95b62350c58002b2bff5495fe29546d5a19 100644 (file)
--- a/include/gmx_sse2_single.h
+++ b/include/gmx_sse2_single.h
@@ -87,12 +87,15 @@
  #if (defined (_MSC_VER) || defined(__INTEL_COMPILER))
  #  define gmx_mm_castsi128_ps(a) _mm_castsi128_ps(a)
  #  define gmx_mm_castps_si128(a) _mm_castps_si128(a)
+#  define gmx_mm_castps_ps128(a) (a)
  #elif defined(__GNUC__)
  #  define gmx_mm_castsi128_ps(a) ((__m128)(a))
  #  define gmx_mm_castps_si128(a) ((__m128i)(a))
+#  define gmx_mm_castps_ps128(a) ((__m128)(a))
  #else
  static __m128  gmx_mm_castsi128_ps(__m128i a) { return *(__m128 *) &a;  } 
  static __m128i gmx_mm_castps_si128(__m128 a)  { return *(__m128i *) &a; } 
+static __m128  gmx_mm_castps_ps128(__m128 a) { return *(__m128 *) &a;  } 
  #endif
  
  
diff --git a/src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel430_ia32_sse.c b/src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel430_ia32_sse.c

index 23ee89e755ec62c012379201a9439d6b88e1acfc..511c5a7c1415e7c6265328a35dae4e1c14c07430 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel430_ia32_sse.c
+++ b/src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel430_ia32_sse.c
@@ -21,7 +21,7 @@
  
  #include <xmmintrin.h>
  #include <emmintrin.h>
-
+#include <gmx_sse2_single.h>
  
  /* get gmx_gbdata_t */
  #include "../nb_kerneltype.h"
@@ -35,12 +35,12 @@ _mm_cvtsi128_si32(_mm_srli_si128((x), 4 * (imm)))
  static inline __m128
  my_invrsq_ps(__m128 x)
  {
-       const __m128 three = (const __m128) {3.0f, 3.0f, 3.0f, 3.0f};
-       const __m128 half  = (const __m128) {0.5f, 0.5f, 0.5f, 0.5f};
+       const __m128 three = {3.0f, 3.0f, 3.0f, 3.0f};
+       const __m128 half  = {0.5f, 0.5f, 0.5f, 0.5f};
         
         __m128 t1 = _mm_rsqrt_ps(x);
         
-       return (__m128) _mm_mul_ps(half,_mm_mul_ps(t1,_mm_sub_ps(three,_mm_mul_ps(x,_mm_mul_ps(t1,t1)))));
+       return gmx_mm_castps_ps128(_mm_mul_ps(half,_mm_mul_ps(t1,_mm_sub_ps(three,_mm_mul_ps(x,_mm_mul_ps(t1,t1))))));
  }
  
  void nb_kernel430_ia32_sse(int *           p_nri,
@@ -624,15 +624,15 @@ void nb_kernel430_ia32_sse(int *           p_nri,
                                 mask  = _mm_set_epi32(0,0xffffffff,0xffffffff,0xffffffff);
                         }
                         
-                       jx      = _mm_and_ps( (__m128) mask, xmm6);
-                       jy      = _mm_and_ps( (__m128) mask, xmm4);
-                       jz      = _mm_and_ps( (__m128) mask, xmm5);
+                       jx      = _mm_and_ps( gmx_mm_castsi128_ps(mask), xmm6);
+                       jy      = _mm_and_ps( gmx_mm_castsi128_ps(mask), xmm4);
+                       jz      = _mm_and_ps( gmx_mm_castsi128_ps(mask), xmm5);
                         
-                       c6      = _mm_and_ps( (__m128) mask, c6);
-                       c12     = _mm_and_ps( (__m128) mask, c12);
-                       dvdaj   = _mm_and_ps( (__m128) mask, dvdaj);
-                       isaj    = _mm_and_ps( (__m128) mask, isaj);                     
-                       q       = _mm_and_ps( (__m128) mask, q);
+                       c6      = _mm_and_ps( gmx_mm_castsi128_ps(mask), c6);
+                       c12     = _mm_and_ps( gmx_mm_castsi128_ps(mask), c12);
+                       dvdaj   = _mm_and_ps( gmx_mm_castsi128_ps(mask), dvdaj);
+                       isaj    = _mm_and_ps( gmx_mm_castsi128_ps(mask), isaj);                 
+                       q       = _mm_and_ps( gmx_mm_castsi128_ps(mask), q);
                         
                         dx1     = _mm_sub_ps(ix,jx);
                         dy1     = _mm_sub_ps(iy,jy);
@@ -710,8 +710,8 @@ void nb_kernel430_ia32_sse(int *           p_nri,
                         xmm1    = _mm_mul_ps(xmm1,isaj);
                         dvdaj   = _mm_add_ps(dvdaj,xmm1);
                         
-                       vcoul   = _mm_and_ps( (__m128) mask, vcoul);
-                       vgb     = _mm_and_ps( (__m128) mask, vgb);
+                       vcoul   = _mm_and_ps( gmx_mm_castsi128_ps(mask), vcoul);
+                       vgb     = _mm_and_ps( gmx_mm_castsi128_ps(mask), vgb);
                         
                         vctot   = _mm_add_ps(vctot,vcoul);
                         vgbtot  = _mm_add_ps(vgbtot,vgb);
@@ -897,9 +897,9 @@ void nb_kernel430_ia32_sse(int *           p_nri,
                                 _mm_store_ss(faction+j33+2,xmm7); 
                         }
                         
-                       t1 = _mm_and_ps( (__m128) mask, t1);
-                       t2 = _mm_and_ps( (__m128) mask, t2);
-                       t3 = _mm_and_ps( (__m128) mask, t3);
+                       t1 = _mm_and_ps( gmx_mm_castsi128_ps(mask), t1);
+                       t2 = _mm_and_ps( gmx_mm_castsi128_ps(mask), t2);
+                       t3 = _mm_and_ps( gmx_mm_castsi128_ps(mask), t3);
                         
                         fix = _mm_add_ps(fix,t1);
                         fiy = _mm_add_ps(fiy,t2);
@@ -924,7 +924,7 @@ void nb_kernel430_ia32_sse(int *           p_nri,
                 
                 xmm2    = _mm_unpacklo_ps(fix,fiy); /* fx, fy, - - */
                 xmm2    = _mm_movelh_ps(xmm2,fiz); 
-               xmm2    = _mm_and_ps( (__m128) maski, xmm2);
+               xmm2    = _mm_and_ps( gmx_mm_castsi128_ps(maski), xmm2);
                 
                 /* load, add and store i forces */
                 xmm4    = _mm_loadl_pi(xmm4, (__m64 *) (faction+ii3));
author	Peter Kasson <kasson@Macintosh-175.local>
	Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)
committer	Peter Kasson <kasson@Macintosh-175.local>
	Sat, 17 Apr 2010 23:52:20 +0000 (16:52 -0700)
include/gmx_sse2_single.h		patch | blob | history
src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel430_ia32_sse.c		patch | blob | history