Added check for SSE2 support to use generalized born kernels
author Erik Lindahl <lindahl@cbr.su.se>
Fri, 27 Aug 2010 20:22:40 +0000 (22:22 +0200)
committer Erik Lindahl <lindahl@cbr.su.se>
Fri, 27 Aug 2010 20:22:40 +0000 (22:22 +0200)
src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel_ia32_sse.c
src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel_ia32_sse_test_asm.s
src/gmxlib/nonbonded/nb_kernel_ia32_sse/nb_kernel_ia32_sse_test_asm_intel_syntax.s
src/gmxlib/nonbonded/nb_kernel_x86_64_sse/nb_kernel_x86_64_sse.c
src/gmxlib/nonbonded/nb_kernel_x86_64_sse/nb_kernel_x86_64_sse_test_asm.s
src/gmxlib/nonbonded/nb_kernel_x86_64_sse/nb_kernel_x86_64_sse_test_asm_intel_syntax.s
src/mdlib/forcerec.c

index 15bb53ba2259528ce6915bf914c6230bce80bc5b..595b17173481dc55ed4c6e794d54a730f407c372 100644 (file)
@@ -209,7 +209,7 @@ nb_kernel_ia32_sse_test(FILE *                log)
 #endif
     
     if(log)
-        fprintf(log,"Testing ia32 SSE support...");
+        fprintf(log,"Testing ia32 SSE2 support...");
 
        nb_kernel_ia32_sse_present = TRUE;
        signal(SIGILL,nb_kernel_ia32_sse_sigill_handler);
index e767a00997e5afb3003105cf21df066722c1d014..66ef51bae90ae663a71b4051f02baa844046162b 100644 (file)
 nb_kernel_ia32_sse_test_asm: 
 _nb_kernel_ia32_sse_test_asm: 
         emms
-        xorps %xmm0,%xmm0
+        xorpd %xmm0,%xmm0
         emms
         ret
 
-
-
-
index 507e2bf90e1f35afdbd34ac11575b7e900217c09..c3d06fa1ab04748ddcdfe10ef7fd6a6fe69971f8 100644 (file)
 .globl _nb_kernel_ia32_sse_test_asm
 nb_kernel_ia32_sse_test_asm:
 _nb_kernel_ia32_sse_test_asm:
-       emms
-       xorps xmm0, xmm0
-       emms
-       ret
+        emms
+        xorpd xmm0, xmm0
+        emms
+        ret
+
 
        
        
index 4af2c05cd136eff109233f2338124369b5ffb7ff..856ba32c8db2dd0892535d876d8052ae19de461a 100644 (file)
@@ -208,7 +208,7 @@ nb_kernel_x86_64_sse_test(FILE *                log)
 #endif
     
     if(log)
-        fprintf(log,"Testing x86_64 SSE support...");
+        fprintf(log,"Testing x86_64 SSE2 support...");
 
        nb_kernel_x86_64_sse_present = TRUE;
        signal(SIGILL,nb_kernel_x86_64_sse_sigill_handler);
index 71219bed4609c063328071a845cc3b8fbf3f108d..138907526c9926d8745d58b1d2116f9c1c53e6db 100644 (file)
@@ -21,7 +21,7 @@ nb_kernel_x86_64_sse_test_asm:
 _nb_kernel_x86_64_sse_test_asm: 
         push %rbx              ## test 64-bit register
         emms
-        xorps %xmm0,%xmm0       ## test SSE
+        xorpd %xmm0,%xmm0       ## test SSE2
         emms
         pop  %rbx              ## test 64-bit register
         ret
index 18d375dbf4bee73eaf9f7afcfee0173332f36485..b21f5c9756a79837111c411cdc4ab673749a9113 100644 (file)
 .globl _nb_kernel_x86_64_sse_test_asm
 nb_kernel_x86_64_sse_test_asm:
 _nb_kernel_x86_64_sse_test_asm:
-       push  rbx               ;# test 64-bit register
-       emms
-       xorps xmm0, xmm0        ;# test SSE
-       emms
-       pop   rbx               ;# test 64-bit register
-       ret
-
+        push  rbx               ;# test 64-bit register
+        emms
+        xorpd xmm0, xmm0        ;# test SSE2
+        emms
+        pop   rbx               ;# test 64-bit register
+        ret
        
        
index 309c0f7196cd157cd7789c8830907762d8103796..a13862fb3061155e762256e4c3c6342b3dd3642b 100644 (file)
 #include "copyrite.h"
 #include "mtop_util.h"
 
+
+#ifdef _MSC_VER
+/* MSVC definition for __cpuid() */
+#include <intrin.h>
+#endif
+
+
+
 t_forcerec *mk_forcerec(void)
 {
   t_forcerec *fr;
@@ -1220,6 +1228,57 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 }
 
 
+/* Return 1 if the CPU reports SSE2 support, otherwise 0.
+ *
+ * SSE2 is read from CPUID leaf 1 (bit 26 of edx). The result is always 0 when
+ * none of the GMX_*SSE* build options is defined, or when no way of issuing
+ * CPUID is known for the compiler/architecture.
+ */
+static int
+forcerec_check_sse2(void)
+{
+    /* Declared outside the #if so builds without SSE options still compile. */
+    int status = 0;
+#if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE)|| defined(GMX_SSE2) )
+    unsigned int level = 1;    /* CPUID leaf that holds the feature flags */
+    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+#ifdef _MSC_VER
+    int CPUInfo[4];
+
+    __cpuid(CPUInfo,(int)level);
+    eax=CPUInfo[0];
+    ebx=CPUInfo[1];
+    ecx=CPUInfo[2];
+    edx=CPUInfo[3];
+#elif defined(__x86_64__)
+    /* cpuid overwrites rbx, so the caller's value is parked in rsi meanwhile.
+     * push/pop is avoided: it would write into the x86-64 red zone. */
+    __asm__ ("xchgq %%rbx, %%rsi\n\tcpuid\n\txchgq %%rbx, %%rsi\n"
+             : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
+             : "0" (level));
+#elif defined(__i386__)
+    /* Same swap through esi; ebx may be reserved (e.g. as the PIC register). */
+    __asm__ ("xchgl %%ebx, %%esi\n\tcpuid\n\txchgl %%ebx, %%esi\n"
+             : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
+             : "0" (level));
+#else
+    (void)level;               /* no CPUID access here: registers stay 0 */
+#endif
+    /* CPUID leaf 1 feature bits:
+     *   SSE     edx bit 25        SSE2     edx bit 26
+     *   SSE3    ecx bit  0        SSE4.1   ecx bit 19
+     */
+    (void)eax; (void)ebx; (void)ecx;    /* only edx is needed for SSE2 */
+    status = (edx & (1U << 26)) != 0;
+#endif
+
+    /* Return SSE2 status */
+    return status;
+}
+
+
+
+
 void init_forcerec(FILE *fp,
                    const output_env_t oenv,
                    t_forcerec *fr,
@@ -1321,6 +1380,13 @@ void init_forcerec(FILE *fp,
                 "\nFound environment variable GMX_NOOPTIMIZEDKERNELS.\n"
                 "Disabling SSE/SSE2/Altivec/ia64/Power6/Bluegene specific kernels.\n\n");
     }    
+
+#if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE)|| defined(GMX_SSE2) )
+    if( forcerec_check_sse2() == 0 )
+    {
+        fr->UseOptimizedKernels = FALSE;
+    }
+#endif
     
     /* Check if we can/should do all-vs-all kernels */
     fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);