diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index e9388e300d..2ed461927c 100755
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -295,22 +295,6 @@ os windows? [
     4 "double" c-type (>>align)
 ] unless
 
-USING: cpu.x86.features cpu.x86.features.private ;
+USE: vocabs.loader
 
-"-no-sse2" (command-line) member? [
-    [ { check_sse2 } compile ] with-optimizer
-
-    "Checking if your CPU supports SSE2..." print flush
-    sse2? [
-        " - yes" print
-        enable-sse2
-        [
-            sse2? [
-                "This image was built to use SSE2, which your CPU does not support." print
-                "You will need to bootstrap Factor again." print
-                flush
-                1 exit
-            ] unless
-        ] "cpu.x86" add-init-hook
-    ] [ " - no" print ] if
-] unless
+"cpu.x86.features" require ! replaces the inline SSE2 probe: that vocabulary's top-level code detects the SSE level, installs the startup check and enables the matching support
diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor
index a7a4e783c3..df9287162c 100644
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@@ -221,12 +221,11 @@ enable-alien-4-intrinsics
 ! Enable fast calling of libc math functions
 enable-float-functions
 
-! SSE2 is always available on x86-64.
-enable-sse2
-
 USE: vocabs.loader
 
 {
     { [ os unix? ] [ "cpu.x86.64.unix" require ] }
     { [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
 } cond
+
+"cpu.x86.features" require ! the SSE level is now detected when that vocabulary loads, instead of unconditionally calling enable-sse2 here
diff --git a/basis/cpu/x86/features/features.factor b/basis/cpu/x86/features/features.factor
index bc4818d6af..878fb592b9 100644
--- a/basis/cpu/x86/features/features.factor
+++ b/basis/cpu/x86/features/features.factor
@@ -1,21 +1,29 @@
 ! Copyright (C) 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: system kernel math alien.syntax ;
+USING: system kernel math alien.syntax combinators locals init io
+cpu.x86 compiler compiler.units accessors ;
 IN: cpu.x86.features
 
-HOOK: sse2? cpu ( -- ? )
+ALIAS: sse-version sse_version ! dash-style name for sse_version (its declaration is not visible in this diff; presumably the VM routine of the same name -- confirm)
 
-M: x86.32 sse2? check_sse2 ;
-
-M: x86.64 sse2?
t ;
+: sse-string ( version -- string ) ! human-readable name for an sse-version level code
+    {
+        { 00 [ "no SSE" ] }
+        { 10 [ "SSE1" ] }
+        { 20 [ "SSE2" ] }
+        { 30 [ "SSE3" ] }
+        { 33 [ "SSSE3" ] }
+        { 41 [ "SSE4.1" ] }
+        { 42 [ "SSE4.2" ] }
+    } case ;
 
 HOOK: instruction-count cpu ( -- n )
 
@@ -23,3 +31,42 @@ M: x86 instruction-count read_timestamp_counter ;
 
 : count-instructions ( quot -- n )
     instruction-count [ call ] dip instruction-count swap - ; inline
+
+USING: cpu.x86.features cpu.x86.features.private ;
+
+! Register a "cpu.x86" init hook that prints an explanation and exits with
+! status 1 when the running CPU reports a lower SSE level than `version`.
+:: install-sse-check ( version -- )
+    [
+        sse-version version < [
+            "This image was built to use " write
+            version sse-string write
+            " but your CPU only supports " write
+            sse-version sse-string write "." print
+            "You will need to bootstrap Factor again." print
+            flush
+            1 exit
+        ] when
+    ] "cpu.x86" add-init-hook ;
+
+! Turn on the compiler support matching an SSE level code. Levels 33, 41
+! and 42 enable the same support as SSE3; levels 00 and 10 enable nothing.
+: enable-sse ( version -- )
+    {
+        { 00 [ ] }
+        { 10 [ ] }
+        { 20 [ enable-sse2 ] }
+        { 30 [ enable-sse3 ] }
+        { 33 [ enable-sse3 ] }
+        { 41 [ enable-sse3 ] }
+        { 42 [ enable-sse3 ] }
+    } case ;
+
+[ { sse_version } compile ] with-optimizer
+
+! Top-level code: report the detected level (flushed so the status line is
+! visible immediately), install the init-hook check, enable the support.
+"Checking for multimedia extensions: " write sse-version
+[ sse-string write " detected" print flush ]
+[ install-sse-check ]
+[ enable-sse ] tri
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index af548a1f2b..602dfd6541 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -700,6 +700,11 @@ M: x86 small-enough? ( n -- ?
)
 : enable-sse2 ( -- )
     enable-float-intrinsics
     enable-fsqrt
-    enable-float-min/max ;
+    enable-float-min/max
+    enable-sse2-simd ;
+
+: enable-sse3 ( -- ) ! everything enable-sse2 turns on, plus the SSE3 SIMD support
+    enable-sse2
+    enable-sse3-simd ;
 
 enable-min/max
diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor
index 39ce72356b..a36eee9081 100644
--- a/basis/math/vectors/simd/simd.factor
+++ b/basis/math/vectors/simd/simd.factor
@@ -4,7 +4,7 @@ USING: accessors alien.c-types byte-arrays cpu.architecture
 generalizations kernel math math.functions math.vectors
 math.vectors.simd.functor math.vectors.specialization parser
 prettyprint.custom sequences sequences.private
-specialized-arrays.double locals assocs literals ;
+specialized-arrays.double locals assocs literals words fry ;
 IN: math.vectors.simd
 
         { norm [ norm-sq sqrt ] }
         { normalize [ dup norm v/n ] }
         { distance [ v- norm ] }
-    } assoc-union
+    } ;
+
+:: simd-vector-words ( class ctor elt-type assoc -- )
+    class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
     specialize-vector-words ;
 
 PRIVATE>
diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S
index a879712190..10cd7f2383 100644
--- a/vm/cpu-x86.32.S
+++ b/vm/cpu-x86.32.S
@@ -44,17 +44,6 @@ DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, voi
 	add $12,%esp /* pop args from the stack */
 	ret /* return _with new stack_ */
 
-/* cpu.x86.32 calls this */
-DEF(bool,check_sse2,(void)):
-	push %ebx
-	mov $1,%eax
-	cpuid
-	shr $26,%edx
-	and $1,%edx
-	pop %ebx
-	mov %edx,%eax
-	ret
-
 DEF(long long,read_timestamp_counter,(void)):
 	rdtsc
 	ret
@@ -72,6 +61,5 @@ DEF(void,primitive_inline_cache_miss_tail,(void)):
 
 #ifdef WINDOWS
 	.section .drectve
-	.ascii " -export:check_sse2"
 	.ascii " -export:read_timestamp_counter"
 #endif
diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S
index e83bb0fd7d..09a423d6b1 100644
--- a/vm/cpu-x86.S
+++ b/vm/cpu-x86.S
@@ -68,7 +68,60 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
 	add $STACK_PADDING,STACK_REG
 	jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */
 
+/* cpu.x86.features calls this.
+   Returns the highest SSE level reported by CPUID leaf 1 as a decimal
+   code: 0 (none), 10, 20, 30, 33 (SSSE3), 41 or 42. The DEF return type
+   is int because the result is a level code rather than a flag.
+   CPUID overwrites EAX, EBX, ECX and EDX; EBX/RBX is callee-saved in the
+   C calling conventions, so it is saved around the instruction. POP does
+   not modify the flags or the ECX/EDX feature words tested below. */
+DEF(int,sse_version,(void)):
+#ifdef __x86_64__
+	push %rbx
+#else
+	push %ebx
+#endif
+	mov $0x1,RETURN_REG /* CPUID leaf 1: feature information */
+	cpuid
+#ifdef __x86_64__
+	pop %rbx
+#else
+	pop %ebx
+#endif
+	test $0x100000,%ecx /* ECX bit 20: SSE4.2 */
+	jnz sse_42
+	test $0x80000,%ecx /* ECX bit 19: SSE4.1 */
+	jnz sse_41
+	test $0x200,%ecx /* ECX bit 9: SSSE3 */
+	jnz ssse_3
+	test $0x1,%ecx /* ECX bit 0: SSE3 */
+	jnz sse_3
+	test $0x4000000,%edx /* EDX bit 26: SSE2 */
+	jnz sse_2
+	test $0x2000000,%edx /* EDX bit 25: SSE */
+	jnz sse_1
+	mov $0,RETURN_REG /* no SSE support reported */
+	ret
+sse_42:
+	mov $42,RETURN_REG
+	ret
+sse_41:
+	mov $41,RETURN_REG
+	ret
+ssse_3:
+	mov $33,RETURN_REG
+	ret
+sse_3:
+	mov $30,RETURN_REG
+	ret
+sse_2:
+	mov $20,RETURN_REG
+	ret
+sse_1:
+	mov $10,RETURN_REG
+	ret
 #ifdef WINDOWS
 	.section .drectve
+	.ascii " -export:sse_version"
 	.ascii " -export:c_to_factor"
 #endif