Detect SSE version and enable the correct set of SIMD intrinsics
parent
ae051e0c9a
commit
906a0d212a
|
@ -295,22 +295,6 @@ os windows? [
|
|||
4 "double" c-type (>>align)
|
||||
] unless
|
||||
|
||||
USING: cpu.x86.features cpu.x86.features.private ;
|
||||
USE: vocabs.loader
|
||||
|
||||
"-no-sse2" (command-line) member? [
|
||||
[ { check_sse2 } compile ] with-optimizer
|
||||
|
||||
"Checking if your CPU supports SSE2..." print flush
|
||||
sse2? [
|
||||
" - yes" print
|
||||
enable-sse2
|
||||
[
|
||||
sse2? [
|
||||
"This image was built to use SSE2, which your CPU does not support." print
|
||||
"You will need to bootstrap Factor again." print
|
||||
flush
|
||||
1 exit
|
||||
] unless
|
||||
] "cpu.x86" add-init-hook
|
||||
] [ " - no" print ] if
|
||||
] unless
|
||||
"cpu.x86.features" require
|
||||
|
|
|
@ -221,12 +221,11 @@ enable-alien-4-intrinsics
|
|||
! Enable fast calling of libc math functions
|
||||
enable-float-functions
|
||||
|
||||
! SSE2 is always available on x86-64.
|
||||
enable-sse2
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
{
|
||||
{ [ os unix? ] [ "cpu.x86.64.unix" require ] }
|
||||
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
|
||||
} cond
|
||||
|
||||
"cpu.x86.features" require
|
||||
|
|
|
@ -1,21 +1,29 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: system kernel math alien.syntax ;
|
||||
USING: system kernel math alien.syntax combinators locals init io
|
||||
cpu.x86 compiler compiler.units accessors ;
|
||||
IN: cpu.x86.features
|
||||
|
||||
<PRIVATE
|
||||
|
||||
FUNCTION: bool check_sse2 ( ) ;
|
||||
FUNCTION: int sse_version ( ) ;
|
||||
|
||||
FUNCTION: longlong read_timestamp_counter ( ) ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
HOOK: sse2? cpu ( -- ? )
|
||||
ALIAS: sse-version sse_version
|
||||
|
||||
M: x86.32 sse2? check_sse2 ;
|
||||
|
||||
M: x86.64 sse2? t ;
|
||||
! Maps a numeric SSE level code to the name printed for it.
! The codes listed here are the only ones handled; there is no
! default branch in the case table.
: sse-string ( version -- string )
    {
        { 00 [ "no SSE" ] }
        { 10 [ "SSE1" ] }
        { 20 [ "SSE2" ] }
        { 30 [ "SSE3" ] }
        { 33 [ "SSSE3" ] }
        { 41 [ "SSE4.1" ] }
        { 42 [ "SSE4.2" ] }
    } case ;
|
||||
|
||||
HOOK: instruction-count cpu ( -- n )
|
||||
|
||||
|
@ -23,3 +31,36 @@ M: x86 instruction-count read_timestamp_counter ;
|
|||
|
||||
: count-instructions ( quot -- n )
|
||||
instruction-count [ call ] dip instruction-count swap - ; inline
|
||||
|
||||
USING: cpu.x86.features cpu.x86.features.private ;
|
||||
|
||||
! Registers a "cpu.x86" init hook that compares the SSE level reported
! by sse-version against `version`, the level passed in by the caller.
! When the reported level is lower, the hook prints an explanation and
! exits with status 1.
! NOTE(review): presumably the hook runs each time the image starts --
! confirm against the init vocabulary.
:: install-sse-check ( version -- )
    [
        sse-version version < [
            "This image was built to use " write
            version sse-string write
            ! "only" makes it explicit that the CPU falls short of the
            ! level the image requires.
            " but your CPU only supports " write
            sse-version sse-string write "." print
            "You will need to bootstrap Factor again." print
            flush
            1 exit
        ] when
    ] "cpu.x86" add-init-hook ;
|
||||
|
||||
! Dispatches on the SSE level code and turns on the matching set of
! compiler features. Codes 00 and 10 enable nothing, 20 calls
! enable-sse2, and 30 and above call enable-sse3.
: enable-sse ( version -- )
    {
        { 00 [ ] }
        { 10 [ ] }
        { 20 [ enable-sse2 ] }
        { 30 [ enable-sse3 ] }
        { 33 [ enable-sse3 ] }
        { 41 [ enable-sse3 ] }
        { 42 [ enable-sse3 ] }
    } case ;
|
||||
|
||||
[ { sse_version } compile ] with-optimizer
|
||||
|
||||
"Checking for multimedia extensions: " write sse-version
|
||||
[ sse-string write " detected" print ]
|
||||
[ install-sse-check ]
|
||||
[ enable-sse ] tri
|
||||
|
|
|
@ -700,6 +700,11 @@ M: x86 small-enough? ( n -- ? )
|
|||
: enable-sse2 ( -- )
|
||||
enable-float-intrinsics
|
||||
enable-fsqrt
|
||||
enable-float-min/max ;
|
||||
enable-float-min/max
|
||||
enable-sse2-simd ;
|
||||
|
||||
: enable-sse3 ( -- )
|
||||
enable-sse2
|
||||
enable-sse3-simd ;
|
||||
|
||||
enable-min/max
|
||||
|
|
|
@ -4,7 +4,7 @@ USING: accessors alien.c-types byte-arrays cpu.architecture
|
|||
generalizations kernel math math.functions math.vectors
|
||||
math.vectors.simd.functor math.vectors.specialization parser
|
||||
prettyprint.custom sequences sequences.private
|
||||
specialized-arrays.double locals assocs literals ;
|
||||
specialized-arrays.double locals assocs literals words fry ;
|
||||
IN: math.vectors.simd
|
||||
|
||||
<PRIVATE
|
||||
|
@ -69,8 +69,20 @@ PRIVATE>
|
|||
|
||||
<PRIVATE
|
||||
|
||||
:: simd-vector-words ( class ctor elt-type assoc -- )
|
||||
class elt-type assoc {
|
||||
: supported-simd-ops ( assoc -- assoc' )
|
||||
{
|
||||
{ v+ (simd-v+) }
|
||||
{ v- (simd-v-) }
|
||||
{ v* (simd-v*) }
|
||||
{ v/ (simd-v/) }
|
||||
{ vmin (simd-vmin) }
|
||||
{ vmax (simd-vmax) }
|
||||
{ sum (simd-sum) }
|
||||
} [ nip "intrinsic" word-prop ] assoc-filter
|
||||
'[ drop _ key? ] assoc-filter ;
|
||||
|
||||
:: high-level-ops ( ctor -- assoc )
|
||||
{
|
||||
{ vneg [ [ dup v- ] keep v- ] }
|
||||
{ v. [ v* sum ] }
|
||||
{ n+v [ [ ctor execute ] dip v+ ] }
|
||||
|
@ -85,7 +97,10 @@ PRIVATE>
|
|||
{ norm [ norm-sq sqrt ] }
|
||||
{ normalize [ dup norm v/n ] }
|
||||
{ distance [ v- norm ] }
|
||||
} assoc-union
|
||||
} ;
|
||||
|
||||
:: simd-vector-words ( class ctor elt-type assoc -- )
|
||||
class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
|
||||
specialize-vector-words ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -44,17 +44,6 @@ DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, voi
|
|||
add $12,%esp /* pop args from the stack */
|
||||
ret /* return _with new stack_ */
|
||||
|
||||
/* cpu.x86.32 calls this */
|
||||
DEF(bool,check_sse2,(void)):
|
||||
push %ebx
|
||||
mov $1,%eax
|
||||
cpuid
|
||||
shr $26,%edx
|
||||
and $1,%edx
|
||||
pop %ebx
|
||||
mov %edx,%eax
|
||||
ret
|
||||
|
||||
DEF(long long,read_timestamp_counter,(void)):
|
||||
rdtsc
|
||||
ret
|
||||
|
@ -72,6 +61,5 @@ DEF(void,primitive_inline_cache_miss_tail,(void)):
|
|||
|
||||
#ifdef WINDOWS
|
||||
.section .drectve
|
||||
.ascii " -export:check_sse2"
|
||||
.ascii " -export:read_timestamp_counter"
|
||||
#endif
|
||||
|
|
37
vm/cpu-x86.S
37
vm/cpu-x86.S
|
@ -68,7 +68,44 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
|
|||
add $STACK_PADDING,STACK_REG
|
||||
jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */
|
||||
|
||||
/* cpu.x86.features calls this.

   int sse_version(void)

   Runs CPUID leaf 1 and returns a code for the highest SSE level whose
   feature bit is set:
       0 = none, 10 = SSE, 20 = SSE2, 30 = SSE3,
       33 = SSSE3, 41 = SSE4.1, 42 = SSE4.2
   The result is returned in RETURN_REG.

   CPUID overwrites EAX, EBX, ECX and EDX.  EBX/RBX is a callee-saved
   register, so it is saved before CPUID and restored right after it;
   the feature tests below only read ECX and EDX.

   NOTE(review): the 32/64-bit choice relies on the compiler predefining
   __x86_64__ when this file is preprocessed (GCC and Clang do) -- confirm
   for any other toolchain. */
DEF(int,sse_version,(void)):
#ifdef __x86_64__
	push %rbx
#else
	push %ebx
#endif
	mov $0x1,RETURN_REG		/* CPUID leaf 1: feature flags */
	cpuid
#ifdef __x86_64__
	pop %rbx
#else
	pop %ebx
#endif
	test $0x100000,%ecx		/* ECX bit 20: SSE4.2 */
	jnz sse_42
	test $0x80000,%ecx		/* ECX bit 19: SSE4.1 */
	jnz sse_41
	test $0x200,%ecx		/* ECX bit 9: SSSE3 */
	jnz ssse_3
	test $0x1,%ecx			/* ECX bit 0: SSE3 */
	jnz sse_3
	test $0x4000000,%edx		/* EDX bit 26: SSE2 */
	jnz sse_2
	test $0x2000000,%edx		/* EDX bit 25: SSE */
	jnz sse_1
	mov $0,RETURN_REG		/* no SSE support reported */
	ret
sse_42:
	mov $42,RETURN_REG
	ret
sse_41:
	mov $41,RETURN_REG
	ret
ssse_3:
	mov $33,RETURN_REG
	ret
sse_3:
	mov $30,RETURN_REG
	ret
sse_2:
	mov $20,RETURN_REG
	ret
sse_1:
	mov $10,RETURN_REG
	ret
|
||||
#ifdef WINDOWS
|
||||
.section .drectve
|
||||
.ascii " -export:sse_version"
|
||||
.ascii " -export:c_to_factor"
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue