Detect SSE version and enable the correct set of SIMD intrinsics

db4
Slava Pestov 2009-09-03 03:28:38 -05:00
parent ae051e0c9a
commit 906a0d212a
7 changed files with 113 additions and 44 deletions

View File

@ -295,22 +295,6 @@ os windows? [
4 "double" c-type (>>align)
] unless
USING: cpu.x86.features cpu.x86.features.private ;
USE: vocabs.loader
! Unless "-no-sse2" appears on the command line, probe the CPU for SSE2 and,
! when it is present, enable the SSE2 code paths for this image.
"-no-sse2" (command-line) member? [
! Compile the check_sse2 binding with the optimizer before sse2? is called.
[ { check_sse2 } compile ] with-optimizer
"Checking if your CPU supports SSE2..." print flush
sse2? [
" - yes" print
enable-sse2
! Register a hook under the name "cpu.x86" that repeats the sse2? test and
! exits with status 1 when it fails, because SSE2 was enabled above.
[
sse2? [
"This image was built to use SSE2, which your CPU does not support." print
"You will need to bootstrap Factor again." print
flush
1 exit
] unless
] "cpu.x86" add-init-hook
] [ " - no" print ] if
] unless
"cpu.x86.features" require

View File

@ -221,12 +221,11 @@ enable-alien-4-intrinsics
! Enable fast calling of libc math functions
enable-float-functions
! SSE2 is always available on x86-64.
enable-sse2
USE: vocabs.loader
{
{ [ os unix? ] [ "cpu.x86.64.unix" require ] }
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
"cpu.x86.features" require

View File

@ -1,21 +1,29 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: system kernel math alien.syntax ;
USING: system kernel math alien.syntax combinators locals init io
cpu.x86 compiler compiler.units accessors ;
IN: cpu.x86.features
<PRIVATE
FUNCTION: bool check_sse2 ( ) ;
FUNCTION: int sse_version ( ) ;
FUNCTION: longlong read_timestamp_counter ( ) ;
PRIVATE>
HOOK: sse2? cpu ( -- ? )
ALIAS: sse-version sse_version
M: x86.32 sse2? check_sse2 ;
M: x86.64 sse2? t ;
! Maps a numeric SSE version code (the values produced by sse-version) to
! its display name. Only the codes listed below are handled by the case.
: sse-string ( version -- string )
{
{ 00 [ "no SSE" ] }
{ 10 [ "SSE1" ] }
{ 20 [ "SSE2" ] }
{ 30 [ "SSE3" ] }
{ 33 [ "SSSE3" ] }
{ 41 [ "SSE4.1" ] }
{ 42 [ "SSE4.2" ] }
} case ;
HOOK: instruction-count cpu ( -- n )
@ -23,3 +31,36 @@ M: x86 instruction-count read_timestamp_counter ;
! Calls quot and returns the instruction-count reading taken after the call
! minus the reading taken before it.
: count-instructions ( quot -- n )
instruction-count [ call ] dip instruction-count swap - ; inline
USING: cpu.x86.features cpu.x86.features.private ;
! Registers a hook under the name "cpu.x86" that compares the running CPU's
! sse-version with version, the SSE level this image was built to use.
! When the CPU reports a lower level the hook prints both levels, tells the
! user to bootstrap again, and exits with status 1.
:: install-sse-check ( version -- )
[
sse-version version < [
"This image was built to use " write
version sse-string write
! "only" makes it clear that the detected level is below the required one.
" but your CPU only supports " write
sse-version sse-string write "." print
"You will need to bootstrap Factor again." print
flush
1 exit
] when
] "cpu.x86" add-init-hook ;
! Enables the compiler features matching an SSE version code.
! Codes 00 and 10 enable nothing, 20 calls enable-sse2, and every code from
! 30 upward (SSE3, SSSE3, SSE4.1, SSE4.2) calls enable-sse3.
! Only the codes listed below are handled by the case.
: enable-sse ( version -- )
{
{ 00 [ ] }
{ 10 [ ] }
{ 20 [ enable-sse2 ] }
{ 30 [ enable-sse3 ] }
{ 33 [ enable-sse3 ] }
{ 41 [ enable-sse3 ] }
{ 42 [ enable-sse3 ] }
} case ;
! Load-time detection: compile the sse_version binding with the optimizer,
! query the CPU once, then apply three quotations to the same version code:
! report it, install the startup check, and enable the matching features.
[ { sse_version } compile ] with-optimizer
"Checking for multimedia extensions: " write sse-version
[ sse-string write " detected" print ]
[ install-sse-check ]
[ enable-sse ] tri

View File

@ -700,6 +700,11 @@ M: x86 small-enough? ( n -- ? )
: enable-sse2 ( -- )
enable-float-intrinsics
enable-fsqrt
enable-float-min/max ;
enable-float-min/max
enable-sse2-simd ;
! Enables everything enable-sse2 does, then additionally enable-sse3-simd.
: enable-sse3 ( -- )
enable-sse2
enable-sse3-simd ;
enable-min/max

View File

@ -4,7 +4,7 @@ USING: accessors alien.c-types byte-arrays cpu.architecture
generalizations kernel math math.functions math.vectors
math.vectors.simd.functor math.vectors.specialization parser
prettyprint.custom sequences sequences.private
specialized-arrays.double locals assocs literals ;
specialized-arrays.double locals assocs literals words fry ;
IN: math.vectors.simd
<PRIVATE
@ -69,8 +69,20 @@ PRIVATE>
<PRIVATE
:: simd-vector-words ( class ctor elt-type assoc -- )
class elt-type assoc {
! Restricts assoc to the vector operations whose SIMD primitive is available.
! The literal table pairs each generic vector word with its (simd-...) word;
! the first filter keeps the pairs whose SIMD word has an "intrinsic" word
! property, and the second keeps the entries of the input assoc whose key
! is still present in that filtered table.
: supported-simd-ops ( assoc -- assoc' )
{
{ v+ (simd-v+) }
{ v- (simd-v-) }
{ v* (simd-v*) }
{ v/ (simd-v/) }
{ vmin (simd-vmin) }
{ vmax (simd-vmax) }
{ sum (simd-sum) }
} [ nip "intrinsic" word-prop ] assoc-filter
'[ drop _ key? ] assoc-filter ;
:: high-level-ops ( ctor -- assoc )
{
{ vneg [ [ dup v- ] keep v- ] }
{ v. [ v* sum ] }
{ n+v [ [ ctor execute ] dip v+ ] }
@ -85,7 +97,10 @@ PRIVATE>
{ norm [ norm-sq sqrt ] }
{ normalize [ dup norm v/n ] }
{ distance [ v- norm ] }
} assoc-union
} ;
! Specializes the vector words for class: the entries of assoc that pass
! supported-simd-ops are merged (assoc-union) with the ops built by
! high-level-ops for ctor, and the result goes to specialize-vector-words.
:: simd-vector-words ( class ctor elt-type assoc -- )
class elt-type assoc supported-simd-ops ctor high-level-ops assoc-union
specialize-vector-words ;
PRIVATE>

View File

@ -44,17 +44,6 @@ DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, voi
add $12,%esp /* pop args from the stack */
ret /* return _with new stack_ */
/* cpu.x86.32 calls this.

   bool check_sse2(void)
   Returns 1 in EAX when CPUID leaf 1 reports SSE2 (EDX bit 26), else 0.
   CPUID overwrites EBX, so it is saved and restored around the instruction. */
DEF(bool,check_sse2,(void)):
	push %ebx
	mov $1,%eax /* select CPUID leaf 1 (feature flags) */
	cpuid
	pop %ebx /* EBX is no longer needed; pop leaves EDX and flags alone */
	mov %edx,%eax
	shr $26,%eax /* move the SSE2 bit down to bit 0 */
	and $1,%eax /* discard the remaining feature bits */
	ret
/* long long read_timestamp_counter(void)
   Executes RDTSC, which loads the time-stamp counter into EDX:EAX, and
   returns with that pair as the result. */
DEF(long long,read_timestamp_counter,(void)):
rdtsc
ret
@ -72,6 +61,5 @@ DEF(void,primitive_inline_cache_miss_tail,(void)):
#ifdef WINDOWS
/* Windows only: emit "-export:" options into the .drectve section so the
   named routines are exported from the linked binary. */
.section .drectve
.ascii " -export:check_sse2"
.ascii " -export:read_timestamp_counter"
#endif

View File

@ -68,7 +68,44 @@ DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
add $STACK_PADDING,STACK_REG
jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */
/* cpu.x86.features calls this.

   int sse_version(void)

   Executes CPUID leaf 1 and returns a code for the highest SSE level whose
   feature bit is set:
     42 = SSE4.2, 41 = SSE4.1, 33 = SSSE3, 30 = SSE3, 20 = SSE2, 10 = SSE,
     0 = no SSE.
   The result is an integer, not a boolean; it matches the Factor-side
   declaration "FUNCTION: int sse_version ( ) ;".

   CPUID overwrites EAX, EBX, ECX and EDX. EBX/RBX is callee-saved in the
   32-bit C calling convention and in both 64-bit ABIs (System V, Win64),
   so it is preserved around the instruction. */
DEF(int,sse_version,(void)):
#ifdef __x86_64__
	mov %rbx,%r8 /* r8 is caller-saved and untouched by cpuid; keeps the stack pointer fixed */
#else
	push %ebx /* 32-bit: every scratch register is overwritten by cpuid, so use the stack */
#endif
	mov $0x1,RETURN_REG /* select CPUID leaf 1 (feature flags in ECX/EDX) */
	cpuid
#ifdef __x86_64__
	mov %r8,%rbx
#else
	pop %ebx
#endif
	/* The restore above leaves ECX and EDX intact. Probe from newest to oldest. */
	test $0x100000,%ecx /* ECX bit 20: SSE4.2 */
	jnz sse_42
	test $0x80000,%ecx /* ECX bit 19: SSE4.1 */
	jnz sse_41
	test $0x200,%ecx /* ECX bit 9: SSSE3 */
	jnz ssse_3
	test $0x1,%ecx /* ECX bit 0: SSE3 */
	jnz sse_3
	test $0x4000000,%edx /* EDX bit 26: SSE2 */
	jnz sse_2
	test $0x2000000,%edx /* EDX bit 25: SSE */
	jnz sse_1
	xor %eax,%eax /* no SSE support reported */
	ret
sse_42:
	mov $42,RETURN_REG
	ret
sse_41:
	mov $41,RETURN_REG
	ret
ssse_3:
	mov $33,RETURN_REG
	ret
sse_3:
	mov $30,RETURN_REG
	ret
sse_2:
	mov $20,RETURN_REG
	ret
sse_1:
	mov $10,RETURN_REG
	ret
#ifdef WINDOWS
/* Windows only: emit "-export:" options into the .drectve section so the
   named routines are exported from the linked binary. */
.section .drectve
.ascii " -export:sse_version"
.ascii " -export:c_to_factor"
#endif