cpu.x86: use SQRTSD instruction for math.libm:fsqrt word

2009-08-25 23:22:15 -05:00 · 2009-08-25 23:22:15 -05:00 · 0df8aadce2
parent 86a8e016fe
commit 0df8aadce2
11 changed files with 29 additions and 22 deletions
--- a/basis/compiler/cfg/hats/hats.factor
+++ b/basis/compiler/cfg/hats/hats.factor
@ -43,6 +43,7 @@ IN: compiler.cfg.hats
 : ^^sub-float ( src1 src2 -- dst ) ^^r2 ##sub-float ; inline
 : ^^mul-float ( src1 src2 -- dst ) ^^r2 ##mul-float ; inline
 : ^^div-float ( src1 src2 -- dst ) ^^r2 ##div-float ; inline
+: ^^sqrt ( src -- dst ) ^^r1 ##sqrt ; inline
 : ^^float>integer ( src -- dst ) ^^r1 ##float>integer ; inline
 : ^^integer>float ( src -- dst ) ^^r1 ##integer>float ; inline
 : ^^allot ( size class -- dst ) ^^r2 next-vreg ##allot ; inline
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -106,6 +106,7 @@ INSN: ##add-float < ##commutative ;
 INSN: ##sub-float < ##binary ;
 INSN: ##mul-float < ##commutative ;
 INSN: ##div-float < ##binary ;
+INSN: ##sqrt < ##unary ;

 ! Float/integer conversion
 INSN: ##float>integer < ##unary ;
@ -256,6 +257,7 @@ UNION: output-float-insn
    ##sub-float
    ##mul-float
    ##div-float
+    ##sqrt
    ##integer>float
    ##unbox-float
    ##alien-float
@ -267,6 +269,7 @@ UNION: input-float-insn
    ##sub-float
    ##mul-float
    ##div-float
+    ##sqrt
    ##float>integer
    ##box-float
    ##set-alien-float
--- a/basis/compiler/cfg/intrinsics/float/float.factor
+++ b/basis/compiler/cfg/intrinsics/float/float.factor
@ -15,3 +15,6 @@ IN: compiler.cfg.intrinsics.float

 : emit-fixnum>float ( -- )
    ds-pop ^^untag-fixnum ^^integer>float ds-push ;
+
+: emit-fsqrt ( -- )
+    ds-pop ^^sqrt ds-push ;
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@ -19,6 +19,7 @@ QUALIFIED: strings.private
 QUALIFIED: classes.tuple.private
 QUALIFIED: math.private
 QUALIFIED: math.integers.private
+QUALIFIED: math.libm
 QUALIFIED: alien.accessors
 IN: compiler.cfg.intrinsics

@ -92,6 +93,9 @@ IN: compiler.cfg.intrinsics
        alien.accessors:set-alien-double
    } [ t "intrinsic" set-word-prop ] each ;

+: enable-fsqrt ( -- )
+    \ math.libm:fsqrt t "intrinsic" set-word-prop ;
+
 : enable-fixnum-log2 ( -- )
    \ math.integers.private:fixnum-log2 t "intrinsic" set-word-prop ;

@ -130,6 +134,7 @@ IN: compiler.cfg.intrinsics
        { \ math.private:float= [ drop cc= emit-float-comparison ] }
        { \ math.private:float>fixnum [ drop emit-float>fixnum ] }
        { \ math.private:fixnum>float [ drop emit-fixnum>float ] }
+        { \ math.libm:fsqrt [ drop emit-fsqrt ] }
        { \ slots.private:slot [ emit-slot ] }
        { \ slots.private:set-slot [ emit-set-slot ] }
        { \ strings.private:string-nth [ drop emit-string-nth ] }
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@ -170,6 +170,8 @@ M: ##sub-float generate-insn dst/src1/src2 %sub-float ;
 M: ##mul-float generate-insn dst/src1/src2 %mul-float ;
 M: ##div-float generate-insn dst/src1/src2 %div-float ;

+M: ##sqrt generate-insn dst/src %sqrt ;
+
 M: ##integer>float generate-insn dst/src %integer>float ;
 M: ##float>integer generate-insn dst/src %float>integer ;

--- a/basis/compiler/tree/propagation/known-words/known-words.factor
+++ b/basis/compiler/tree/propagation/known-words/known-words.factor
@ -2,11 +2,12 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel effects accessors math math.private
 math.integers.private math.partial-dispatch math.intervals
-math.parser math.order math.functions layouts words sequences sequences.private
-arrays assocs classes classes.algebra combinators generic.math
-splitting fry locals classes.tuple alien.accessors
-classes.tuple.private slots.private definitions strings.private
-vectors hashtables generic quotations
+math.parser math.order math.functions math.libm layouts words
+sequences sequences.private arrays assocs classes
+classes.algebra combinators generic.math splitting fry locals
+classes.tuple alien.accessors classes.tuple.private
+slots.private definitions strings.private vectors hashtables
+generic quotations
 stack-checker.state
 compiler.tree.comparisons
 compiler.tree.propagation.info
@ -297,3 +298,8 @@ generic-comparison-ops [
        bi
    ] [ 2drop object-info ] if
 ] "outputs" set-word-prop
+
+{ facos fasin fatan fatan2 fcos fsin ftan fcosh fsinh ftanh fexp
+flog fpow fsqrt facosh fasinh fatanh } [
+    { float } "default-output-classes" set-word-prop
+] each
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -110,6 +110,7 @@ HOOK: %add-float cpu ( dst src1 src2 -- )
 HOOK: %sub-float cpu ( dst src1 src2 -- )
 HOOK: %mul-float cpu ( dst src1 src2 -- )
 HOOK: %div-float cpu ( dst src1 src2 -- )
+HOOK: %sqrt cpu ( dst src -- )

 HOOK: %integer>float cpu ( dst src -- )
 HOOK: %float>integer cpu ( dst src -- )
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@ -304,6 +304,7 @@ USING: cpu.x86.features cpu.x86.features.private ;
    sse2? [
        " - yes" print
        enable-float-intrinsics
+        enable-fsqrt
        [
            sse2? [
                "This image was built to use SSE2, which your CPU does not support." print
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@ -203,6 +203,7 @@ enable-alien-4-intrinsics

 ! SSE2 is always available on x86-64.
 enable-float-intrinsics
+enable-fsqrt

 USE: vocabs.loader

--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -203,6 +203,7 @@ M: x86 %add-float nip ADDSD ;
 M: x86 %sub-float nip SUBSD ;
 M: x86 %mul-float nip MULSD ;
 M: x86 %div-float nip DIVSD ;
+M: x86 %sqrt SQRTSD ;

 M: x86 %integer>float CVTSI2SD ;
 M: x86 %float>integer CVTTSD2SI ;
--- a/basis/math/libm/libm.factor
+++ b/basis/math/libm/libm.factor
@ -5,69 +5,52 @@ IN: math.libm

 : facos ( x -- y )
    "double" "libm" "acos" { "double" } alien-invoke ;
-    inline

 : fasin ( x -- y )
    "double" "libm" "asin" { "double" } alien-invoke ;
-    inline

 : fatan ( x -- y )
    "double" "libm" "atan" { "double" } alien-invoke ;
-    inline

 : fatan2 ( x y -- z )
    "double" "libm" "atan2" { "double" "double" } alien-invoke ;
-    inline

 : fcos ( x -- y )
    "double" "libm" "cos" { "double" } alien-invoke ;
-    inline

 : fsin ( x -- y )
    "double" "libm" "sin" { "double" } alien-invoke ;
-    inline

 : ftan ( x -- y )
    "double" "libm" "tan" { "double" } alien-invoke ;
-    inline

 : fcosh ( x -- y )
    "double" "libm" "cosh" { "double" } alien-invoke ;
-    inline

 : fsinh ( x -- y )
    "double" "libm" "sinh" { "double" } alien-invoke ;
-    inline

 : ftanh ( x -- y )
    "double" "libm" "tanh" { "double" } alien-invoke ;
-    inline

 : fexp ( x -- y )
    "double" "libm" "exp" { "double" } alien-invoke ;
-    inline

 : flog ( x -- y )
    "double" "libm" "log" { "double" } alien-invoke ;
-    inline

 : fpow ( x y -- z )
    "double" "libm" "pow" { "double" "double" } alien-invoke ;
-    inline

 : fsqrt ( x -- y )
    "double" "libm" "sqrt" { "double" } alien-invoke ;
-    inline
    
 ! Windows doesn't have these...
 : facosh ( x -- y )
    "double" "libm" "acosh" { "double" } alien-invoke ;
-    inline

 : fasinh ( x -- y )
    "double" "libm" "asinh" { "double" } alien-invoke ;
-    inline

 : fatanh ( x -- y )
    "double" "libm" "atanh" { "double" } alien-invoke ;
-    inline