compiler.cfg.linear-scan now supports partial sync-points where all registers are spilled; taking advantage of this, there are new trigonometric intrinsics which yield a 2x performance boost on benchmark.struct-arrays and a 25% boost on benchmark.partial-sums

2009-08-30 04:52:01 -05:00 · 2009-08-30 04:52:01 -05:00 · 0db01f6d5f
parent 2d4ba8de4d
commit 0db01f6d5f
17 changed files with 257 additions and 91 deletions
--- a/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor
+++ b/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor
@ -14,13 +14,12 @@ GENERIC: compute-stack-frame* ( insn -- )
    frame-required? on
    stack-frame [ max-stack-frame ] change ;

-M: ##alien-invoke compute-stack-frame*
-    stack-frame>> request-stack-frame ;
+UNION: stack-frame-insn
+    ##alien-invoke
+    ##alien-indirect
+    ##alien-callback ;

-M: ##alien-indirect compute-stack-frame*
-    stack-frame>> request-stack-frame ;
-
-M: ##alien-callback compute-stack-frame*
+M: stack-frame-insn compute-stack-frame*
    stack-frame>> request-stack-frame ;

 M: ##call compute-stack-frame*
@ -40,6 +39,8 @@ M: insn compute-stack-frame*
    ] when ;

 \ _spill t frame-required? set-word-prop
+\ ##unary-float-function t frame-required? set-word-prop
+\ ##binary-float-function t frame-required? set-word-prop

 : compute-stack-frame ( insns -- )
    frame-required? off
--- a/basis/compiler/cfg/hats/hats.factor
+++ b/basis/compiler/cfg/hats/hats.factor
@ -47,6 +47,8 @@ IN: compiler.cfg.hats
 : ^^div-float ( src1 src2 -- dst ) ^^r2 ##div-float ; inline
 : ^^max-float ( src1 src2 -- dst ) ^^r2 ##max-float ; inline
 : ^^min-float ( src1 src2 -- dst ) ^^r2 ##min-float ; inline
+: ^^unary-float-function ( src func -- dst ) ^^r2 ##unary-float-function ; inline
+: ^^binary-float-function ( src1 src2 func -- dst ) ^^r3 ##binary-float-function ; inline
 : ^^sqrt ( src -- dst ) ^^r1 ##sqrt ; inline
 : ^^float>integer ( src -- dst ) ^^r1 ##float>integer ; inline
 : ^^integer>float ( src -- dst ) ^^r1 ##integer>float ; inline
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -112,6 +112,10 @@ INSN: ##min-float < ##binary ;
 INSN: ##max-float < ##binary ;
 INSN: ##sqrt < ##unary ;

+! libc intrinsics
+INSN: ##unary-float-function < ##unary func ;
+INSN: ##binary-float-function < ##binary func ;
+
 ! Float/integer conversion
 INSN: ##float>integer < ##unary ;
 INSN: ##integer>float < ##unary ;
@ -252,6 +256,11 @@ UNION: vreg-insn
    _compare-imm-branch
    _dispatch ;

+! Instructions that kill all live vregs but cannot trigger GC
+UNION: partial-sync-insn
+    ##unary-float-function
+    ##binary-float-function ;
+
 ! Instructions that kill all live vregs
 UNION: kill-vreg-insn
    ##call
@ -270,6 +279,8 @@ UNION: output-float-insn
    ##min-float
    ##max-float
    ##sqrt
+    ##unary-float-function
+    ##binary-float-function
    ##integer>float
    ##unbox-float
    ##alien-float
@ -284,6 +295,8 @@ UNION: input-float-insn
    ##min-float
    ##max-float
    ##sqrt
+    ##unary-float-function
+    ##binary-float-function
    ##float>integer
    ##box-float
    ##set-alien-float
--- a/basis/compiler/cfg/intrinsics/float/float.factor
+++ b/basis/compiler/cfg/intrinsics/float/float.factor
@ -18,3 +18,9 @@ IN: compiler.cfg.intrinsics.float

 : emit-fsqrt ( -- )
    ds-pop ^^sqrt ds-push ;
+
+: emit-unary-float-function ( func -- )
+    [ ds-pop ] dip ^^unary-float-function ds-push ;
+
+: emit-binary-float-function ( func -- )
+    [ 2inputs ] dip ^^binary-float-function ds-push ;
--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@ -108,6 +108,27 @@ IN: compiler.cfg.intrinsics
        math.floats.private:float-max
    } enable-intrinsics ;

+: enable-float-functions ( -- )
+    ! Everything except for fsqrt
+    {
+        math.libm:facos
+        math.libm:fasin
+        math.libm:fatan
+        math.libm:fatan2
+        math.libm:fcos
+        math.libm:fsin
+        math.libm:ftan
+        math.libm:fcosh
+        math.libm:fsinh
+        math.libm:ftanh
+        math.libm:fexp
+        math.libm:flog
+        math.libm:fpow
+        math.libm:facosh
+        math.libm:fasinh
+        math.libm:fatanh
+    } enable-intrinsics ;
+
 : enable-min/max ( -- )
    {
        math.integers.private:fixnum-min
@ -157,6 +178,22 @@ IN: compiler.cfg.intrinsics
        { \ math.floats.private:float-min [ drop [ ^^min-float ] emit-float-op ] }
        { \ math.floats.private:float-max [ drop [ ^^max-float ] emit-float-op ] }
        { \ math.libm:fsqrt [ drop emit-fsqrt ] }
+        { \ math.libm:facos [ drop "acos" emit-unary-float-function ] }
+        { \ math.libm:fasin [ drop "asin" emit-unary-float-function ] }
+        { \ math.libm:fatan [ drop "atan" emit-unary-float-function ] }
+        { \ math.libm:fatan2 [ drop "atan2" emit-binary-float-function ] }
+        { \ math.libm:fcos [ drop "cos" emit-unary-float-function ] }
+        { \ math.libm:fsin [ drop "sin" emit-unary-float-function ] }
+        { \ math.libm:ftan [ drop "tan" emit-unary-float-function ] }
+        { \ math.libm:fcosh [ drop "cosh" emit-unary-float-function ] }
+        { \ math.libm:fsinh [ drop "sinh" emit-unary-float-function ] }
+        { \ math.libm:ftanh [ drop "tanh" emit-unary-float-function ] }
+        { \ math.libm:fexp [ drop "exp" emit-unary-float-function ] }
+        { \ math.libm:flog [ drop "log" emit-unary-float-function ] }
+        { \ math.libm:fpow [ drop "pow" emit-binary-float-function ] }
+        { \ math.libm:facosh [ drop "acosh" emit-unary-float-function ] }
+        { \ math.libm:fasinh [ drop "asinh" emit-unary-float-function ] }
+        { \ math.libm:fatanh [ drop "atanh" emit-unary-float-function ] }
        { \ slots.private:slot [ emit-slot ] }
        { \ slots.private:set-slot [ emit-set-slot ] }
        { \ strings.private:string-nth [ drop emit-string-nth ] }
--- a/basis/compiler/cfg/linear-scan/allocation/allocation.factor
+++ b/basis/compiler/cfg/linear-scan/allocation/allocation.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2008, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs heaps kernel namespaces sequences fry math
-math.order combinators arrays sorting compiler.utilities
+math.order combinators arrays sorting compiler.utilities locals
 compiler.cfg.linear-scan.live-intervals
 compiler.cfg.linear-scan.allocation.spilling
 compiler.cfg.linear-scan.allocation.splitting
@ -34,22 +34,48 @@ IN: compiler.cfg.linear-scan.allocation
        [ drop assign-blocked-register ]
    } cond ;

-: handle-interval ( live-interval -- )
-    [
-        start>>
+: handle-sync-point ( n -- )
+    [ active-intervals get values ] dip
+    [ '[ [ _ spill ] each ] each ]
+    [ drop [ delete-all ] each ]
+    2bi ;
+
+:: handle-progress ( n sync? -- )
+    n {
        [ progress set ]
        [ deactivate-intervals ]
-        [ activate-intervals ] tri
-    ] [ assign-register ] bi ;
+        [ sync? [ handle-sync-point ] [ drop ] if ]
+        [ activate-intervals ]
+    } cleave ;
+
+GENERIC: handle ( obj -- )
+
+M: live-interval handle ( live-interval -- )
+    [ start>> f handle-progress ] [ assign-register ] bi ;
+
+M: sync-point handle ( sync-point -- )
+    n>> t handle-progress ;
+
+: smallest-heap ( heap1 heap2 -- heap )
+    ! If heap1 and heap2 have the same key, favors heap1.
+    [ [ heap-peek nip ] bi@ <= ] most ;

 : (allocate-registers) ( -- )
-    unhandled-intervals get [ handle-interval ] slurp-heap ;
+    {
+        { [ unhandled-intervals get heap-empty? ] [ unhandled-sync-points get ] }
+        { [ unhandled-sync-points get heap-empty? ] [ unhandled-intervals get ] }
+        ! If a live interval begins at the same location as a sync point,
+        ! process the sync point before the live interval. This ensures that the
+        ! return value of C function calls doesn't get spilled and reloaded
+        ! unnecessarily.
+        [ unhandled-sync-points get unhandled-intervals get smallest-heap ]
+    } cond dup heap-empty? [ drop ] [ heap-pop drop handle (allocate-registers) ] if ;

 : finish-allocation ( -- )
    active-intervals inactive-intervals
    [ get values [ handled-intervals get push-all ] each ] bi@ ;

-: allocate-registers ( live-intervals machine-registers -- live-intervals )
+: allocate-registers ( live-intervals sync-point machine-registers -- live-intervals )
    init-allocator
    init-unhandled
    (allocate-registers)
--- a/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor
+++ b/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor
@ -29,7 +29,7 @@ ERROR: bad-live-ranges interval ;
    2bi ;

 : assign-spill ( live-interval -- )
-    dup vreg>> assign-spill-slot >>spill-to drop ;
+    dup vreg>> vreg-spill-slot >>spill-to drop ;

 : spill-before ( before -- before/f )
    ! If the interval does not have any usages before the spill location,
@ -46,7 +46,7 @@ ERROR: bad-live-ranges interval ;
    ] if ;

 : assign-reload ( live-interval -- )
-    dup vreg>> assign-spill-slot >>reload-from drop ;
+    dup vreg>> vreg-spill-slot >>reload-from drop ;

 : spill-after ( after -- after/f )
    ! If the interval has no more usages after the spill location,
--- a/basis/compiler/cfg/linear-scan/allocation/state/state.factor
+++ b/basis/compiler/cfg/linear-scan/allocation/state/state.factor
@ -120,15 +120,19 @@ SYMBOL: unhandled-intervals
    rep-size cfg get
    [ swap [ align dup ] [ + ] bi ] change-spill-area-size drop ;

+! Minheap of sync points which still need to be processed
+SYMBOL: unhandled-sync-points
+
 ! Mapping from vregs to spill slots
 SYMBOL: spill-slots

-: assign-spill-slot ( vreg -- n )
+: vreg-spill-slot ( vreg -- n )
    spill-slots get [ rep-of next-spill-slot ] cache ;

 : init-allocator ( registers -- )
    registers set
    <min-heap> unhandled-intervals set
+    <min-heap> unhandled-sync-points set
    [ V{ } clone ] reg-class-assoc active-intervals set
    [ V{ } clone ] reg-class-assoc inactive-intervals set
    V{ } clone handled-intervals set
@ -136,9 +140,10 @@ SYMBOL: spill-slots
    H{ } clone spill-slots set
    -1 progress set ;

-: init-unhandled ( live-intervals -- )
-    [ [ start>> ] keep ] { } map>assoc
-    unhandled-intervals get heap-push-all ;
+: init-unhandled ( live-intervals sync-points -- )
+    [ [ [ start>> ] keep ] { } map>assoc unhandled-intervals get heap-push-all ]
+    [ [ [ n>> ] keep ] { } map>assoc unhandled-sync-points get heap-push-all ]
+    bi* ;

 ! A utility used by register-status and spill-status words
 : free-positions ( new -- assoc )
--- a/basis/compiler/cfg/linear-scan/assignment/assignment.factor
+++ b/basis/compiler/cfg/linear-scan/assignment/assignment.factor
@ -28,6 +28,20 @@ SYMBOL: pending-interval-assoc
 : remove-pending ( live-interval -- )
    vreg>> pending-interval-assoc get delete-at ;

+: (vreg>reg) ( vreg pending -- reg )
+    ! If a live vreg is not in the pending set, then it must
+    ! have been spilled.
+    ?at [ spill-slots get at <spill-slot> ] unless ;
+
+: vreg>reg ( vreg -- reg )
+    pending-interval-assoc get (vreg>reg) ;
+
+: vregs>regs ( vregs -- assoc )
+    dup assoc-empty? [
+        pending-interval-assoc get
+        '[ _ (vreg>reg) ] assoc-map
+    ] unless ;
+
 ! Minheap of live intervals which still need a register allocation
 SYMBOL: unhandled-intervals

@ -96,8 +110,6 @@ SYMBOL: register-live-outs

 GENERIC: assign-registers-in-insn ( insn -- )

-: vreg>reg ( vreg -- reg ) pending-interval-assoc get at ;
-
 RENAMING: assign [ vreg>reg ] [ vreg>reg ] [ vreg>reg ]

 M: vreg-insn assign-registers-in-insn
@ -123,7 +135,7 @@ M: vreg-insn assign-registers-in-insn
    [
        [
            2dup spill-on-gc?
-            [ swap [ assign-spill-slot ] [ rep-of ] bi 3array , ] [ 2drop ] if
+            [ swap [ vreg-spill-slot ] [ rep-of ] bi 3array , ] [ 2drop ] if
        ] assoc-each
    ] { } make ;

@ -137,23 +149,13 @@ M: ##gc assign-registers-in-insn

 M: insn assign-registers-in-insn drop ;

-: compute-live-values ( vregs -- assoc )
-    ! If a live vreg is not in active or inactive, then it must have been
-    ! spilled.
-    dup assoc-empty? [
-        pending-interval-assoc get
-        '[ _ ?at [ ] [ spill-slots get at <spill-slot> ] if ] assoc-map
-    ] unless ;
-
 : begin-block ( bb -- )
    dup basic-block set
    dup block-from activate-new-intervals
-    [ live-in compute-live-values ] keep
-    register-live-ins get set-at ;
+    [ live-in vregs>regs ] keep register-live-ins get set-at ;

 : end-block ( bb -- )
-    [ live-out compute-live-values ] keep
-    register-live-outs get set-at ;
+    [ live-out vregs>regs ] keep register-live-outs get set-at ;

 ERROR: bad-vreg vreg ;

--- a/basis/compiler/cfg/linear-scan/debugger/debugger.factor
+++ b/basis/compiler/cfg/linear-scan/debugger/debugger.factor
@ -9,6 +9,7 @@ IN: compiler.cfg.linear-scan.debugger
    [
        [ clone ] map dup [ [ vreg>> ] keep ] H{ } map>assoc
        live-intervals set
+        f
    ] dip
    allocate-registers drop ;

--- a/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor
+++ b/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor
@ -32,9 +32,12 @@ M: live-interval covers? ( insn# live-interval -- ? )
        
 ERROR: dead-value-error vreg ;

+: add-new-range ( from to live-interval -- )
+    [ <live-range> ] dip ranges>> push ;
+
 : shorten-range ( n live-interval -- )
    dup ranges>> empty?
-    [ vreg>> dead-value-error ] [ ranges>> last (>>from) ] if ;
+    [ dupd add-new-range ] [ ranges>> last (>>from) ] if ;

 : extend-range ( from to live-range -- )
    ranges>> last
@ -42,9 +45,6 @@ ERROR: dead-value-error vreg ;
    [ min ] change-from
    drop ;

-: add-new-range ( from to live-interval -- )
-    [ <live-range> ] dip ranges>> push ;
-
 : extend-range? ( to live-interval -- ? )
    ranges>> [ drop f ] [ last from>> >= ] if-empty ;

@ -52,8 +52,18 @@ ERROR: dead-value-error vreg ;
    2dup extend-range?
    [ extend-range ] [ add-new-range ] if ;

-: add-use ( n live-interval -- )
-    uses>> push ;
+GENERIC: operands-in-registers? ( insn -- ? )
+
+M: vreg-insn operands-in-registers? drop t ;
+
+M: partial-sync-insn operands-in-registers? drop f ;
+
+: add-def ( insn live-interval -- )
+    [ insn#>> ] [ uses>> ] bi* push ;
+
+: add-use ( insn live-interval -- )
+    ! Every use is a potential def, no SSA here baby!
+    over operands-in-registers? [ add-def ] [ 2drop ] if ;

 : <live-interval> ( vreg -- live-interval )
    \ live-interval new
@ -68,51 +78,68 @@ ERROR: dead-value-error vreg ;
 M: live-interval hashcode*
    nip [ start>> ] [ end>> 1000 * ] bi + ;

-M: live-interval clone
-    call-next-method [ clone ] change-uses ;
-
 ! Mapping from vreg to live-interval
 SYMBOL: live-intervals

-: live-interval ( vreg live-intervals -- live-interval )
-    [ <live-interval> ] cache ;
+: live-interval ( vreg -- live-interval )
+    live-intervals get [ <live-interval> ] cache ;

 GENERIC: compute-live-intervals* ( insn -- )

 M: insn compute-live-intervals* drop ;

-: handle-output ( n vreg live-intervals -- )
+: handle-output ( insn vreg -- )
    live-interval
-    [ add-use ] [ shorten-range ] 2bi ;
+    [ [ insn#>> ] dip shorten-range ] [ add-def ] 2bi ;

-: handle-input ( n vreg live-intervals -- )
+: handle-input ( insn vreg -- )
    live-interval
-    [ [ basic-block get block-from ] 2dip add-range ] [ add-use ] 2bi ;
+    [ [ [ basic-block get block-from ] dip insn#>> ] dip add-range ] [ add-use ] 2bi ;

-: handle-temp ( n vreg live-intervals -- )
+: handle-temp ( insn vreg -- )
    live-interval
-    [ dupd add-range ] [ add-use ] 2bi ;
+    [ [ insn#>> dup ] dip add-range ] [ add-use ] 2bi ;

 M: vreg-insn compute-live-intervals*
-    dup insn#>>
-    live-intervals get
-    [ [ defs-vreg ] 2dip '[ [ _ ] dip _ handle-output ] when* ]
-    [ [ uses-vregs ] 2dip '[ [ _ ] dip _ handle-input ] each ]
-    [ [ temp-vregs ] 2dip '[ [ _ ] dip _ handle-temp ] each ]
-    3tri ;
+    [ dup defs-vreg [ handle-output ] with when* ]
+    [ dup uses-vregs [ handle-input ] with each ]
+    [ dup temp-vregs [ handle-temp ] with each ]
+    tri ;

 : handle-live-out ( bb -- )
-    live-out keys
-    basic-block get [ block-from ] [ block-to ] bi
-    live-intervals get '[
-        [ _ _ ] dip _ live-interval add-range
-    ] each ;
+    [ block-from ] [ block-to ] [ live-out keys ] tri
+    [ live-interval add-range ] with with each ;
+
+! A location where all registers have to be spilled
+TUPLE: sync-point n ;
+
+C: <sync-point> sync-point
+
+! Sequence of sync points
+SYMBOL: sync-points
+
+GENERIC: compute-sync-points* ( insn -- )
+
+M: partial-sync-insn compute-sync-points*
+    insn#>> <sync-point> sync-points get push ;
+
+M: insn compute-sync-points* drop ;

 : compute-live-intervals-step ( bb -- )
    [ basic-block set ]
    [ handle-live-out ]
-    [ instructions>> <reversed> [ compute-live-intervals* ] each ] tri ;
+    [
+        instructions>> <reversed> [
+            [ compute-live-intervals* ]
+            [ compute-sync-points* ]
+            bi
+        ] each
+    ] tri ;

+: init-live-intervals ( -- )
+    H{ } clone live-intervals set
+    V{ } clone sync-points set ;
+    
 : compute-start/end ( live-interval -- )
    dup ranges>> [ first from>> ] [ last to>> ] bi
    [ >>start ] [ >>end ] bi* drop ;
@ -122,10 +149,10 @@ ERROR: bad-live-interval live-interval ;
 : check-start ( live-interval -- )
    dup start>> -1 = [ bad-live-interval ] [ drop ] if ;

-: finish-live-intervals ( live-intervals -- )
+: finish-live-intervals ( live-intervals -- seq )
    ! Since live intervals are computed in a backward order, we have
    ! to reverse some sequences, and compute the start and end.
-    [
+    values dup [
        {
            [ ranges>> reverse-here ]
            [ uses>> reverse-here ]
@ -134,12 +161,11 @@ ERROR: bad-live-interval live-interval ;
        } cleave
    ] each ;

-: compute-live-intervals ( cfg -- live-intervals )
-    H{ } clone [
-        live-intervals set
-        linearization-order <reversed>
-        [ compute-live-intervals-step ] each
-    ] keep values dup finish-live-intervals ;
+: compute-live-intervals ( cfg -- live-intervals sync-points )
+    init-live-intervals
+    linearization-order <reversed> [ compute-live-intervals-step ] each
+    live-intervals get finish-live-intervals
+    sync-points get ;

 : relevant-ranges ( interval1 interval2 -- ranges1 ranges2 )
    [ [ ranges>> ] bi@ ] [ nip start>> ] 2bi '[ to>> _ >= ] filter ;
--- a/basis/compiler/cfg/value-numbering/expressions/expressions.factor
+++ b/basis/compiler/cfg/value-numbering/expressions/expressions.factor
@ -12,6 +12,8 @@ TUPLE: commutative-expr < binary-expr ;
 TUPLE: compare-expr < binary-expr cc ;
 TUPLE: constant-expr < expr value ;
 TUPLE: reference-expr < expr value ;
+TUPLE: unary-float-function-expr < expr in func ;
+TUPLE: binary-float-function-expr < expr in1 in2 func ;
 TUPLE: box-displaced-alien-expr < expr displacement base base-class ;

 : <constant> ( constant -- expr )
@ -94,6 +96,19 @@ M: ##box-displaced-alien >expr
        [ base-class>> ]
    } cleave box-displaced-alien-expr boa ;

+M: ##unary-float-function >expr
+    [ class ] [ src>> vreg>vn ] [ func>> ] tri
+    unary-float-function-expr boa ;
+
+M: ##binary-float-function >expr
+    {
+        [ class ]
+        [ src1>> vreg>vn ]
+        [ src2>> vreg>vn ]
+        [ func>> ]
+    } cleave
+    binary-float-function-expr boa ;
+
 M: ##flushable >expr drop next-input-expr ;

 : init-expressions ( -- )
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@ -176,6 +176,12 @@ M: ##max-float generate-insn dst/src1/src2 %max-float ;

 M: ##sqrt generate-insn dst/src %sqrt ;

+M: ##unary-float-function generate-insn
+    [ dst/src ] [ func>> ] bi %unary-float-function ;
+
+M: ##binary-float-function generate-insn
+    [ dst/src1/src2 ] [ func>> ] bi %binary-float-function ;
+
 M: ##integer>float generate-insn dst/src %integer>float ;
 M: ##float>integer generate-insn dst/src %float>integer ;

--- a/basis/compiler/tests/codegen.factor
+++ b/basis/compiler/tests/codegen.factor
@ -3,7 +3,7 @@ math hashtables.private math.private namespaces sequences tools.test
 namespaces.private slots.private sequences.private byte-arrays alien
 alien.accessors layouts words definitions compiler.units io
 combinators vectors grouping make alien.c-types combinators.short-circuit
-math.order ;
+math.order math.libm ;
 QUALIFIED: namespaces.private
 IN: compiler.tests.codegen

@ -407,4 +407,9 @@ cell 4 = [
 : missing-gc-check-1 ( a -- b ) { fixnum } declare <alien> ;
 : missing-gc-check-2 ( -- ) 10000000 [ missing-gc-check-1 drop ] each-integer ;

-[ ] [ missing-gc-check-2 ] unit-test
+[ ] [ missing-gc-check-2 ] unit-test
+
+[ 1 0.169967142900241 ] [ 1.4 [ 1 swap fcos ] compile-call ] unit-test
+[ 1 0.169967142900241 ] [ 1.4 1 [ swap fcos ] compile-call ] unit-test
+[ 0.169967142900241 0.9854497299884601 ] [ 1.4 [ [ fcos ] [ fsin ] bi ] compile-call ] unit-test
+[ 1 0.169967142900241 0.9854497299884601 ] [ 1.4 1 [ swap >float [ fcos ] [ fsin ] bi ] compile-call ] unit-test
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -115,6 +115,8 @@ HOOK: %div-float cpu ( dst src1 src2 -- )
 HOOK: %min-float cpu ( dst src1 src2 -- )
 HOOK: %max-float cpu ( dst src1 src2 -- )
 HOOK: %sqrt cpu ( dst src -- )
+HOOK: %unary-float-function cpu ( dst src func -- )
+HOOK: %binary-float-function cpu ( dst src1 src2 func -- )

 HOOK: %integer>float cpu ( dst src -- )
 HOOK: %float>integer cpu ( dst src -- )
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@ -197,6 +197,23 @@ M: x86.64 %callback-value ( ctype -- )
    ! Unbox former top of data stack to return registers
    unbox-return ;

+: float-function-param ( i spill-slot -- )
+    [ float-regs param-regs nth ] [ n>> spill@ ] bi* MOVSD ;
+
+: float-function-return ( reg -- )
+    float-regs return-reg double-float-rep copy-register ;
+
+M:: x86.64 %unary-float-function ( dst src func -- )
+    0 src float-function-param
+    func f %alien-invoke
+    dst float-function-return ;
+
+M:: x86.64 %binary-float-function ( dst src1 src2 func -- )
+    0 src1 float-function-param
+    1 src2 float-function-param
+    func f %alien-invoke
+    dst float-function-return ;
+
 ! The result of reading 4 bytes from memory is a fixnum on
 ! x86-64.
 enable-alien-4-intrinsics
@ -204,6 +221,9 @@ enable-alien-4-intrinsics
 ! SSE2 is always available on x86-64.
 enable-sse2

+! Enable fast calling of libc math functions
+enable-float-functions
+
 USE: vocabs.loader

 {
--- a/basis/math/libm/libm.factor
+++ b/basis/math/libm/libm.factor
@ -4,54 +4,53 @@ USING: alien ;
 IN: math.libm

 : facos ( x -- y )
-    "double" "libm" "acos" { "double" } alien-invoke ; inline
+    "double" "libm" "acos" { "double" } alien-invoke ;

 : fasin ( x -- y )
-    "double" "libm" "asin" { "double" } alien-invoke ; inline
+    "double" "libm" "asin" { "double" } alien-invoke ;

 : fatan ( x -- y )
-    "double" "libm" "atan" { "double" } alien-invoke ; inline
+    "double" "libm" "atan" { "double" } alien-invoke ;

 : fatan2 ( x y -- z )
-    "double" "libm" "atan2" { "double" "double" } alien-invoke ; inline
+    "double" "libm" "atan2" { "double" "double" } alien-invoke ;

 : fcos ( x -- y )
-    "double" "libm" "cos" { "double" } alien-invoke ; inline
+    "double" "libm" "cos" { "double" } alien-invoke ;

 : fsin ( x -- y )
-    "double" "libm" "sin" { "double" } alien-invoke ; inline
+    "double" "libm" "sin" { "double" } alien-invoke ;

 : ftan ( x -- y )
-    "double" "libm" "tan" { "double" } alien-invoke ; inline
+    "double" "libm" "tan" { "double" } alien-invoke ;

 : fcosh ( x -- y )
-    "double" "libm" "cosh" { "double" } alien-invoke ; inline
+    "double" "libm" "cosh" { "double" } alien-invoke ;

 : fsinh ( x -- y )
-    "double" "libm" "sinh" { "double" } alien-invoke ; inline
+    "double" "libm" "sinh" { "double" } alien-invoke ;

 : ftanh ( x -- y )
-    "double" "libm" "tanh" { "double" } alien-invoke ; inline
+    "double" "libm" "tanh" { "double" } alien-invoke ;

 : fexp ( x -- y )
-    "double" "libm" "exp" { "double" } alien-invoke ; inline
+    "double" "libm" "exp" { "double" } alien-invoke ;

 : flog ( x -- y )
-    "double" "libm" "log" { "double" } alien-invoke ; inline
+    "double" "libm" "log" { "double" } alien-invoke ;

 : fpow ( x y -- z )
-    "double" "libm" "pow" { "double" "double" } alien-invoke ; inline
+    "double" "libm" "pow" { "double" "double" } alien-invoke ;

-! Don't inline fsqrt -- its an intrinsic!
 : fsqrt ( x -- y )
    "double" "libm" "sqrt" { "double" } alien-invoke ;
    
 ! Windows doesn't have these...
 : facosh ( x -- y )
-    "double" "libm" "acosh" { "double" } alien-invoke ; inline
+    "double" "libm" "acosh" { "double" } alien-invoke ;

 : fasinh ( x -- y )
-    "double" "libm" "asinh" { "double" } alien-invoke ; inline
+    "double" "libm" "asinh" { "double" } alien-invoke ;

 : fatanh ( x -- y )
-    "double" "libm" "atanh" { "double" } alien-invoke ; inline
+    "double" "libm" "atanh" { "double" } alien-invoke ;