cpu.x86.32: implement %unary-float-function and %binary-float-function; speeds up partial-sums and struct-arrays benchmarks

2009-09-27 18:06:30 -05:00 · 2009-09-27 18:06:30 -05:00 · 91e63c0c6f
parent afc7a20ab8
commit 91e63c0c6f
3 changed files with 24 additions and 3 deletions
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@ -282,6 +282,29 @@ M: x86.32 %callback-value ( ctype -- )
    ! Unbox EAX
    unbox-return ;

+:: float-function-param ( stack-slot dst src -- )
+    ! We can clobber dst here since it's going to contain the
+    ! final result, so it is used as the intermediate register.
+    dst src n>> spill@ MOVSD ! dst <- operand built by spill@ from src's n slot (presumably src's spill location)
+    stack-slot dst MOVSD ; ! stack-slot <- dst, i.e. the outgoing parameter slot
+
+: float-function-return ( reg -- )
+    ESP [] FSTPL ! store-and-pop the x87 top of stack to [ESP] (presumably the callee's double result)
+    ESP [] MOVSD ! reg, still on the data stack from the caller, <- [ESP]
+    ESP 16 ADD ; ! give back the 16 bytes the callers took with ESP 16 SUB
+
+M:: x86.32 %unary-float-function ( dst src func -- )
+    ESP -16 [+] dst src float-function-param ! write the argument to [ESP-16], below the current ESP, using dst as scratch
+    ESP 16 SUB ! now that slot is [ESP]; NOTE(review): store precedes SUB, presumably because spill@ is ESP-relative -- confirm before reordering
+    func f %alien-invoke ! presumably emits the call to the function named by func
+    dst float-function-return ; ! move the result into dst and restore ESP
+
+M:: x86.32 %binary-float-function ( dst src1 src2 func -- )
+    ESP -16 [+] dst src1 float-function-param ! first argument -> [ESP-16], which becomes [ESP] after the SUB below
+    ESP  -8 [+] dst src2 float-function-param ! second argument -> [ESP-8], which becomes [ESP+8]; dst is reused as scratch
+    ESP 16 SUB ! NOTE(review): stores precede SUB, presumably because spill@ is ESP-relative -- confirm before reordering
+    func f %alien-invoke ! presumably emits the call to the function named by func
+    dst float-function-return ; ! move the result into dst and restore ESP

 M: x86.32 %cleanup ( params -- )
    #! a) If we just called an stdcall function in Windows, it
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@ -249,9 +249,6 @@ M:: x86.64 %call-gc ( gc-root-count temp -- )
 ! x86-64.
 enable-alien-4-intrinsics

-! Enable fast calling of libc math functions
-enable-float-functions
-
 USE: vocabs.loader

 {
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -989,6 +989,7 @@ enable-fixnum-log2
        enable-float-intrinsics
        enable-fsqrt
        enable-float-min/max
+        enable-float-functions
        install-sse2-check
    ] when ;