cpu.x86.32: implement %unary-float-function and %binary-float-function; speeds up partial-sums and struct-arrays benchmarks
parent
afc7a20ab8
commit
91e63c0c6f
|
@ -282,6 +282,29 @@ M: x86.32 %callback-value ( ctype -- )
|
|||
! Unbox EAX
|
||||
unbox-return ;
|
||||
|
||||
: float-function-param ( stack-slot dst src -- )
    ! Copy one double argument into the outgoing argument location
    ! stack-slot. The value is read from the operand that spill@
    ! builds from src's n slot and is staged through dst.
    ! Clobbering dst is harmless: it's going to be overwritten
    ! with the final result of the call anyway.
    n>> spill@      ! ( stack-slot dst spill-operand )
    dupd MOVSD      ! dst <- spill-operand, leaving ( stack-slot dst )
    MOVSD ;         ! stack-slot <- dst
|
||||
|
||||
:: float-function-return ( reg -- )
    ! Fetch the result of the call into reg. FSTPL writes the
    ! value out to the memory at ESP, MOVSD then loads that same
    ! memory into reg, and finally the 16 bytes that the callers
    ! subtracted from ESP before the call are given back.
    ESP [] FSTPL
    reg ESP [] MOVSD
    ESP 16 ADD ;
|
||||
|
||||
! x86.32 implementation of %unary-float-function: emits code that
! passes src to the function func as a single double argument on the
! stack and leaves the result in dst.
M:: x86.32 %unary-float-function ( dst src func -- )
|
||||
! Store the argument 16 bytes below the current ESP, using dst as
! scratch. After the SUB below this location is exactly [ESP].
! NOTE(review): the store is done before ESP moves, presumably
! because the spill@ operand for src is ESP-relative -- confirm
! before reordering these two lines.
ESP -16 [+] dst src float-function-param
|
||||
! Reserve the 16-byte argument area (released again by
! float-function-return).
ESP 16 SUB
|
||||
! Call func. NOTE(review): the f is presumably the "no library"
! argument of %alien-invoke -- confirm.
func f %alien-invoke
|
||||
! Load the returned value into dst and restore ESP.
dst float-function-return ;
|
||||
|
||||
! x86.32 implementation of %binary-float-function: emits code that
! passes src1 and src2 to the function func as two double arguments
! on the stack and leaves the result in dst.
M:: x86.32 %binary-float-function ( dst src1 src2 func -- )
|
||||
! First argument: stored 16 bytes below the current ESP, which is
! [ESP] once ESP has been lowered. dst is only used as scratch here.
ESP -16 [+] dst src1 float-function-param
|
||||
! Second argument: stored 8 bytes below the current ESP, which is
! [ESP+8] once ESP has been lowered. dst is reused as scratch.
ESP -8 [+] dst src2 float-function-param
|
||||
! Reserve the 16-byte argument area only after both stores.
! NOTE(review): presumably the spill@ operands for src1/src2 are
! ESP-relative, so moving this SUB earlier would shift them --
! confirm before reordering.
ESP 16 SUB
|
||||
! Call func. NOTE(review): the f is presumably the "no library"
! argument of %alien-invoke -- confirm.
func f %alien-invoke
|
||||
! Load the returned value into dst and restore ESP.
dst float-function-return ;
|
||||
|
||||
M: x86.32 %cleanup ( params -- )
|
||||
#! a) If we just called an stdcall function in Windows, it
|
||||
|
|
|
@ -249,9 +249,6 @@ M:: x86.64 %call-gc ( gc-root-count temp -- )
|
|||
! x86-64.
|
||||
enable-alien-4-intrinsics
|
||||
|
||||
! Enable fast calling of libc math functions
|
||||
enable-float-functions
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
{
|
||||
|
|
|
@ -989,6 +989,7 @@ enable-fixnum-log2
|
|||
enable-float-intrinsics
|
||||
enable-fsqrt
|
||||
enable-float-min/max
|
||||
enable-float-functions
|
||||
install-sse2-check
|
||||
] when ;
|
||||
|
||||
|
|
Loading…
Reference in New Issue