cpu.ppc: implement fast float function calls; 3x speedup on benchmark.struct-arrays on PowerPC
parent
cbaeda3195
commit
f91b539c31
|
@ -109,7 +109,6 @@ IN: compiler.cfg.intrinsics
|
|||
} enable-intrinsics ;
|
||||
|
||||
: enable-float-functions ( -- )
|
||||
! Everything except for fsqrt
|
||||
{
|
||||
{ math.libm:facos [ drop "acos" emit-unary-float-function ] }
|
||||
{ math.libm:fasin [ drop "asin" emit-unary-float-function ] }
|
||||
|
@ -127,6 +126,9 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.libm:facosh [ drop "acosh" emit-unary-float-function ] }
|
||||
{ math.libm:fasinh [ drop "asinh" emit-unary-float-function ] }
|
||||
{ math.libm:fatanh [ drop "atanh" emit-unary-float-function ] }
|
||||
{ math.libm:fsqrt [ drop "sqrt" emit-unary-float-function ] }
|
||||
{ math.floats.private:float-min [ drop "fmin" emit-binary-float-function ] }
|
||||
{ math.floats.private:float-max [ drop "fmax" emit-binary-float-function ] }
|
||||
} enable-intrinsics ;
|
||||
|
||||
: enable-min/max ( -- )
|
||||
|
|
|
@ -281,6 +281,23 @@ M:: ppc %box-float ( dst src temp -- )
|
|||
dst 16 float temp %allot
|
||||
src dst float-offset STFD ;
|
||||
|
||||
! Emits one LFD instruction for argument number `i` of a float function call.
! `i` selects the i-th element of `float-regs param-regs`; `spill-slot` is
! turned into an operand via `n>> spill@`. The literal 1 is passed as the
! middle operand -- presumably the stack-pointer register that the spill
! offset is relative to; TODO confirm against the LFD stack effect.
: float-function-param ( i spill-slot -- )
|
||||
! bi* applies the first quotation to `i` and the second to `spill-slot`.
[ float-regs param-regs nth 1 ] [ n>> spill@ ] bi* LFD ;
|
||||
|
||||
! Emits an FMR between `reg` and `float-regs return-reg`, unless the two are
! equal, in which case nothing is emitted.
! NOTE(review): presumably this copies the return register into `reg`;
! confirm the operand order of FMR.
: float-function-return ( reg -- )
|
||||
! 2dup = compares the pair without consuming it; 2drop discards both when
! they are equal.
float-regs return-reg 2dup = [ 2drop ] [ FMR ] if ;
|
||||
|
||||
! PowerPC method for calling a one-argument float function named by `func`.
! Steps, in order: load `src` as float parameter 0, invoke `func`, then hand
! `dst` to float-function-return to receive the result.
M:: ppc %unary-float-function ( dst src func -- )
|
||||
! `src` is passed as the spill-slot argument of float-function-param.
0 src float-function-param
|
||||
! `f` is the second argument to %alien-invoke -- presumably "no library";
! TODO confirm.
func f %alien-invoke
|
||||
dst float-function-return ;
|
||||
|
||||
! PowerPC method for calling a two-argument float function named by `func`.
! Steps, in order: load `src1` as float parameter 0 and `src2` as float
! parameter 1, invoke `func`, then hand `dst` to float-function-return to
! receive the result.
M:: ppc %binary-float-function ( dst src1 src2 func -- )
|
||||
! Both sources are passed as the spill-slot argument of float-function-param.
0 src1 float-function-param
|
||||
1 src2 float-function-param
|
||||
! `f` is the second argument to %alien-invoke -- presumably "no library";
! TODO confirm.
func f %alien-invoke
|
||||
dst float-function-return ;
|
||||
|
||||
M:: ppc %unbox-any-c-ptr ( dst src temp -- )
|
||||
[
|
||||
{ "is-byte-array" "end" "start" } [ define-label ] each
|
||||
|
@ -681,6 +698,8 @@ M: ppc %unbox-small-struct ( size -- )
|
|||
{ 4 [ %unbox-struct-4 ] }
|
||||
} case ;
|
||||
|
||||
enable-float-functions
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
{
|
||||
|
|
|
@ -218,12 +218,12 @@ M:: x86.64 %binary-float-function ( dst src1 src2 func -- )
|
|||
! x86-64.
|
||||
enable-alien-4-intrinsics
|
||||
|
||||
! SSE2 is always available on x86-64.
|
||||
enable-sse2
|
||||
|
||||
! Enable fast calling of libc math functions
|
||||
enable-float-functions
|
||||
|
||||
! SSE2 is always available on x86-64.
|
||||
enable-sse2
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue