cpu.ppc: implement fast float function calls; 3x speedup on benchmark.struct-arrays on PowerPC

db4
Slava Pestov 2009-09-01 15:19:26 -05:00
parent cbaeda3195
commit f91b539c31
3 changed files with 25 additions and 4 deletions

View File

@ -109,7 +109,6 @@ IN: compiler.cfg.intrinsics
} enable-intrinsics ;
: enable-float-functions ( -- )
! Everything except for fsqrt
{
{ math.libm:facos [ drop "acos" emit-unary-float-function ] }
{ math.libm:fasin [ drop "asin" emit-unary-float-function ] }
@ -127,6 +126,9 @@ IN: compiler.cfg.intrinsics
{ math.libm:facosh [ drop "acosh" emit-unary-float-function ] }
{ math.libm:fasinh [ drop "asinh" emit-unary-float-function ] }
{ math.libm:fatanh [ drop "atanh" emit-unary-float-function ] }
{ math.libm:fsqrt [ drop "sqrt" emit-unary-float-function ] }
{ math.floats.private:float-min [ drop "fmin" emit-binary-float-function ] }
{ math.floats.private:float-max [ drop "fmax" emit-binary-float-function ] }
} enable-intrinsics ;
: enable-min/max ( -- )

View File

@ -281,6 +281,23 @@ M:: ppc %box-float ( dst src temp -- )
dst 16 float temp %allot
src dst float-offset STFD ;
! Emit an LFD for the i-th float parameter register.
! The register is looked up by index in the float-regs parameter
! register list; the literal 1 and the spill slot's offset (via
! spill@) are the remaining LFD operands.
! NOTE(review): register 1 is presumably the stack pointer - confirm.
:: float-function-param ( i spill-slot -- )
    i float-regs param-regs nth
    1
    spill-slot n>> spill@
    LFD ;
! Emit an FMR with reg and the float return register as operands.
! Nothing is emitted when reg already is the float return register.
:: float-function-return ( reg -- )
    float-regs return-reg :> result-reg
    reg result-reg = [ reg result-reg FMR ] unless ;
! Call a one-argument float function: load src into float parameter
! slot 0, invoke func through %alien-invoke with f as the library
! argument, then hand dst to float-function-return.
M: ppc %unary-float-function ( dst src func -- )
    ! Set up the argument while func waits on the retain stack.
    [ 0 swap float-function-param ] dip
    f %alien-invoke
    float-function-return ;
! Call a two-argument float function: load src1 and src2 into float
! parameter slots 0 and 1 (in that order), invoke func through
! %alien-invoke with f as the library argument, then hand dst to
! float-function-return.
M: ppc %binary-float-function ( dst src1 src2 func -- )
    ! Set up both arguments while func waits on the retain stack.
    [
        [ 0 swap float-function-param ]
        [ 1 swap float-function-param ]
        bi*
    ] dip
    f %alien-invoke
    float-function-return ;
M:: ppc %unbox-any-c-ptr ( dst src temp -- )
[
{ "is-byte-array" "end" "start" } [ define-label ] each
@ -681,6 +698,8 @@ M: ppc %unbox-small-struct ( size -- )
{ 4 [ %unbox-struct-4 ] }
} case ;
! Enable fast calling of libc math functions
enable-float-functions
USE: vocabs.loader
{

View File

@ -218,12 +218,12 @@ M:: x86.64 %binary-float-function ( dst src1 src2 func -- )
! x86-64.
enable-alien-4-intrinsics
! SSE2 is always available on x86-64.
enable-sse2
! Enable fast calling of libc math functions
enable-float-functions
! SSE2 is always available on x86-64.
enable-sse2
USE: vocabs.loader
{