compiler: add fixnum-min/max intrinsics; ~10% speedup on benchmark.yuv-to-rgb
parent
4197891499
commit
2bb6293217
|
@ -35,6 +35,8 @@ IN: compiler.cfg.hats
|
||||||
: ^^shr-imm ( src1 src2 -- dst ) ^^r2 ##shr-imm ; inline
|
: ^^shr-imm ( src1 src2 -- dst ) ^^r2 ##shr-imm ; inline
|
||||||
: ^^sar ( src1 src2 -- dst ) ^^r2 ##sar ; inline
|
: ^^sar ( src1 src2 -- dst ) ^^r2 ##sar ; inline
|
||||||
: ^^sar-imm ( src1 src2 -- dst ) ^^r2 ##sar-imm ; inline
|
: ^^sar-imm ( src1 src2 -- dst ) ^^r2 ##sar-imm ; inline
|
||||||
|
: ^^min ( src1 src2 -- dst ) ^^r2 ##min ; inline
|
||||||
|
: ^^max ( src1 src2 -- dst ) ^^r2 ##max ; inline
|
||||||
: ^^not ( src -- dst ) ^^r1 ##not ; inline
|
: ^^not ( src -- dst ) ^^r1 ##not ; inline
|
||||||
: ^^log2 ( src -- dst ) ^^r1 ##log2 ; inline
|
: ^^log2 ( src -- dst ) ^^r1 ##log2 ; inline
|
||||||
: ^^bignum>integer ( src -- dst ) ^^r1 next-vreg ##bignum>integer ; inline
|
: ^^bignum>integer ( src -- dst ) ^^r1 next-vreg ##bignum>integer ; inline
|
||||||
|
|
|
@ -91,6 +91,8 @@ INSN: ##shr < ##binary ;
|
||||||
INSN: ##shr-imm < ##binary-imm ;
|
INSN: ##shr-imm < ##binary-imm ;
|
||||||
INSN: ##sar < ##binary ;
|
INSN: ##sar < ##binary ;
|
||||||
INSN: ##sar-imm < ##binary-imm ;
|
INSN: ##sar-imm < ##binary-imm ;
|
||||||
|
INSN: ##min < ##binary ;
|
||||||
|
INSN: ##max < ##binary ;
|
||||||
INSN: ##not < ##unary ;
|
INSN: ##not < ##unary ;
|
||||||
INSN: ##log2 < ##unary ;
|
INSN: ##log2 < ##unary ;
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,9 @@ QUALIFIED: math.floats.private
|
||||||
QUALIFIED: math.libm
|
QUALIFIED: math.libm
|
||||||
IN: compiler.cfg.intrinsics
|
IN: compiler.cfg.intrinsics
|
||||||
|
|
||||||
|
: enable-intrinsics ( words -- )
|
||||||
|
[ t "intrinsic" set-word-prop ] each ;
|
||||||
|
|
||||||
{
|
{
|
||||||
kernel.private:tag
|
kernel.private:tag
|
||||||
kernel.private:getenv
|
kernel.private:getenv
|
||||||
|
@ -67,7 +70,7 @@ IN: compiler.cfg.intrinsics
|
||||||
alien.accessors:set-alien-signed-2
|
alien.accessors:set-alien-signed-2
|
||||||
alien.accessors:alien-cell
|
alien.accessors:alien-cell
|
||||||
alien.accessors:set-alien-cell
|
alien.accessors:set-alien-cell
|
||||||
} [ t "intrinsic" set-word-prop ] each
|
} enable-intrinsics
|
||||||
|
|
||||||
: enable-alien-4-intrinsics ( -- )
|
: enable-alien-4-intrinsics ( -- )
|
||||||
{
|
{
|
||||||
|
@ -75,7 +78,7 @@ IN: compiler.cfg.intrinsics
|
||||||
alien.accessors:set-alien-unsigned-4
|
alien.accessors:set-alien-unsigned-4
|
||||||
alien.accessors:alien-signed-4
|
alien.accessors:alien-signed-4
|
||||||
alien.accessors:set-alien-signed-4
|
alien.accessors:set-alien-signed-4
|
||||||
} [ t "intrinsic" set-word-prop ] each ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
: enable-float-intrinsics ( -- )
|
: enable-float-intrinsics ( -- )
|
||||||
{
|
{
|
||||||
|
@ -94,7 +97,7 @@ IN: compiler.cfg.intrinsics
|
||||||
alien.accessors:set-alien-float
|
alien.accessors:set-alien-float
|
||||||
alien.accessors:alien-double
|
alien.accessors:alien-double
|
||||||
alien.accessors:set-alien-double
|
alien.accessors:set-alien-double
|
||||||
} [ t "intrinsic" set-word-prop ] each ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
: enable-fsqrt ( -- )
|
: enable-fsqrt ( -- )
|
||||||
\ math.libm:fsqrt t "intrinsic" set-word-prop ;
|
\ math.libm:fsqrt t "intrinsic" set-word-prop ;
|
||||||
|
@ -103,10 +106,16 @@ IN: compiler.cfg.intrinsics
|
||||||
{
|
{
|
||||||
math.floats.private:float-min
|
math.floats.private:float-min
|
||||||
math.floats.private:float-max
|
math.floats.private:float-max
|
||||||
} [ t "intrinsic" set-word-prop ] each ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
|
: enable-min/max ( -- )
|
||||||
|
{
|
||||||
|
math.integers.private:fixnum-min
|
||||||
|
math.integers.private:fixnum-max
|
||||||
|
} enable-intrinsics ;
|
||||||
|
|
||||||
: enable-fixnum-log2 ( -- )
|
: enable-fixnum-log2 ( -- )
|
||||||
\ math.integers.private:fixnum-log2 t "intrinsic" set-word-prop ;
|
{ math.integers.private:fixnum-log2 } enable-intrinsics ;
|
||||||
|
|
||||||
: emit-intrinsic ( node word -- )
|
: emit-intrinsic ( node word -- )
|
||||||
{
|
{
|
||||||
|
@ -130,6 +139,8 @@ IN: compiler.cfg.intrinsics
|
||||||
{ \ math.private:fixnum>= [ drop cc>= emit-fixnum-comparison ] }
|
{ \ math.private:fixnum>= [ drop cc>= emit-fixnum-comparison ] }
|
||||||
{ \ math.private:fixnum> [ drop cc> emit-fixnum-comparison ] }
|
{ \ math.private:fixnum> [ drop cc> emit-fixnum-comparison ] }
|
||||||
{ \ kernel:eq? [ drop cc= emit-fixnum-comparison ] }
|
{ \ kernel:eq? [ drop cc= emit-fixnum-comparison ] }
|
||||||
|
{ \ math.integers.private:fixnum-min [ drop [ ^^min ] emit-fixnum-op ] }
|
||||||
|
{ \ math.integers.private:fixnum-max [ drop [ ^^max ] emit-fixnum-op ] }
|
||||||
{ \ math.private:bignum>fixnum [ drop emit-bignum>fixnum ] }
|
{ \ math.private:bignum>fixnum [ drop emit-bignum>fixnum ] }
|
||||||
{ \ math.private:fixnum>bignum [ drop emit-fixnum>bignum ] }
|
{ \ math.private:fixnum>bignum [ drop emit-fixnum>bignum ] }
|
||||||
{ \ math.private:float+ [ drop [ ^^add-float ] emit-float-op ] }
|
{ \ math.private:float+ [ drop [ ^^add-float ] emit-float-op ] }
|
||||||
|
|
|
@ -35,6 +35,8 @@ UNION: two-operand-insn
|
||||||
##shr-imm
|
##shr-imm
|
||||||
##sar
|
##sar
|
||||||
##sar-imm
|
##sar-imm
|
||||||
|
##min
|
||||||
|
##max
|
||||||
##fixnum-overflow
|
##fixnum-overflow
|
||||||
##add-float
|
##add-float
|
||||||
##sub-float
|
##sub-float
|
||||||
|
|
|
@ -149,6 +149,8 @@ M: ##shr generate-insn dst/src1/src2 %shr ;
|
||||||
M: ##shr-imm generate-insn dst/src1/src2 %shr-imm ;
|
M: ##shr-imm generate-insn dst/src1/src2 %shr-imm ;
|
||||||
M: ##sar generate-insn dst/src1/src2 %sar ;
|
M: ##sar generate-insn dst/src1/src2 %sar ;
|
||||||
M: ##sar-imm generate-insn dst/src1/src2 %sar-imm ;
|
M: ##sar-imm generate-insn dst/src1/src2 %sar-imm ;
|
||||||
|
M: ##min generate-insn dst/src1/src2 %min ;
|
||||||
|
M: ##max generate-insn dst/src1/src2 %max ;
|
||||||
M: ##not generate-insn dst/src %not ;
|
M: ##not generate-insn dst/src %not ;
|
||||||
M: ##log2 generate-insn dst/src %log2 ;
|
M: ##log2 generate-insn dst/src %log2 ;
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
USING: accessors arrays compiler.units kernel kernel.private math
|
USING: accessors arrays compiler.units kernel kernel.private
|
||||||
math.constants math.private sequences strings tools.test words
|
math math.constants math.private math.integers.private sequences
|
||||||
continuations sequences.private hashtables.private byte-arrays
|
strings tools.test words continuations sequences.private
|
||||||
system random layouts vectors
|
hashtables.private byte-arrays system random layouts vectors
|
||||||
sbufs strings.private slots.private alien math.order
|
sbufs strings.private slots.private alien math.order
|
||||||
alien.accessors alien.c-types alien.syntax alien.strings
|
alien.accessors alien.c-types alien.syntax alien.strings
|
||||||
namespaces libc io.encodings.ascii
|
namespaces libc io.encodings.ascii classes compiler ;
|
||||||
classes compiler ;
|
|
||||||
IN: compiler.tests.intrinsics
|
IN: compiler.tests.intrinsics
|
||||||
|
|
||||||
! Make sure that intrinsic ops compile to correct code.
|
! Make sure that intrinsic ops compile to correct code.
|
||||||
|
@ -271,6 +270,15 @@ cell 8 = [
|
||||||
[ 100000 swap array-nth ] compile-call
|
[ 100000 swap array-nth ] compile-call
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ 2 ] [ 2 4 [ fixnum-min ] compile-call ] unit-test
|
||||||
|
[ 2 ] [ 4 2 [ fixnum-min ] compile-call ] unit-test
|
||||||
|
[ 4 ] [ 2 4 [ fixnum-max ] compile-call ] unit-test
|
||||||
|
[ 4 ] [ 4 2 [ fixnum-max ] compile-call ] unit-test
|
||||||
|
[ -2 ] [ -2 -4 [ fixnum-max ] compile-call ] unit-test
|
||||||
|
[ -2 ] [ -4 -2 [ fixnum-max ] compile-call ] unit-test
|
||||||
|
[ -4 ] [ -2 -4 [ fixnum-min ] compile-call ] unit-test
|
||||||
|
[ -4 ] [ -4 -2 [ fixnum-min ] compile-call ] unit-test
|
||||||
|
|
||||||
! 64-bit overflow
|
! 64-bit overflow
|
||||||
cell 8 = [
|
cell 8 = [
|
||||||
[ t ] [ 1 59 fixnum-shift dup [ fixnum+ ] compile-call 1 60 fixnum-shift = ] unit-test
|
[ t ] [ 1 59 fixnum-shift dup [ fixnum+ ] compile-call 1 60 fixnum-shift = ] unit-test
|
||||||
|
|
|
@ -79,11 +79,16 @@ IN: compiler.tree.propagation.known-words
|
||||||
] unless ;
|
] unless ;
|
||||||
|
|
||||||
: ensure-math-class ( class must-be -- class' )
|
: ensure-math-class ( class must-be -- class' )
|
||||||
[ class<= ] 2keep ? ;
|
[ class<= ] most ;
|
||||||
|
|
||||||
: number-valued ( class interval -- class' interval' )
|
: number-valued ( class interval -- class' interval' )
|
||||||
[ number ensure-math-class ] dip ;
|
[ number ensure-math-class ] dip ;
|
||||||
|
|
||||||
|
: fixnum-valued ( class interval -- class' interval' )
|
||||||
|
over null-class? [
|
||||||
|
[ drop fixnum ] dip
|
||||||
|
] unless ;
|
||||||
|
|
||||||
: integer-valued ( class interval -- class' interval' )
|
: integer-valued ( class interval -- class' interval' )
|
||||||
[ integer ensure-math-class ] dip ;
|
[ integer ensure-math-class ] dip ;
|
||||||
|
|
||||||
|
@ -304,7 +309,15 @@ flog fpow fsqrt facosh fasinh fatanh } [
|
||||||
{ float } "default-output-classes" set-word-prop
|
{ float } "default-output-classes" set-word-prop
|
||||||
] each
|
] each
|
||||||
|
|
||||||
{ float-min float-max } [
|
! Find a less repetitive way of doing this
|
||||||
[ { float float } "input-classes" set-word-prop ]
|
\ float-min { float float } "input-classes" set-word-prop
|
||||||
[ { float } "default-output-classes" set-word-prop ] bi
|
\ float-min [ interval-min ] [ float-valued ] binary-op
|
||||||
] each
|
|
||||||
|
\ float-max { float float } "input-classes" set-word-prop
|
||||||
|
\ float-max [ interval-max ] [ float-valued ] binary-op
|
||||||
|
|
||||||
|
\ fixnum-min { fixnum fixnum } "input-classes" set-word-prop
|
||||||
|
\ fixnum-min [ interval-min ] [ fixnum-valued ] binary-op
|
||||||
|
|
||||||
|
\ fixnum-max { fixnum fixnum } "input-classes" set-word-prop
|
||||||
|
\ fixnum-max [ interval-max ] [ fixnum-valued ] binary-op
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg.
|
! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel sequences words fry generic accessors classes.tuple
|
USING: kernel sequences words fry generic accessors
|
||||||
classes classes.algebra definitions stack-checker.state quotations
|
classes.tuple classes classes.algebra definitions
|
||||||
classes.tuple.private math math.partial-dispatch math.private
|
stack-checker.state quotations classes.tuple.private math
|
||||||
math.intervals math.floats.private layouts math.order vectors hashtables
|
math.partial-dispatch math.private math.intervals
|
||||||
combinators effects generalizations assocs sets
|
math.floats.private math.integers.private layouts math.order
|
||||||
combinators.short-circuit sequences.private locals
|
vectors hashtables combinators effects generalizations assocs
|
||||||
|
sets combinators.short-circuit sequences.private locals
|
||||||
stack-checker namespaces compiler.tree.propagation.info ;
|
stack-checker namespaces compiler.tree.propagation.info ;
|
||||||
IN: compiler.tree.propagation.transforms
|
IN: compiler.tree.propagation.transforms
|
||||||
|
|
||||||
|
@ -79,15 +80,25 @@ IN: compiler.tree.propagation.transforms
|
||||||
] [ f ] if
|
] [ f ] if
|
||||||
] "custom-inlining" set-word-prop
|
] "custom-inlining" set-word-prop
|
||||||
|
|
||||||
{
|
! Integrate this with generic arithmetic optimization instead?
|
||||||
{ min [ float-min ] }
|
: both-inputs? ( #call class -- ? )
|
||||||
{ max [ float-max ] }
|
[ in-d>> first2 ] dip '[ value-info class>> _ class<= ] both? ;
|
||||||
} [
|
|
||||||
'[
|
\ min [
|
||||||
in-d>> first2 [ value-info class>> float class<= ] both?
|
{
|
||||||
[ _ ] [ f ] if
|
{ [ dup fixnum both-inputs? ] [ [ fixnum-min ] ] }
|
||||||
] "custom-inlining" set-word-prop
|
{ [ dup float both-inputs? ] [ [ float-min ] ] }
|
||||||
] assoc-each
|
[ f ]
|
||||||
|
} cond nip
|
||||||
|
] "custom-inlining" set-word-prop
|
||||||
|
|
||||||
|
\ max [
|
||||||
|
{
|
||||||
|
{ [ dup fixnum both-inputs? ] [ [ fixnum-max ] ] }
|
||||||
|
{ [ dup float both-inputs? ] [ [ float-max ] ] }
|
||||||
|
[ f ]
|
||||||
|
} cond nip
|
||||||
|
] "custom-inlining" set-word-prop
|
||||||
|
|
||||||
! Generate more efficient code for common idiom
|
! Generate more efficient code for common idiom
|
||||||
\ clone [
|
\ clone [
|
||||||
|
|
|
@ -96,6 +96,8 @@ HOOK: %shr cpu ( dst src1 src2 -- )
|
||||||
HOOK: %shr-imm cpu ( dst src1 src2 -- )
|
HOOK: %shr-imm cpu ( dst src1 src2 -- )
|
||||||
HOOK: %sar cpu ( dst src1 src2 -- )
|
HOOK: %sar cpu ( dst src1 src2 -- )
|
||||||
HOOK: %sar-imm cpu ( dst src1 src2 -- )
|
HOOK: %sar-imm cpu ( dst src1 src2 -- )
|
||||||
|
HOOK: %min cpu ( dst src1 src2 -- )
|
||||||
|
HOOK: %max cpu ( dst src1 src2 -- )
|
||||||
HOOK: %not cpu ( dst src -- )
|
HOOK: %not cpu ( dst src -- )
|
||||||
HOOK: %log2 cpu ( dst src -- )
|
HOOK: %log2 cpu ( dst src -- )
|
||||||
|
|
||||||
|
|
|
@ -123,6 +123,10 @@ M: x86 %xor-imm nip XOR ;
|
||||||
M: x86 %shl-imm nip SHL ;
|
M: x86 %shl-imm nip SHL ;
|
||||||
M: x86 %shr-imm nip SHR ;
|
M: x86 %shr-imm nip SHR ;
|
||||||
M: x86 %sar-imm nip SAR ;
|
M: x86 %sar-imm nip SAR ;
|
||||||
|
|
||||||
|
M: x86 %min nip [ CMP ] [ CMOVG ] 2bi ;
|
||||||
|
M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
|
||||||
|
|
||||||
M: x86 %not drop NOT ;
|
M: x86 %not drop NOT ;
|
||||||
M: x86 %log2 BSR ;
|
M: x86 %log2 BSR ;
|
||||||
|
|
||||||
|
@ -579,3 +583,5 @@ M: x86 small-enough? ( n -- ? )
|
||||||
enable-float-intrinsics
|
enable-float-intrinsics
|
||||||
enable-fsqrt
|
enable-fsqrt
|
||||||
enable-float-min/max ;
|
enable-float-min/max ;
|
||||||
|
|
||||||
|
enable-min/max
|
||||||
|
|
|
@ -235,6 +235,10 @@ IN: math.intervals.tests
|
||||||
interval-contains?
|
interval-contains?
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ t ] [ full-interval 10 10 [a,b] interval-max 10 1/0. [a,b] = ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ full-interval 10 10 [a,b] interval-min -1/0. 10 [a,b] = ] unit-test
|
||||||
|
|
||||||
[ t ] [ 1 100 [a,b] -1 1 [a,b] interval/i [-inf,inf] = ] unit-test
|
[ t ] [ 1 100 [a,b] -1 1 [a,b] interval/i [-inf,inf] = ] unit-test
|
||||||
|
|
||||||
! Accuracy of interval-mod
|
! Accuracy of interval-mod
|
||||||
|
|
|
@ -7,7 +7,7 @@ IN: math.intervals
|
||||||
|
|
||||||
SYMBOL: empty-interval
|
SYMBOL: empty-interval
|
||||||
|
|
||||||
SYMBOL: full-interval
|
SINGLETON: full-interval
|
||||||
|
|
||||||
TUPLE: interval { from read-only } { to read-only } ;
|
TUPLE: interval { from read-only } { to read-only } ;
|
||||||
|
|
||||||
|
@ -238,12 +238,24 @@ MEMO: array-capacity-interval ( -- interval )
|
||||||
] do-empty-interval ;
|
] do-empty-interval ;
|
||||||
|
|
||||||
: interval-max ( i1 i2 -- i3 )
|
: interval-max ( i1 i2 -- i3 )
|
||||||
#! Inaccurate; could be tighter
|
{
|
||||||
[ [ interval-closure ] bi@ [ max ] interval-op nan-not-ok ] do-empty-interval ;
|
{ [ over empty-interval eq? ] [ drop ] }
|
||||||
|
{ [ dup empty-interval eq? ] [ nip ] }
|
||||||
|
{ [ 2dup [ full-interval eq? ] both? ] [ drop ] }
|
||||||
|
{ [ over full-interval eq? ] [ nip from>> first [a,inf] ] }
|
||||||
|
{ [ dup full-interval eq? ] [ drop from>> first [a,inf] ] }
|
||||||
|
[ [ interval-closure ] bi@ [ max ] interval-op nan-not-ok ]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
: interval-min ( i1 i2 -- i3 )
|
: interval-min ( i1 i2 -- i3 )
|
||||||
#! Inaccurate; could be tighter
|
{
|
||||||
[ [ interval-closure ] bi@ [ min ] interval-op nan-not-ok ] do-empty-interval ;
|
{ [ over empty-interval eq? ] [ drop ] }
|
||||||
|
{ [ dup empty-interval eq? ] [ nip ] }
|
||||||
|
{ [ 2dup [ full-interval eq? ] both? ] [ drop ] }
|
||||||
|
{ [ over full-interval eq? ] [ nip to>> first [-inf,a] ] }
|
||||||
|
{ [ dup full-interval eq? ] [ drop to>> first [-inf,a] ] }
|
||||||
|
[ [ interval-closure ] bi@ [ min ] interval-op nan-not-ok ]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
: interval-interior ( i1 -- i2 )
|
: interval-interior ( i1 -- i2 )
|
||||||
dup special-interval? [
|
dup special-interval? [
|
||||||
|
|
|
@ -3,8 +3,8 @@
|
||||||
USING: kernel math math.private ;
|
USING: kernel math math.private ;
|
||||||
IN: math.floats.private
|
IN: math.floats.private
|
||||||
|
|
||||||
: float-min ( x y -- z ) [ float< ] 2keep ? ;
|
: float-min ( x y -- z ) [ float< ] most ; foldable
|
||||||
: float-max ( x y -- z ) [ float> ] 2keep ? ;
|
: float-max ( x y -- z ) [ float> ] most ; foldable
|
||||||
|
|
||||||
M: fixnum >float fixnum>float ; inline
|
M: fixnum >float fixnum>float ; inline
|
||||||
M: bignum >float bignum>float ; inline
|
M: bignum >float bignum>float ; inline
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
! Copyright (C) 2004, 2008 Slava Pestov.
|
! Copyright (C) 2004, 2009 Slava Pestov.
|
||||||
! Copyright (C) 2008, Doug Coleman.
|
! Copyright (C) 2008, Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel kernel.private sequences
|
USING: kernel kernel.private sequences
|
||||||
sequences.private math math.private combinators ;
|
sequences.private math math.private combinators ;
|
||||||
IN: math.integers.private
|
IN: math.integers.private
|
||||||
|
|
||||||
|
: fixnum-min ( x y -- z ) [ fixnum< ] most ; foldable
|
||||||
|
: fixnum-max ( x y -- z ) [ fixnum> ] most ; foldable
|
||||||
|
|
||||||
M: integer numerator ; inline
|
M: integer numerator ; inline
|
||||||
M: integer denominator drop 1 ; inline
|
M: integer denominator drop 1 ; inline
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue