From 5297be3e19a33274c70f0a9dc03c96ed4781dbc5 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sat, 15 Aug 2009 18:42:41 -0500 Subject: [PATCH] compiler.tree.modular-arithmetic: stronger optimization handles > 1 usages case as well as values defined and used in loops. Eliminates 5 out of 8 >fixnum calls in benchmark.yuv-to-rgb --- basis/compiler/tree/debugger/debugger.factor | 4 + basis/compiler/tree/def-use/def-use.factor | 2 +- .../simplified/simplified-tests.factor | 18 ++- .../tree/def-use/simplified/simplified.factor | 89 ++++++++--- .../modular-arithmetic-tests.factor | 87 ++++++++++- .../modular-arithmetic.factor | 146 +++++++++++++----- extra/benchmark/yuv-to-rgb/yuv-to-rgb.factor | 9 +- 7 files changed, 287 insertions(+), 68 deletions(-) diff --git a/basis/compiler/tree/debugger/debugger.factor b/basis/compiler/tree/debugger/debugger.factor index a99e547b31..4bf4cf88f0 100644 --- a/basis/compiler/tree/debugger/debugger.factor +++ b/basis/compiler/tree/debugger/debugger.factor @@ -11,6 +11,8 @@ compiler.tree.normalization compiler.tree.cleanup compiler.tree.propagation compiler.tree.propagation.info +compiler.tree.escape-analysis +compiler.tree.tuple-unboxing compiler.tree.def-use compiler.tree.builder compiler.tree.optimizer @@ -209,6 +211,8 @@ SYMBOL: node-count normalize propagate cleanup + escape-analysis + unbox-tuples apply-identities compute-def-use remove-dead-code diff --git a/basis/compiler/tree/def-use/def-use.factor b/basis/compiler/tree/def-use/def-use.factor index 21e79eb6c4..872b6131c9 100644 --- a/basis/compiler/tree/def-use/def-use.factor +++ b/basis/compiler/tree/def-use/def-use.factor @@ -21,7 +21,7 @@ TUPLE: definition value node uses ; ERROR: no-def-error value ; : def-of ( value -- definition ) - dup def-use get at* [ nip ] [ no-def-error ] if ; + def-use get ?at [ no-def-error ] unless ; ERROR: multiple-defs-error ; diff --git a/basis/compiler/tree/def-use/simplified/simplified-tests.factor b/basis/compiler/tree/def-use/simplified/simplified-tests.factor index a1a768d429..72c7e4c60c 100644 --- a/basis/compiler/tree/def-use/simplified/simplified-tests.factor +++ b/basis/compiler/tree/def-use/simplified/simplified-tests.factor @@ -1,6 +1,6 @@ USING: kernel tools.test compiler.tree compiler.tree.builder -compiler.tree.def-use compiler.tree.def-use.simplified accessors -sequences sorting classes ; +compiler.tree.recursive compiler.tree.def-use +compiler.tree.def-use.simplified accessors sequences sorting classes ; IN: compiler.tree.def-use.simplified [ { #call #return } ] [ @@ -8,3 +8,17 @@ IN: compiler.tree.def-use.simplified first out-d>> first actually-used-by [ node>> class ] map natural-sort ] unit-test + +: word-1 ( a -- b ) dup [ word-1 ] when ; inline recursive + +[ { #introduce } ] [ + [ word-1 ] build-tree analyze-recursive compute-def-use + last in-d>> first actually-defined-by + [ node>> class ] map natural-sort +] unit-test + +[ { #if #return } ] [ + [ word-1 ] build-tree analyze-recursive compute-def-use + first out-d>> first actually-used-by + [ node>> class ] map natural-sort +] unit-test \ No newline at end of file diff --git a/basis/compiler/tree/def-use/simplified/simplified.factor b/basis/compiler/tree/def-use/simplified/simplified.factor index 9b2a2038da..c2fb74c97e 100644 --- a/basis/compiler/tree/def-use/simplified/simplified.factor +++ b/basis/compiler/tree/def-use/simplified/simplified.factor @@ -1,7 +1,7 @@ -! Copyright (C) 2008 Slava Pestov. +! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: sequences kernel fry vectors -compiler.tree compiler.tree.def-use ; +USING: sequences kernel fry vectors accessors namespaces assocs sets +stack-checker.branches compiler.tree compiler.tree.def-use ; IN: compiler.tree.def-use.simplified ! Simplified def-use follows chains of copies. @@ -9,32 +9,85 @@ IN: compiler.tree.def-use.simplified ! A 'real' usage is a usage of a value that is not a #renaming. TUPLE: real-usage value node ; -! Def -GENERIC: actually-defined-by* ( value node -- real-usage ) + + +! Def +GENERIC: actually-defined-by* ( value node -- ) + +: (actually-defined-by) ( value -- ) + [ dup defined-by actually-defined-by* ] if-not-visited ; M: #renaming actually-defined-by* - inputs/outputs swap [ index ] dip nth actually-defined-by ; + inputs/outputs swap [ index ] dip nth (actually-defined-by) ; -M: #return-recursive actually-defined-by* real-usage boa ; +M: #call-recursive actually-defined-by* + [ out-d>> index ] [ label>> return>> in-d>> nth ] bi + (actually-defined-by) ; -M: node actually-defined-by* real-usage boa ; +M: #enter-recursive actually-defined-by* + [ out-d>> index ] keep + [ in-d>> nth (actually-defined-by) ] + [ label>> calls>> [ node>> in-d>> nth (actually-defined-by) ] with each ] 2bi ; + +M: #phi actually-defined-by* + [ out-d>> index ] [ phi-in-d>> ] bi + [ + nth dup +bottom+ eq? + [ drop ] [ (actually-defined-by) ] if + ] with each ; + +M: node actually-defined-by* + real-usage boa accum get conjoin ; + +: actually-defined-by ( value -- real-usages ) + [ (actually-defined-by) ] with-simplified-def-use ; ! Use -GENERIC# actually-used-by* 1 ( value node accum -- ) +GENERIC: actually-used-by* ( value node -- ) -: (actually-used-by) ( value accum -- ) - [ [ used-by ] keep ] dip '[ _ swap _ actually-used-by* ] each ; +: (actually-used-by) ( value -- ) + [ dup used-by [ actually-used-by* ] with each ] if-not-visited ; M: #renaming actually-used-by* - [ inputs/outputs [ indices ] dip nths ] dip - '[ _ (actually-used-by) ] each ; + inputs/outputs [ indices ] dip nths + [ (actually-used-by) ] each ; -M: #return-recursive actually-used-by* [ real-usage boa ] dip push ; +M: #return-recursive actually-used-by* + [ in-d>> index ] keep + [ out-d>> nth (actually-used-by) ] + [ label>> calls>> [ node>> out-d>> nth (actually-used-by) ] with each ] 2bi ; -M: node actually-used-by* [ real-usage boa ] dip push ; +M: #call-recursive actually-used-by* + [ in-d>> index ] [ label>> enter-out>> nth ] bi + (actually-used-by) ; + +M: #enter-recursive actually-used-by* + [ in-d>> index ] [ out-d>> nth ] bi (actually-used-by) ; + +M: #phi actually-used-by* + [ phi-in-d>> [ index ] with map-find drop ] [ out-d>> nth ] bi + (actually-used-by) ; + +M: #recursive actually-used-by* 2drop ; + +M: node actually-used-by* + real-usage boa accum get conjoin ; : actually-used-by ( value -- real-usages ) - 10 [ (actually-used-by) ] keep ; + [ (actually-used-by) ] with-simplified-def-use ; diff --git a/basis/compiler/tree/modular-arithmetic/modular-arithmetic-tests.factor b/basis/compiler/tree/modular-arithmetic/modular-arithmetic-tests.factor index 7d40bf3fc1..9c3f98d412 100644 --- a/basis/compiler/tree/modular-arithmetic/modular-arithmetic-tests.factor +++ b/basis/compiler/tree/modular-arithmetic/modular-arithmetic-tests.factor @@ -1,11 +1,10 @@ ! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: kernel kernel.private tools.test math math.partial-dispatch -math.private accessors slots.private sequences sequences.private strings sbufs -compiler.tree.builder -compiler.tree.normalization -compiler.tree.debugger -alien.accessors layouts combinators byte-arrays ; +prettyprint math.private accessors slots.private sequences +sequences.private strings sbufs compiler.tree.builder +compiler.tree.normalization compiler.tree.debugger alien.accessors +layouts combinators byte-arrays ; IN: compiler.tree.modular-arithmetic.tests : test-modular-arithmetic ( quot -- quot' ) @@ -93,8 +92,6 @@ TUPLE: declared-fixnum { x fixnum } ; [ { string sbuf } declare ] \ push-all def>> append \ >fixnum inlined? ] unit-test - - [ t ] [ [ { integer } declare [ 256 mod ] map @@ -140,6 +137,11 @@ TUPLE: declared-fixnum { x fixnum } ; [ [ >fixnum 255 fixnum-bitand ] ] [ [ >integer 256 rem ] test-modular-arithmetic ] unit-test +[ t ] [ + [ { fixnum fixnum } declare + [ 1 + >fixnum ] [ 2 + >fixnum ] bi ] + { >fixnum } inlined? +] unit-test + [ [ "COMPLEX SHUFFLE" fixnum+fast "COMPLEX SHUFFLE" set-alien-unsigned-1 ] ] [ [ [ { fixnum fixnum } declare + ] 2dip set-alien-unsigned-1 ] test-modular-arithmetic ] unit-test @@ -176,3 +178,74 @@ cell { [ 0 10 10 [ 1 pick 0 + >fixnum pick set-nth-unsafe [ 1 + >fixnum ] dip ] times ] { >fixnum } inlined? ] unit-test + +[ f ] [ [ + >fixnum ] { >fixnum } inlined? ] unit-test + +[ t ] [ + [ >integer [ >fixnum ] [ >fixnum ] bi ] + { >integer } inlined? +] unit-test + +[ f ] [ + [ >integer [ >fixnum ] [ >fixnum ] bi ] + { >fixnum } inlined? +] unit-test + +[ t ] [ + [ >integer [ 2 + >fixnum ] [ 3 + >fixnum ] bi ] + { >integer } inlined? +] unit-test + +[ f ] [ + [ >integer [ 2 + >fixnum ] [ 3 + >fixnum ] bi ] + { >fixnum } inlined? +] unit-test + +[ f ] [ + [ [ { fixnum } declare 2 fixnum+ ] dip [ >fixnum 2 - ] [ ] if ] + { fixnum+ } inlined? +] unit-test + +[ t ] [ + [ { fixnum boolean } declare [ 1 + ] [ "HI" throw ] if >fixnum ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ t ] [ + [ { fixnum boolean } declare [ 1 + ] [ drop 5 ] if >fixnum ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ t ] [ + [ { fixnum boolean } declare [ 1 + ] [ 2 + ] if >fixnum ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ [ [ 1 ] [ 4 ] if ] ] [ + [ [ 1.5 ] [ 4 ] if >fixnum ] test-modular-arithmetic +] unit-test + +[ [ [ 1 ] [ 2 ] if ] ] [ + [ [ 1.5 ] [ 2.3 ] if >fixnum ] test-modular-arithmetic +] unit-test + +[ f ] [ + [ { fixnum fixnum boolean } declare [ [ 3 * ] [ 1 + ] dip ] [ [ 4 - ] [ 2 + ] dip ] if >fixnum ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ t ] [ + [ 0 1000 [ 1 + dup >fixnum . ] times drop ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ t ] [ + [ { fixnum } declare 3 + [ 1000 ] dip [ >fixnum . ] curry times ] + { fixnum+ >fixnum } inlined? +] unit-test + +[ t ] [ + [ 0 1000 [ 1 + ] times >fixnum ] + { fixnum+ >fixnum } inlined? +] unit-test + diff --git a/basis/compiler/tree/modular-arithmetic/modular-arithmetic.factor b/basis/compiler/tree/modular-arithmetic/modular-arithmetic.factor index 148286faba..84f11aeb47 100644 --- a/basis/compiler/tree/modular-arithmetic/modular-arithmetic.factor +++ b/basis/compiler/tree/modular-arithmetic/modular-arithmetic.factor @@ -1,10 +1,11 @@ ! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: math math.partial-dispatch namespaces sequences sets -accessors assocs words kernel memoize fry combinators +USING: math math.private math.partial-dispatch namespaces sequences +sets accessors assocs words kernel memoize fry combinators combinators.short-circuit layouts alien.accessors compiler.tree compiler.tree.combinators +compiler.tree.propagation.info compiler.tree.def-use compiler.tree.def-use.simplified compiler.tree.late-optimizations ; @@ -19,17 +20,24 @@ IN: compiler.tree.modular-arithmetic ! ==> ! [ >fixnum ] bi@ fixnum+fast +! Words where the low-order bits of the output only depends on the +! low-order bits of the input. If the output is only used for its +! low-order bits, then the word can be converted into a form that is +! cheaper to compute. { + - * bitand bitor bitxor } [ [ t "modular-arithmetic" set-word-prop ] each-integer-derived-op ] each -{ bitand bitor bitxor bitnot } +{ bitand bitor bitxor bitnot >integer } [ t "modular-arithmetic" set-word-prop ] each +! Words that only use the low-order bits of their input. If the input +! is a modular arithmetic word, then the input can be converted into +! a form that is cheaper to compute. { - >fixnum + >fixnum bignum>fixnum float>fixnum set-alien-unsigned-1 set-alien-signed-1 set-alien-unsigned-2 set-alien-signed-2 } @@ -38,80 +46,148 @@ cell 8 = [ ] when [ t "low-order" set-word-prop ] each -SYMBOL: modularize-values +! Values which only have their low-order bits used. This set starts out +! big and is gradually refined. +SYMBOL: modular-values : modular-value? ( value -- ? ) - modularize-values get key? ; + modular-values get key? ; -: modularize-value ( value -- ) modularize-values get conjoin ; +: modular-value ( value -- ) + modular-values get conjoin ; -GENERIC: maybe-modularize* ( value node -- ) +! Values which are known to be fixnums. +SYMBOL: fixnum-values -: maybe-modularize ( value -- ) - actually-defined-by [ value>> ] [ node>> ] bi - over actually-used-by length 1 = [ - maybe-modularize* - ] [ 2drop ] if ; +: fixnum-value? ( value -- ? ) + fixnum-values get key? ; -M: #call maybe-modularize* - dup word>> "modular-arithmetic" word-prop [ - [ modularize-value ] - [ in-d>> [ maybe-modularize ] each ] bi* - ] [ 2drop ] if ; +: fixnum-value ( value -- ) + fixnum-values get conjoin ; -M: node maybe-modularize* 2drop ; +GENERIC: compute-modular-candidates* ( node -- ) -GENERIC: compute-modularized-values* ( node -- ) +M: #push compute-modular-candidates* + [ out-d>> first ] [ literal>> ] bi + real? [ [ modular-value ] [ fixnum-value ] bi ] [ drop ] if ; -M: #call compute-modularized-values* - dup word>> "low-order" word-prop - [ in-d>> first maybe-modularize ] [ drop ] if ; +M: #call compute-modular-candidates* + { + { + [ dup word>> "modular-arithmetic" word-prop ] + [ out-d>> first [ modular-value ] [ fixnum-value ] bi ] + } + { + [ dup word>> "low-order" word-prop ] + [ in-d>> first modular-value ] + } + [ drop ] + } cond ; -M: node compute-modularized-values* drop ; +M: node compute-modular-candidates* + drop ; -: compute-modularized-values ( nodes -- ) - [ compute-modularized-values* ] each-node ; +: compute-modular-candidates ( nodes -- ) + H{ } clone modular-values set + H{ } clone fixnum-values set + [ compute-modular-candidates* ] each-node ; + +GENERIC: only-reads-low-order? ( node -- ? ) + +M: #call only-reads-low-order? + { + [ word>> "low-order" word-prop ] + [ + { + [ word>> "modular-arithmetic" word-prop ] + [ out-d>> first modular-values get key? ] + } 1&& + ] + } 1|| ; + +M: node only-reads-low-order? drop f ; + +SYMBOL: changed? + +: only-used-as-low-order? ( value -- ? ) + actually-used-by [ node>> only-reads-low-order? ] all? ; + +: (compute-modular-values) ( -- ) + modular-values get keys [ + dup only-used-as-low-order? + [ drop ] [ modular-values get delete-at changed? on ] if + ] each ; + +: compute-modular-values ( -- ) + [ changed? off (compute-modular-values) changed? get ] loop ; GENERIC: optimize-modular-arithmetic* ( node -- nodes ) +M: #push optimize-modular-arithmetic* + dup out-d>> first modular-value? [ + [ >fixnum ] change-literal + ] when ; + +: input-will-be-fixnum? ( #call -- ? ) + in-d>> first actually-defined-by + [ value>> { [ modular-value? ] [ fixnum-value? ] } 1&& ] all? ; + +: output-will-be-coerced? ( #call -- ? ) + out-d>> first modular-value? ; + : redundant->fixnum? ( #call -- ? ) - in-d>> first actually-defined-by value>> modular-value? ; + { + [ input-will-be-fixnum? ] + [ output-will-be-coerced? ] + } 1|| ; : optimize->fixnum ( #call -- nodes ) dup redundant->fixnum? [ drop f ] when ; +: should-be->fixnum? ( #call -- ? ) + out-d>> first modular-value? ; + : optimize->integer ( #call -- nodes ) - dup out-d>> first actually-used-by dup length 1 = [ - first node>> { [ #call? ] [ word>> \ >fixnum eq? ] } 1&& - [ drop { } ] when - ] [ drop ] if ; + dup should-be->fixnum? [ \ >fixnum >>word ] when ; MEMO: fixnum-coercion ( flags -- nodes ) + ! flags indicate which input parameters are already known to be fixnums, + ! and don't need a coercion as a result. [ [ ] [ >fixnum ] ? ] map '[ _ spread ] splice-quot ; +: modular-value-info ( #call -- alist ) + [ in-d>> ] [ out-d>> ] bi append + fixnum '[ _ ] { } map>assoc ; + : optimize-modular-op ( #call -- nodes ) dup out-d>> first modular-value? [ [ in-d>> ] [ word>> integer-op-input-classes ] [ ] tri [ [ - [ actually-defined-by value>> modular-value? ] + [ actually-defined-by [ value>> modular-value? ] all? ] [ fixnum eq? ] bi* or ] 2map fixnum-coercion ] [ [ modular-variant ] change-word ] bi* suffix ] when ; +: optimize-low-order-op ( #call -- nodes ) + dup in-d>> first modular-value? [ + [ ] [ in-d>> first ] [ info>> ] tri + [ drop fixnum ] change-at + ] when ; + M: #call optimize-modular-arithmetic* dup word>> { - { [ dup \ >fixnum eq? ] [ drop optimize->fixnum ] } + { [ dup { >fixnum bignum>fixnum float>fixnum } memq? ] [ drop optimize->fixnum ] } { [ dup \ >integer eq? ] [ drop optimize->integer ] } { [ dup "modular-arithmetic" word-prop ] [ drop optimize-modular-op ] } + { [ dup "low-order" word-prop ] [ drop optimize-low-order-op ] } [ drop ] } cond ; M: node optimize-modular-arithmetic* ; : optimize-modular-arithmetic ( nodes -- nodes' ) - H{ } clone modularize-values set - dup compute-modularized-values + dup compute-modular-candidates compute-modular-values [ optimize-modular-arithmetic* ] map-nodes ; diff --git a/extra/benchmark/yuv-to-rgb/yuv-to-rgb.factor b/extra/benchmark/yuv-to-rgb/yuv-to-rgb.factor index ca57de822f..9562e42c4e 100644 --- a/extra/benchmark/yuv-to-rgb/yuv-to-rgb.factor +++ b/extra/benchmark/yuv-to-rgb/yuv-to-rgb.factor @@ -36,8 +36,7 @@ C-STRUCT: yuv_buffer 255 min 0 max ; inline : stride ( line yuv -- uvy yy ) - [ yuv_buffer-uv_stride swap 2/ * >fixnum ] - [ yuv_buffer-y_stride * >fixnum ] 2bi ; inline + [ yuv_buffer-uv_stride swap 2/ * ] [ yuv_buffer-y_stride * ] 2bi ; inline : compute-y ( yuv uvy yy x -- y ) + >fixnum nip swap yuv_buffer-y swap alien-unsigned-1 16 - ; inline @@ -74,16 +73,16 @@ C-STRUCT: yuv_buffer drop ; inline : yuv>rgb-pixel ( index rgb yuv uvy yy x -- index ) - compute-yuv compute-rgb store-rgb 3 + >fixnum ; inline + compute-yuv compute-rgb store-rgb 3 + ; inline : yuv>rgb-row ( index rgb yuv y -- index ) over stride - pick yuv_buffer-y_width >fixnum + pick yuv_buffer-y_width [ yuv>rgb-pixel ] with with with with each ; inline : yuv>rgb ( rgb yuv -- ) [ 0 ] 2dip - dup yuv_buffer-y_height >fixnum + dup yuv_buffer-y_height [ yuv>rgb-row ] with with each drop ;