diff --git a/basis/compiler/cfg/hats/hats.factor b/basis/compiler/cfg/hats/hats.factor
index 4bfcb3dac8..cf5c0095ca 100644
--- a/basis/compiler/cfg/hats/hats.factor
+++ b/basis/compiler/cfg/hats/hats.factor
@@ -45,6 +45,7 @@ insn-classes get [
     [ next-vreg dup ] dip {
         { [ dup not ] [ drop \ f tag-number ##load-immediate ] }
         { [ dup fixnum? ] [ tag-fixnum ##load-immediate ] }
+        { [ dup float? ] [ ##load-constant ] }
         [ ##load-reference ]
     } cond ;
 
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index 716ae46592..97bdccf045 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -29,6 +29,10 @@ INSN: ##load-reference
 def: dst/int-rep
 constant: obj ;
 
+INSN: ##load-constant
+def: dst/int-rep
+constant: obj ;
+
 INSN: ##peek
 def: dst/int-rep
 literal: loc ;
diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor
index d9c2eab6c3..f103a0195f 100644
--- a/basis/compiler/cfg/representations/representations.factor
+++ b/basis/compiler/cfg/representations/representations.factor
@@ -96,9 +96,8 @@ SYMBOL: always-boxed
     H{ } clone [
         '[
             [
-                dup ##load-reference? [ drop ] [
-                    [ _ (compute-always-boxed) ] each-def-rep
-                ] if
+                dup [ ##load-reference? ] [ ##load-constant? ] bi or
+                [ drop ] [ [ _ (compute-always-boxed) ] each-def-rep ] if
             ] each-non-phi
         ] each-basic-block
     ] keep ;
diff --git a/basis/compiler/cfg/value-numbering/expressions/expressions.factor b/basis/compiler/cfg/value-numbering/expressions/expressions.factor
index 03aa28d70a..0ac973a206 100644
--- a/basis/compiler/cfg/value-numbering/expressions/expressions.factor
+++ b/basis/compiler/cfg/value-numbering/expressions/expressions.factor
@@ -14,10 +14,10 @@ C: <constant> constant-expr
 
 M: constant-expr equal?
     over constant-expr? [
-        {
-            [ [ value>> class ] bi@ = ]
-            [ [ value>> ] bi@ = ]
-        } 2&&
+        [ value>> ] bi@
+        2dup [ float? ] both? [ fp-bitwise= ] [
+            { [ [ class ] bi@ = ] [ = ] } 2&&
+        ] if
     ] [ 2drop f ] if ;
 
 TUPLE: reference-expr < expr value ;
@@ -25,13 +25,7 @@ TUPLE: reference-expr < expr value ;
 C: <reference> reference-expr
 
 M: reference-expr equal?
-    over reference-expr? [
-        [ value>> ] bi@ {
-            { [ 2dup eq? ] [ 2drop t ] }
-            { [ 2dup [ float? ] both? ] [ fp-bitwise= ] }
-            [ 2drop f ]
-        } cond
-    ] [ 2drop f ] if ;
+    over reference-expr? [ [ value>> ] bi@ eq? ] [ 2drop f ] if ;
 
 : constant>vn ( constant -- vn ) <constant> expr>vn ; inline
 
@@ -43,6 +37,8 @@ M: ##load-immediate >expr val>> <constant> ;
 
 M: ##load-reference >expr obj>> <reference> ;
 
+M: ##load-constant >expr obj>> <constant> ;
+
 <<
 
 : input-values ( slot-specs -- slot-specs' )
diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
index e598862c2b..5759d7467a 100755
--- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
+++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
@@ -2,7 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors combinators combinators.short-circuit arrays
 fry kernel layouts math namespaces sequences cpu.architecture
-math.bitwise math.order classes vectors locals make
+math.bitwise math.order math.vectors.simd.intrinsics classes
+vectors locals make alien.c-types io.binary grouping
 compiler.cfg
 compiler.cfg.registers
 compiler.cfg.comparisons
@@ -184,7 +185,7 @@ M: ##compare-branch rewrite
 : >boolean-insn ( insn ? -- insn' )
     [ dst>> ] dip
     {
-        { t [ t \ ##load-reference new-insn ] }
+        { t [ t \ ##load-constant new-insn ] }
         { f [ \ f tag-number \ ##load-immediate new-insn ] }
     } case ;
 
@@ -258,16 +259,25 @@ M: ##sub-imm rewrite
         [ sub-imm>add-imm ]
     } cond ;
 
-: strength-reduce-mul ( insn -- insn' )
-    [ [ dst>> ] [ src1>> ] bi ] [ src2>> log2 ] bi \ ##shl-imm new-insn ;
+! x * -1 => neg(x)
+: mul-to-neg? ( insn -- ? )
+    src2>> -1 = ;
 
-: strength-reduce-mul? ( insn -- ? )
+: mul-to-neg ( insn -- insn' )
+    [ dst>> ] [ src1>> ] bi \ ##neg new-insn ;
+
+! x * 2^n => x << n
+: mul-to-shl? ( insn -- ? )
     src2>> power-of-2? ;
 
+: mul-to-shl ( insn -- insn' )
+    [ [ dst>> ] [ src1>> ] bi ] [ src2>> log2 ] bi \ ##shl-imm new-insn ;
+
 M: ##mul-imm rewrite
     {
         { [ dup constant-fold? ] [ constant-fold ] }
-        { [ dup strength-reduce-mul? ] [ strength-reduce-mul ] }
+        { [ dup mul-to-neg? ] [ mul-to-neg ] }
+        { [ dup mul-to-shl? ] [ mul-to-shl ] }
         { [ dup src1>> vreg>expr mul-imm-expr? ] [ \ ##mul-imm reassociate ] }
         [ drop f ]
     } cond ;
@@ -338,8 +348,17 @@ M: ##add rewrite \ ##add-imm rewrite-arithmetic-commutative ;
 : rewrite-subtraction-identity ( insn -- insn' )
     dst>> 0 \ ##load-immediate new-insn ;
 
+! 0 - x => neg(x). src1 of an instruction is a vreg, so it is
+! looked up with vreg>expr like the other rewrites in this file.
+: sub-to-neg? ( ##sub -- ? )
+    src1>> vreg>expr expr-zero? ;
+
+: sub-to-neg ( ##sub -- insn )
+    [ dst>> ] [ src2>> ] bi \ ##neg new-insn ;
+
 M: ##sub rewrite
     {
+        { [ dup sub-to-neg? ] [ sub-to-neg ] }
         { [ dup subtraction-identity? ] [ rewrite-subtraction-identity ] }
         [ \ ##sub-imm rewrite-arithmetic ]
     } cond ;
@@ -375,3 +394,48 @@ M: ##sar rewrite \ ##sar-imm rewrite-arithmetic ;
 M: ##unbox-any-c-ptr rewrite
     dup src>> vreg>expr dup box-displaced-alien-expr?
     [ rewrite-unbox-displaced-alien ] [ 2drop f ] if ;
+
+! Some lame constant folding for SIMD intrinsics. Eventually this
+! should be redone completely.
+
+: rewrite-shuffle-vector ( insn expr -- insn' )
+    2dup [ rep>> ] bi@ eq? [
+        [ [ dst>> ] [ src>> vn>vreg ] bi* ]
+        [ [ shuffle>> ] bi@ nths ]
+        [ drop rep>> ]
+        2tri \ ##shuffle-vector new-insn
+    ] [ 2drop f ] if ;
+
+! Regroup the constant's bytes into one group per shuffle element
+! and pick the groups in shuffle order.
+: (fold-shuffle-vector) ( shuffle bytes -- bytes' )
+    2dup length swap length /i group nths concat ;
+
+: fold-shuffle-vector ( insn expr -- insn' )
+    [ [ dst>> ] [ shuffle>> ] bi ] dip value>>
+    (fold-shuffle-vector) \ ##load-constant new-insn ;
+
+M: ##shuffle-vector rewrite
+    dup src>> vreg>expr {
+        { [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] }
+        { [ dup reference-expr? ] [ fold-shuffle-vector ] }
+        { [ dup constant-expr? ] [ fold-shuffle-vector ] }
+        [ 2drop f ]
+    } cond ;
+
+! Replicate the scalar's bytes once per vector component to build
+! the constant vector.
+: (fold-scalar>vector) ( insn bytes -- insn' )
+    [ [ dst>> ] [ rep>> rep-components ] bi ] dip <repetition> concat
+    \ ##load-constant new-insn ;
+
+: fold-scalar>vector ( insn expr -- insn' )
+    value>> over rep>> {
+        { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
+        { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
+        [ rep-component-type heap-size >le (fold-scalar>vector) ]
+    } case ;
+
+M: ##scalar>vector rewrite
+    dup src>> vreg>expr dup constant-expr?
+    [ fold-scalar>vector ] [ 2drop f ] if ;
diff --git a/basis/compiler/cfg/value-numbering/simplify/simplify.factor b/basis/compiler/cfg/value-numbering/simplify/simplify.factor
index e930bcaae9..c2026a9483 100644
--- a/basis/compiler/cfg/value-numbering/simplify/simplify.factor
+++ b/basis/compiler/cfg/value-numbering/simplify/simplify.factor
@@ -1,6 +1,7 @@
 ! Copyright (C) 2008, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel accessors combinators classes math layouts
+sequences math.vectors.simd.intrinsics
 compiler.cfg.instructions
 compiler.cfg.value-numbering.graph
 compiler.cfg.value-numbering.expressions ;
@@ -22,6 +23,24 @@ M: unbox-any-c-ptr-expr simplify* simplify-unbox-alien ;
 
 : expr-one? ( expr -- ? ) T{ constant-expr f 1 } = ; inline
 
+: expr-neg-one? ( expr -- ? ) T{ constant-expr f -1 } = ; inline
+
+: >unary-expr< ( expr -- in ) src>> vn>expr ; inline
+
+! neg(neg(x)) => x
+M: neg-expr simplify*
+    >unary-expr< {
+        { [ dup neg-expr? ] [ src>> ] }
+        [ drop f ]
+    } cond ;
+
+! not(not(x)) => x
+M: not-expr simplify*
+    >unary-expr< {
+        { [ dup not-expr? ] [ src>> ] }
+        [ drop f ]
+    } cond ;
+
 : >binary-expr< ( expr -- in1 in2 )
     [ src1>> vn>expr ] [ src2>> vn>expr ] bi ; inline
 
@@ -113,6 +132,18 @@ M: box-displaced-alien-expr simplify*
         [ 2drop f ]
     } cond ;
 
+! scalar>vector(vector>scalar(v)) => v
+M: scalar>vector-expr simplify*
+    src>> vn>expr {
+        { [ dup vector>scalar-expr? ] [ src>> ] }
+        [ drop f ]
+    } cond ;
+
+! A shuffle whose indices are 0..n-1 leaves the vector unchanged.
+M: shuffle-vector-expr simplify*
+    [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
+    sequence= [ drop f ] unless ;
+
 M: expr simplify* drop f ;
 
 : simplify ( expr -- vn )
diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
index 1a28aaa969..663a2f0193 100644
--- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
+++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
@@ -20,15 +20,15 @@ IN: compiler.cfg.value-numbering.tests
 ! Folding constants together
 [
     {
-        T{ ##load-reference f 0 0.0 }
-        T{ ##load-reference f 1 -0.0 }
+        T{ ##load-constant f 0 0.0 }
+        T{ ##load-constant f 1 -0.0 }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     }
 ] [
     {
-        T{ ##load-reference f 0 0.0 }
-        T{ ##load-reference f 1 -0.0 }
+        T{ ##load-constant f 0 0.0 }
+        T{ ##load-constant f 1 -0.0 }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     } value-numbering-step
@@ -36,15 +36,15 @@ IN: compiler.cfg.value-numbering.tests
 
 [
     {
-        T{ ##load-reference f 0 0.0 }
+        T{ ##load-constant f 0 0.0 }
         T{ ##copy f 1 0 any-rep }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     }
 ] [
     {
-        T{ ##load-reference f 0 0.0 }
-        T{ ##load-reference f 1 0.0 }
+        T{ ##load-constant f 0 0.0 }
+        T{ ##load-constant f 1 0.0 }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     } value-numbering-step
@@ -52,15 +52,15 @@ IN: compiler.cfg.value-numbering.tests
 
 [
     {
-        T{ ##load-reference f 0 t }
+        T{ ##load-constant f 0 t }
         T{ ##copy f 1 0 any-rep }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     }
 ] [
     {
-        T{ ##load-reference f 0 t }
-        T{ ##load-reference f 1 t }
+        T{ ##load-constant f 0 t }
+        T{ ##load-constant f 1 t }
         T{ ##replace f 0 D 0 }
         T{ ##replace f 1 D 1 }
     } value-numbering-step
@@ -236,6 +236,78 @@ IN: compiler.cfg.value-numbering.tests
     } value-numbering-step
 ] unit-test
 
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 -1 }
+        T{ ##neg f 2 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 -1 }
+        T{ ##mul f 2 0 1 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 -1 }
+        T{ ##neg f 2 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 -1 }
+        T{ ##mul f 2 1 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 0 }
+        T{ ##neg f 2 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 0 }
+        T{ ##sub f 2 1 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 0 }
+        T{ ##neg f 2 0 }
+        T{ ##copy f 3 0 any-rep }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##load-immediate f 1 0 }
+        T{ ##sub f 2 1 0 }
+        T{ ##sub f 3 1 2 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##not f 1 0 }
+        T{ ##copy f 2 0 any-rep }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##not f 1 0 }
+        T{ ##not f 2 1 }
+    } value-numbering-step
+] unit-test
+
 [
     {
         T{ ##peek f 0 D 0 }
@@ -947,7 +1019,7 @@ cell 8 = [
     {
         T{ ##load-immediate f 1 1 }
         T{ ##load-immediate f 2 2 }
-        T{ ##load-reference f 3 t }
+        T{ ##load-constant f 3 t }
     }
 ] [
     {
@@ -961,7 +1033,7 @@ cell 8 = [
     {
         T{ ##load-immediate f 1 1 }
         T{ ##load-immediate f 2 2 }
-        T{ ##load-reference f 3 t }
+        T{ ##load-constant f 3 t }
     }
 ] [
     {
@@ -1000,7 +1072,7 @@ cell 8 = [
 [
     {
         T{ ##peek f 0 D 0 }
-        T{ ##load-reference f 1 t }
+        T{ ##load-constant f 1 t }
     }
 ] [
     {
@@ -1024,7 +1096,7 @@ cell 8 = [
 [
     {
         T{ ##peek f 0 D 0 }
-        T{ ##load-reference f 1 t }
+        T{ ##load-constant f 1 t }
     }
 ] [
     {
@@ -1048,7 +1120,7 @@ cell 8 = [
 [
     {
         T{ ##peek f 0 D 0 }
-        T{ ##load-reference f 1 t }
+        T{ ##load-constant f 1 t }
     }
 ] [
     {
@@ -1057,6 +1129,66 @@ cell 8 = [
     } value-numbering-step
 ] unit-test
 
+[
+    {
+        T{ ##vector>scalar f 1 0 float-4-rep }
+        T{ ##copy f 2 0 any-rep }
+    }
+] [
+    {
+        T{ ##vector>scalar f 1 0 float-4-rep }
+        T{ ##scalar>vector f 2 1 float-4-rep }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##copy f 1 0 any-rep }
+    }
+] [
+    {
+        T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
+        T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep }
+    }
+] [
+    {
+        T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
+        T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
+        T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
+    }
+] [
+    {
+        T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
+        T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##load-constant f 0 1.25 }
+        T{ ##load-constant f 1 B{ 0 0 160 63 0 0 160 63 0 0 160 63 0 0 160 63 } }
+        T{ ##copy f 2 1 any-rep }
+    }
+] [
+    {
+        T{ ##load-constant f 0 1.25 }
+        T{ ##scalar>vector f 1 0 float-4-rep }
+        T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
+    } value-numbering-step
+] unit-test
+
 : test-branch-folding ( insns -- insns' n )
     <basic-block>
     [ V{ 0 1 } clone >>successors basic-block set value-numbering-step ] keep
@@ -1203,7 +1335,7 @@ cell 8 = [
 [
     {
         T{ ##peek f 0 D 0 }
-        T{ ##load-reference f 1 t }
+        T{ ##load-constant f 1 t }
         T{ ##branch }
     }
     0
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index 8e99f79b36..b0307f685d 100755
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -110,6 +110,7 @@ SYNTAX: CODEGEN:
 
 CODEGEN: ##load-immediate %load-immediate
 CODEGEN: ##load-reference %load-reference
+CODEGEN: ##load-constant %load-reference
 CODEGEN: ##peek %peek
 CODEGEN: ##replace %replace
 CODEGEN: ##inc-d %inc-d