From cdc7b7e2c75546340bbbeab27a5fbd00f5c429e1 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 30 Sep 2009 05:00:36 -0500 Subject: [PATCH] Various minor compiler tweaks: Combine address calculation with dereferencing in alien accessors; convert SIMD XOR of a vector with itself into an XOR of the destination with itself; convert SIMD unbox of zero vector into XOR of the destination with itself; fix SIMD indexing on x86-64 --- .../cfg/instructions/instructions.factor | 57 +++++++++++++------ .../cfg/intrinsics/alien/alien.factor | 8 +-- .../representations/representations.factor | 23 +++++++- .../value-numbering/rewrite/rewrite.factor | 27 +++++++++ .../value-numbering-tests.factor | 10 ++++ basis/cpu/architecture/architecture.factor | 42 ++++++++------ basis/cpu/x86/x86.factor | 35 ++++++------ 7 files changed, 146 insertions(+), 56 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 97bdccf045..cf0f668db3 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -468,65 +468,88 @@ use: src/int-rep ; ! Alien accessors INSN: ##alien-unsigned-1 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-unsigned-2 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-unsigned-4 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-signed-1 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-signed-2 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-signed-4 def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-cell def: dst/int-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-float def: dst/float-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-double def: dst/double-rep -use: src/int-rep ; +use: src/int-rep +literal: offset ; INSN: ##alien-vector def: dst use: src/int-rep -literal: rep ; +literal: offset rep ; INSN: ##set-alien-integer-1 -use: src/int-rep value/int-rep ; +use: src/int-rep +literal: offset +use: value/int-rep ; INSN: ##set-alien-integer-2 -use: src/int-rep value/int-rep ; +use: src/int-rep +literal: offset +use: value/int-rep ; INSN: ##set-alien-integer-4 -use: src/int-rep value/int-rep ; +use: src/int-rep +literal: offset +use: value/int-rep ; INSN: ##set-alien-cell -use: src/int-rep value/int-rep ; +use: src/int-rep +literal: offset +use: value/int-rep ; INSN: ##set-alien-float -use: src/int-rep value/float-rep ; +use: src/int-rep +literal: offset +use: value/float-rep ; INSN: ##set-alien-double -use: src/int-rep value/double-rep ; +use: src/int-rep +literal: offset +use: value/double-rep ; INSN: ##set-alien-vector -use: src/int-rep value +use: src/int-rep +literal: offset +use: value literal: rep ; ! Memory allocation diff --git a/basis/compiler/cfg/intrinsics/alien/alien.factor b/basis/compiler/cfg/intrinsics/alien/alien.factor index 2b903813a0..bc6baa21b7 100644 --- a/basis/compiler/cfg/intrinsics/alien/alien.factor +++ b/basis/compiler/cfg/intrinsics/alien/alien.factor @@ -33,10 +33,10 @@ IN: compiler.cfg.intrinsics.alien [ second class>> fixnum class<= ] bi and ; -: prepare-alien-accessor ( info -- offset-vreg ) - class>> [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add ; +: prepare-alien-accessor ( info -- ptr-vreg offset ) + class>> [ 2inputs ^^untag-fixnum swap ] dip ^^unbox-c-ptr ^^add 0 ; -: prepare-alien-getter ( infos -- offset-vreg ) +: prepare-alien-getter ( infos -- ptr-vreg offset ) first prepare-alien-accessor ; : inline-alien-getter ( node quot -- ) @@ -49,7 +49,7 @@ IN: compiler.cfg.intrinsics.alien [ third class>> fixnum class<= ] tri and and ; -: prepare-alien-setter ( infos -- offset-vreg ) +: prepare-alien-setter ( infos -- ptr-vreg offset ) second prepare-alien-accessor ; : inline-alien-integer-setter ( node quot -- ) diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor index f103a0195f..423f415742 100644 --- a/basis/compiler/cfg/representations/representations.factor +++ b/basis/compiler/cfg/representations/representations.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2009 Slava Pestov ! See http://factorcode.org/license.txt for BSD license. USING: kernel fry accessors sequences assocs sets namespaces -arrays combinators make locals deques dlists layouts -cpu.architecture compiler.utilities +arrays combinators combinators.short-circuit make locals deques +dlists layouts cpu.architecture compiler.utilities compiler.cfg compiler.cfg.rpo compiler.cfg.hats @@ -208,6 +208,25 @@ SYMBOL: phi-mappings M: ##phi conversions-for-insn [ , ] [ [ inputs>> values ] [ dst>> ] bi phi-mappings get set-at ] bi ; +! When a literal zero vector is unboxed, we replace the ##load-reference +! with a ##zero-vector instruction since this is more efficient. +: convert-to-zero-vector? ( insn -- ? ) + { + [ dst>> rep-of vector-rep? ] + [ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ] + } 1&& ; + +: convert-to-zero-vector ( insn -- ) + dst>> dup rep-of ##zero-vector ; + +M: ##load-reference conversions-for-insn + dup convert-to-zero-vector? + [ convert-to-zero-vector ] [ call-next-method ] if ; + +M: ##load-constant conversions-for-insn + dup convert-to-zero-vector? + [ convert-to-zero-vector ] [ call-next-method ] if ; + M: vreg-insn conversions-for-insn [ compute-renaming-set ] [ perform-renaming ] bi ; diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 5759d7467a..d38c25a841 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -391,6 +391,29 @@ M: ##unbox-any-c-ptr rewrite dup src>> vreg>expr dup box-displaced-alien-expr? [ rewrite-unbox-displaced-alien ] [ 2drop f ] if ; +! More efficient addressing for alien intrinsics +: rewrite-alien-addressing ( insn -- insn' ) + dup src>> vreg>expr dup add-imm-expr? [ + [ src1>> vn>vreg ] [ src2>> vn>constant ] bi + [ >>src ] [ '[ _ + ] change-offset ] bi* + ] [ 2drop f ] if ; + +M: ##alien-unsigned-1 rewrite rewrite-alien-addressing ; +M: ##alien-unsigned-2 rewrite rewrite-alien-addressing ; +M: ##alien-unsigned-4 rewrite rewrite-alien-addressing ; +M: ##alien-signed-1 rewrite rewrite-alien-addressing ; +M: ##alien-signed-2 rewrite rewrite-alien-addressing ; +M: ##alien-signed-4 rewrite rewrite-alien-addressing ; +M: ##alien-float rewrite rewrite-alien-addressing ; +M: ##alien-double rewrite rewrite-alien-addressing ; +M: ##alien-vector rewrite rewrite-alien-addressing ; +M: ##set-alien-integer-1 rewrite rewrite-alien-addressing ; +M: ##set-alien-integer-2 rewrite rewrite-alien-addressing ; +M: ##set-alien-integer-4 rewrite rewrite-alien-addressing ; +M: ##set-alien-float rewrite rewrite-alien-addressing ; +M: ##set-alien-double rewrite rewrite-alien-addressing ; +M: ##set-alien-vector rewrite rewrite-alien-addressing ; + ! Some lame constant folding for SIMD intrinsics. Eventually this ! should be redone completely. @@ -431,3 +454,7 @@ M: ##shuffle-vector rewrite M: ##scalar>vector rewrite dup src>> vreg>expr dup constant-expr? [ fold-scalar>vector ] [ 2drop f ] if ; + +M: ##xor-vector rewrite + dup [ src1>> vreg>vn ] [ src2>> vreg>vn ] bi eq? + [ [ dst>> ] [ rep>> ] bi \ ##zero-vector new-insn ] [ drop f ] if ; diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index 663a2f0193..7751cba039 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -1189,6 +1189,16 @@ cell 8 = [ } value-numbering-step ] unit-test +[ + { + T{ ##zero-vector f 2 float-4-rep } + } +] [ + { + T{ ##xor-vector f 2 1 1 float-4-rep } + } value-numbering-step +] unit-test + : test-branch-folding ( insns -- insns' n ) [ V{ 0 1 } clone >>successors basic-block set value-numbering-step ] keep diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 9d6f8fd662..3b1f57d08e 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -114,6 +114,14 @@ M: float-rep rep-size drop 4 ; M: double-rep rep-size drop 8 ; M: stack-params rep-size drop cell ; M: vector-rep rep-size drop 16 ; +M: char-scalar-rep rep-size drop 1 ; +M: uchar-scalar-rep rep-size drop 1 ; +M: short-scalar-rep rep-size drop 2 ; +M: ushort-scalar-rep rep-size drop 2 ; +M: int-scalar-rep rep-size drop 4 ; +M: uint-scalar-rep rep-size drop 4 ; +M: longlong-scalar-rep rep-size drop 8 ; +M: ulonglong-scalar-rep rep-size drop 8 ; GENERIC: rep-component-type ( rep -- n ) @@ -277,24 +285,24 @@ HOOK: %unbox-any-c-ptr cpu ( dst src temp -- ) HOOK: %box-alien cpu ( dst src temp -- ) HOOK: %box-displaced-alien cpu ( dst displacement base temp1 temp2 base-class -- ) -HOOK: %alien-unsigned-1 cpu ( dst src -- ) -HOOK: %alien-unsigned-2 cpu ( dst src -- ) -HOOK: %alien-unsigned-4 cpu ( dst src -- ) -HOOK: %alien-signed-1 cpu ( dst src -- ) -HOOK: %alien-signed-2 cpu ( dst src -- ) -HOOK: %alien-signed-4 cpu ( dst src -- ) -HOOK: %alien-cell cpu ( dst src -- ) -HOOK: %alien-float cpu ( dst src -- ) -HOOK: %alien-double cpu ( dst src -- ) -HOOK: %alien-vector cpu ( dst src rep -- ) +HOOK: %alien-unsigned-1 cpu ( dst src offset -- ) +HOOK: %alien-unsigned-2 cpu ( dst src offset -- ) +HOOK: %alien-unsigned-4 cpu ( dst src offset -- ) +HOOK: %alien-signed-1 cpu ( dst src offset -- ) +HOOK: %alien-signed-2 cpu ( dst src offset -- ) +HOOK: %alien-signed-4 cpu ( dst src offset -- ) +HOOK: %alien-cell cpu ( dst src offset -- ) +HOOK: %alien-float cpu ( dst src offset -- ) +HOOK: %alien-double cpu ( dst src offset -- ) +HOOK: %alien-vector cpu ( dst src offset rep -- ) -HOOK: %set-alien-integer-1 cpu ( ptr value -- ) -HOOK: %set-alien-integer-2 cpu ( ptr value -- ) -HOOK: %set-alien-integer-4 cpu ( ptr value -- ) -HOOK: %set-alien-cell cpu ( ptr value -- ) -HOOK: %set-alien-float cpu ( ptr value -- ) -HOOK: %set-alien-double cpu ( ptr value -- ) -HOOK: %set-alien-vector cpu ( ptr value rep -- ) +HOOK: %set-alien-integer-1 cpu ( ptr offset value -- ) +HOOK: %set-alien-integer-2 cpu ( ptr offset value -- ) +HOOK: %set-alien-integer-4 cpu ( ptr offset value -- ) +HOOK: %set-alien-cell cpu ( ptr offset value -- ) +HOOK: %set-alien-float cpu ( ptr offset value -- ) +HOOK: %set-alien-double cpu ( ptr offset value -- ) +HOOK: %set-alien-vector cpu ( ptr offset value rep -- ) HOOK: %alien-global cpu ( dst symbol library -- ) HOOK: %vm-field-ptr cpu ( dst fieldname -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 14a382d6cf..eaaab19662 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -307,45 +307,45 @@ M:: x86 %set-string-nth-fast ( ch str index temp -- ) temp string-offset [+] new-ch 8-bit-version-of MOV ] with-small-register ; -:: %alien-integer-getter ( dst src size quot -- ) +:: %alien-integer-getter ( dst src offset size quot -- ) dst { src } size [| new-dst | - new-dst dup size n-bit-version-of dup src [] MOV + new-dst dup size n-bit-version-of dup src offset [+] MOV quot call dst new-dst int-rep %copy ] with-small-register ; inline -: %alien-unsigned-getter ( dst src size -- ) +: %alien-unsigned-getter ( dst src offset size -- ) [ MOVZX ] %alien-integer-getter ; inline M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ; M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ; M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ; -: %alien-signed-getter ( dst src size -- ) +: %alien-signed-getter ( dst src offset size -- ) [ MOVSX ] %alien-integer-getter ; inline M: x86 %alien-signed-1 8 %alien-signed-getter ; M: x86 %alien-signed-2 16 %alien-signed-getter ; M: x86 %alien-signed-4 32 %alien-signed-getter ; -M: x86 %alien-cell [] MOV ; -M: x86 %alien-float [] MOVSS ; -M: x86 %alien-double [] MOVSD ; -M: x86 %alien-vector [ [] ] dip %copy ; +M: x86 %alien-cell [+] MOV ; +M: x86 %alien-float [+] MOVSS ; +M: x86 %alien-double [+] MOVSD ; +M: x86 %alien-vector [ [+] ] dip %copy ; -:: %alien-integer-setter ( ptr value size -- ) +:: %alien-integer-setter ( ptr offset value size -- ) value { ptr } size [| new-value | new-value value int-rep %copy - ptr [] new-value size n-bit-version-of MOV + ptr offset [+] new-value size n-bit-version-of MOV ] with-small-register ; inline M: x86 %set-alien-integer-1 8 %alien-integer-setter ; M: x86 %set-alien-integer-2 16 %alien-integer-setter ; M: x86 %set-alien-integer-4 32 %alien-integer-setter ; -M: x86 %set-alien-cell [ [] ] dip MOV ; -M: x86 %set-alien-float [ [] ] dip MOVSS ; -M: x86 %set-alien-double [ [] ] dip MOVSD ; -M: x86 %set-alien-vector [ [] ] 2dip %copy ; +M: x86 %set-alien-cell [ [+] ] dip MOV ; +M: x86 %set-alien-float [ [+] ] dip MOVSS ; +M: x86 %set-alien-double [ [+] ] dip MOVSD ; +M: x86 %set-alien-vector [ [+] ] 2dip %copy ; : shift-count? ( reg -- ? ) { ECX RCX } memq? ; @@ -1042,8 +1042,11 @@ M: x86 %shr-vector-reps { sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } } } available-reps ; -M: x86 %integer>scalar drop MOVD ; -M: x86 %scalar>integer drop MOVD ; +: scalar-sized-reg ( reg rep -- reg' ) + rep-size 8 * n-bit-version-of ; + +M: x86 %integer>scalar scalar-sized-reg MOVD ; +M: x86 %scalar>integer swap [ scalar-sized-reg ] dip MOVD ; M: x86 %vector>scalar %copy ; M: x86 %scalar>vector %copy ;