Merge branch 'master' of factorcode.org:/git/factor
commit
d855d0a8a6
|
|
@ -38,10 +38,10 @@ M: ##spill compute-stack-frame* drop frame-required ;
|
|||
M: ##reload compute-stack-frame* drop frame-required ;
|
||||
|
||||
M: ##float>integer compute-stack-frame*
|
||||
drop cpu ppc? [ frame-required ] when ;
|
||||
drop integer-float-needs-stack-frame? [ frame-required ] when ;
|
||||
|
||||
M: ##integer>float compute-stack-frame*
|
||||
drop cpu ppc? [ frame-required ] when ;
|
||||
drop integer-float-needs-stack-frame? [ frame-required ] when ;
|
||||
|
||||
M: insn compute-stack-frame* drop ;
|
||||
|
||||
|
|
|
|||
|
|
@ -40,9 +40,9 @@ IN: compiler.cfg.linear-scan
|
|||
: admissible-registers ( cfg -- regs )
|
||||
[ machine-registers ] dip
|
||||
frame-pointer?>> [
|
||||
[ int-regs ] dip clone
|
||||
[ int-regs ] dip [ clone ] map
|
||||
[ [ [ frame-reg ] dip remove ] change-at ] keep
|
||||
] unless ;
|
||||
] when ;
|
||||
|
||||
: linear-scan ( cfg -- cfg' )
|
||||
dup dup admissible-registers (linear-scan) ;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2009, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs fry kernel namespaces
|
||||
USING: accessors arrays assocs fry locals kernel namespaces
|
||||
sequences sequences.deep
|
||||
sets vectors
|
||||
cpu.architecture
|
||||
|
|
@ -46,35 +46,39 @@ SYMBOL: class-element-map
|
|||
! Sequence of vreg pairs
|
||||
SYMBOL: copies
|
||||
|
||||
: value-of ( vreg -- value )
|
||||
insn-of dup ##tagged>integer? [ src>> ] [ dst>> ] if ;
|
||||
|
||||
: init-coalescing ( -- )
|
||||
defs get keys
|
||||
[ [ dup ] H{ } map>assoc leader-map set ]
|
||||
[ [ dup 1vector ] H{ } map>assoc class-element-map set ] bi
|
||||
defs get
|
||||
[ [ drop dup ] assoc-map leader-map set ]
|
||||
[ [ [ dup dup value-of ] dip <vreg-info> 1array ] assoc-map class-element-map set ] bi
|
||||
V{ } clone copies set ;
|
||||
|
||||
: classes-interfere? ( vreg1 vreg2 -- ? )
|
||||
[ leader ] bi@ 2dup eq? [ 2drop f ] [
|
||||
[ class-elements flatten ] bi@ sets-interfere?
|
||||
] if ;
|
||||
|
||||
: update-leaders ( vreg1 vreg2 -- )
|
||||
: coalesce-leaders ( vreg1 vreg2 -- )
|
||||
! leader2 becomes the leader.
|
||||
swap leader-map get set-at ;
|
||||
|
||||
: merge-classes ( vreg1 vreg2 -- )
|
||||
[ [ class-elements ] bi@ push ]
|
||||
[ drop class-element-map get delete-at ] 2bi ;
|
||||
: coalesce-elements ( merged vreg1 vreg2 -- )
|
||||
! delete leader1's class, and set leader2's class to merged.
|
||||
class-element-map get [ delete-at ] [ set-at ] bi-curry bi* ;
|
||||
|
||||
: eliminate-copy ( vreg1 vreg2 -- )
|
||||
[ leader ] bi@
|
||||
2dup eq? [ 2drop ] [
|
||||
[ update-leaders ]
|
||||
[ merge-classes ]
|
||||
2bi
|
||||
] if ;
|
||||
: coalesce-vregs ( merged leader1 leader2 -- )
|
||||
[ coalesce-leaders ] [ coalesce-elements ] 2bi ;
|
||||
|
||||
:: maybe-eliminate-copy ( vreg1 vreg2 -- )
|
||||
! Eliminate a copy of possible.
|
||||
vreg1 leader :> vreg1
|
||||
vreg2 leader :> vreg2
|
||||
vreg1 vreg2 eq? [
|
||||
vreg1 class-elements vreg2 class-elements sets-interfere?
|
||||
[ drop ] [ vreg1 vreg2 coalesce-vregs ] if
|
||||
] unless ;
|
||||
|
||||
GENERIC: prepare-insn ( insn -- )
|
||||
|
||||
: try-to-coalesce ( dst src -- ) 2array copies get push ;
|
||||
: maybe-eliminate-copy-later ( dst src -- )
|
||||
2array copies get push ;
|
||||
|
||||
M: insn prepare-insn drop ;
|
||||
|
||||
|
|
@ -85,19 +89,19 @@ M: vreg-insn prepare-insn
|
|||
2dup empty? not and [
|
||||
first
|
||||
2dup [ rep-of reg-class-of ] bi@ eq?
|
||||
[ try-to-coalesce ] [ 2drop ] if
|
||||
[ maybe-eliminate-copy-later ] [ 2drop ] if
|
||||
] [ 2drop ] if
|
||||
] bi ;
|
||||
|
||||
M: ##copy prepare-insn
|
||||
[ dst>> ] [ src>> ] bi try-to-coalesce ;
|
||||
[ dst>> ] [ src>> ] bi maybe-eliminate-copy-later ;
|
||||
|
||||
M: ##tagged>integer prepare-insn
|
||||
[ dst>> ] [ src>> ] bi eliminate-copy ;
|
||||
[ dst>> ] [ src>> ] bi maybe-eliminate-copy ;
|
||||
|
||||
M: ##phi prepare-insn
|
||||
[ dst>> ] [ inputs>> values ] bi
|
||||
[ eliminate-copy ] with each ;
|
||||
[ maybe-eliminate-copy ] with each ;
|
||||
|
||||
: prepare-block ( bb -- )
|
||||
instructions>> [ prepare-insn ] each ;
|
||||
|
|
@ -107,10 +111,7 @@ M: ##phi prepare-insn
|
|||
[ prepare-block ] each-basic-block ;
|
||||
|
||||
: process-copies ( -- )
|
||||
copies get [
|
||||
2dup classes-interfere?
|
||||
[ 2drop ] [ eliminate-copy ] if
|
||||
] assoc-each ;
|
||||
copies get [ maybe-eliminate-copy ] assoc-each ;
|
||||
|
||||
GENERIC: useful-insn? ( insn -- ? )
|
||||
|
||||
|
|
@ -135,6 +136,7 @@ PRIVATE>
|
|||
|
||||
dup construct-cssa
|
||||
dup compute-defs
|
||||
dup compute-insns
|
||||
dup compute-ssa-live-sets
|
||||
dup compute-live-ranges
|
||||
dup prepare-coalescing
|
||||
|
|
|
|||
|
|
@ -2,17 +2,35 @@ USING: accessors compiler.cfg compiler.cfg.debugger
|
|||
compiler.cfg.def-use compiler.cfg.dominance
|
||||
compiler.cfg.instructions compiler.cfg.liveness.ssa
|
||||
compiler.cfg.registers compiler.cfg.predecessors
|
||||
compiler.cfg.ssa.interference
|
||||
compiler.cfg.ssa.interference.live-ranges cpu.architecture
|
||||
kernel namespaces tools.test ;
|
||||
compiler.cfg.comparisons compiler.cfg.ssa.interference
|
||||
compiler.cfg.ssa.interference.private
|
||||
compiler.cfg.ssa.interference.live-ranges
|
||||
cpu.architecture kernel namespaces tools.test alien.c-types
|
||||
arrays sequences slots ;
|
||||
IN: compiler.cfg.ssa.interference.tests
|
||||
|
||||
: test-interference ( -- )
|
||||
cfg new 0 get >>entry
|
||||
dup compute-ssa-live-sets
|
||||
dup compute-defs
|
||||
dup compute-insns
|
||||
compute-live-ranges ;
|
||||
|
||||
: <test-vreg-info> ( vreg -- info )
|
||||
[ ] [ insn-of dup ##tagged>integer? [ src>> ] [ dst>> ] if ] [ def-of ] tri
|
||||
<vreg-info> ;
|
||||
|
||||
: test-vregs-intersect? ( vreg1 vreg2 -- ? )
|
||||
[ <test-vreg-info> ] bi@ vregs-intersect? ;
|
||||
|
||||
: test-vregs-interfere? ( vreg1 vreg2 -- ? )
|
||||
[ <test-vreg-info> ] bi@
|
||||
[ blue >>color ] [ red >>color ] bi*
|
||||
vregs-interfere? ;
|
||||
|
||||
: test-sets-interfere? ( seq1 seq2 -- merged ? )
|
||||
[ [ <test-vreg-info> ] map ] bi@ sets-interfere? ;
|
||||
|
||||
V{
|
||||
T{ ##peek f 0 D 0 }
|
||||
T{ ##peek f 2 D 0 }
|
||||
|
|
@ -34,17 +52,310 @@ V{
|
|||
|
||||
[ ] [ test-interference ] unit-test
|
||||
|
||||
[ f ] [ 0 1 vregs-interfere? ] unit-test
|
||||
[ f ] [ 1 0 vregs-interfere? ] unit-test
|
||||
[ f ] [ 2 3 vregs-interfere? ] unit-test
|
||||
[ f ] [ 3 2 vregs-interfere? ] unit-test
|
||||
[ t ] [ 0 2 vregs-interfere? ] unit-test
|
||||
[ t ] [ 2 0 vregs-interfere? ] unit-test
|
||||
[ f ] [ 1 3 vregs-interfere? ] unit-test
|
||||
[ f ] [ 3 1 vregs-interfere? ] unit-test
|
||||
[ t ] [ 3 4 vregs-interfere? ] unit-test
|
||||
[ t ] [ 4 3 vregs-interfere? ] unit-test
|
||||
[ t ] [ 3 5 vregs-interfere? ] unit-test
|
||||
[ t ] [ 5 3 vregs-interfere? ] unit-test
|
||||
[ f ] [ 3 6 vregs-interfere? ] unit-test
|
||||
[ f ] [ 6 3 vregs-interfere? ] unit-test
|
||||
[ f ] [ 0 1 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 1 0 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 2 3 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 3 2 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 0 2 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 2 0 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 1 3 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 3 1 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 3 4 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 4 3 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 3 5 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 5 3 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 3 6 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 6 3 test-vregs-intersect? ] unit-test
|
||||
|
||||
V{
|
||||
T{ ##prologue }
|
||||
T{ ##branch }
|
||||
} 0 test-bb
|
||||
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -3 }
|
||||
T{ ##peek f 12 D -2 }
|
||||
T{ ##peek f 23 D -1 }
|
||||
T{ ##sar-imm f 13 23 4 }
|
||||
T{ ##peek f 24 D -3 }
|
||||
T{ ##sar-imm f 14 24 4 }
|
||||
T{ ##mul f 15 13 13 }
|
||||
T{ ##mul f 16 15 15 }
|
||||
T{ ##tagged>integer f 17 12 }
|
||||
T{ ##store-memory f 16 17 14 0 7 int-rep uchar }
|
||||
T{ ##branch }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue }
|
||||
T{ ##return }
|
||||
} 2 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 2 edge
|
||||
|
||||
[ ] [ test-interference ] unit-test
|
||||
|
||||
[ t ] [ { 15 } { 23 13 } test-sets-interfere? nip ] unit-test
|
||||
|
||||
V{
|
||||
T{ ##prologue f }
|
||||
T{ ##branch f }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f 2 }
|
||||
T{ ##peek f 32 D 2 }
|
||||
T{ ##load-reference f 33 ##check-nursery-branch }
|
||||
T{ ##load-integer f 34 11 }
|
||||
T{ ##tagged>integer f 35 32 }
|
||||
T{ ##and-imm f 36 35 15 }
|
||||
T{ ##compare-integer-imm-branch f 36 7 cc= }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##slot-imm f 48 32 1 7 }
|
||||
T{ ##slot-imm f 50 48 1 2 }
|
||||
T{ ##sar-imm f 65 50 4 }
|
||||
T{ ##compare-integer-branch f 34 65 cc<= }
|
||||
} 2 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -2 }
|
||||
T{ ##slot-imm f 57 48 11 2 }
|
||||
T{ ##compare f 58 33 57 cc= 20 }
|
||||
T{ ##replace f 58 D 0 }
|
||||
T{ ##branch f }
|
||||
} 3 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue f }
|
||||
T{ ##return f }
|
||||
} 4 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -2 }
|
||||
T{ ##replace-imm f f D 0 }
|
||||
T{ ##branch f }
|
||||
} 5 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue f }
|
||||
T{ ##return f }
|
||||
} 6 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -2 }
|
||||
T{ ##replace-imm f f D 0 }
|
||||
T{ ##branch f }
|
||||
} 7 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue f }
|
||||
T{ ##return f }
|
||||
} 8 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 { 2 7 } edges
|
||||
2 { 3 5 } edges
|
||||
3 4 edge
|
||||
5 6 edge
|
||||
7 8 edge
|
||||
|
||||
[ ] [ test-interference ] unit-test
|
||||
|
||||
[ f ] [ { 48 } { 32 35 } test-sets-interfere? nip ] unit-test
|
||||
|
||||
TUPLE: bab ;
|
||||
TUPLE: gfg { x bab } ;
|
||||
: bah ( -- x ) f ;
|
||||
|
||||
V{
|
||||
T{ ##prologue }
|
||||
T{ ##branch }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##check-nursery-branch f 16 cc<= 75 76 }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##save-context f 77 78 }
|
||||
T{ ##call-gc f { } }
|
||||
T{ ##branch }
|
||||
} 2 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f 1 }
|
||||
T{ ##load-reference f 37 T{ bab } }
|
||||
T{ ##load-reference f 38 { gfg 1 1 tuple 57438726 gfg 7785907 } }
|
||||
T{ ##allot f 40 12 tuple 4 }
|
||||
T{ ##set-slot-imm f 38 40 1 7 }
|
||||
T{ ##set-slot-imm f 37 40 2 7 }
|
||||
T{ ##replace f 40 D 0 }
|
||||
T{ ##branch }
|
||||
} 3 test-bb
|
||||
|
||||
V{
|
||||
T{ ##call f bah }
|
||||
T{ ##branch }
|
||||
} 4 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-r f 1 }
|
||||
T{ ##inc-d f 1 }
|
||||
T{ ##peek f 43 D 1 }
|
||||
T{ ##peek f 44 D 2 }
|
||||
T{ ##tagged>integer f 45 43 }
|
||||
T{ ##and-imm f 46 45 15 }
|
||||
T{ ##compare-integer-imm-branch f 46 7 cc= }
|
||||
} 5 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -1 }
|
||||
T{ ##slot-imm f 58 43 1 7 }
|
||||
T{ ##slot-imm f 60 58 7 2 }
|
||||
T{ ##compare-imm-branch f 60 bab cc= }
|
||||
} 6 test-bb
|
||||
|
||||
V{
|
||||
T{ ##branch }
|
||||
} 7 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-r f -1 }
|
||||
T{ ##inc-d f -1 }
|
||||
T{ ##set-slot-imm f 43 44 2 7 }
|
||||
T{ ##write-barrier-imm f 44 2 7 34 35 }
|
||||
T{ ##branch }
|
||||
} 8 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue }
|
||||
T{ ##return }
|
||||
} 9 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f 1 }
|
||||
T{ ##replace f 44 R 0 }
|
||||
T{ ##replace-imm f bab D 0 }
|
||||
T{ ##branch }
|
||||
} 10 test-bb
|
||||
|
||||
V{
|
||||
T{ ##call f bad-slot-value }
|
||||
T{ ##branch }
|
||||
} 11 test-bb
|
||||
|
||||
V{
|
||||
T{ ##no-tco }
|
||||
} 12 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -1 }
|
||||
T{ ##branch }
|
||||
} 13 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f 1 }
|
||||
T{ ##replace f 44 R 0 }
|
||||
T{ ##replace-imm f bab D 0 }
|
||||
T{ ##branch }
|
||||
} 14 test-bb
|
||||
|
||||
V{
|
||||
T{ ##call f bad-slot-value }
|
||||
T{ ##branch }
|
||||
} 15 test-bb
|
||||
|
||||
V{
|
||||
T{ ##no-tco }
|
||||
} 16 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 { 3 2 } edges
|
||||
2 3 edge
|
||||
3 4 edge
|
||||
4 5 edge
|
||||
5 { 6 13 } edges
|
||||
6 { 7 10 } edges
|
||||
7 8 edge
|
||||
8 9 edge
|
||||
10 11 edge
|
||||
11 12 edge
|
||||
13 14 edge
|
||||
14 15 edge
|
||||
15 16 edge
|
||||
|
||||
[ ] [ test-interference ] unit-test
|
||||
|
||||
[ t ] [ 43 45 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 43 45 test-vregs-interfere? ] unit-test
|
||||
|
||||
[ t ] [ 43 46 test-vregs-intersect? ] unit-test
|
||||
[ t ] [ 43 46 test-vregs-interfere? ] unit-test
|
||||
|
||||
[ f ] [ 45 46 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 45 46 test-vregs-interfere? ] unit-test
|
||||
|
||||
[ f ] [ { 43 } { 45 } test-sets-interfere? nip ] unit-test
|
||||
|
||||
[ t f ] [
|
||||
{ 46 } { 43 } { 45 }
|
||||
[ [ <test-vreg-info> ] map ] tri@
|
||||
sets-interfere? [ sets-interfere? nip ] dip
|
||||
] unit-test
|
||||
|
||||
V{
|
||||
T{ ##prologue f }
|
||||
T{ ##branch f }
|
||||
} 0 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f 1 }
|
||||
T{ ##peek f 31 D 1 }
|
||||
T{ ##sar-imm f 16 31 4 }
|
||||
T{ ##load-integer f 17 0 }
|
||||
T{ ##copy f 33 17 int-rep }
|
||||
T{ ##branch f }
|
||||
} 1 test-bb
|
||||
|
||||
V{
|
||||
T{ ##phi f 21 H{ { 1 33 } { 3 32 } } }
|
||||
T{ ##compare-integer-branch f 21 16 cc< }
|
||||
} 2 test-bb
|
||||
|
||||
V{
|
||||
T{ ##add-imm f 27 21 1 }
|
||||
T{ ##copy f 32 27 int-rep }
|
||||
T{ ##branch f }
|
||||
} 3 test-bb
|
||||
|
||||
V{
|
||||
T{ ##inc-d f -2 }
|
||||
T{ ##branch f }
|
||||
} 4 test-bb
|
||||
|
||||
V{
|
||||
T{ ##epilogue f }
|
||||
T{ ##return f }
|
||||
} 5 test-bb
|
||||
|
||||
0 1 edge
|
||||
1 2 edge
|
||||
2 { 3 4 } edges
|
||||
3 2 edge
|
||||
4 5 edge
|
||||
|
||||
[ ] [ test-interference ] unit-test
|
||||
|
||||
[ f f ] [
|
||||
{ 33 } { 21 } { 32 }
|
||||
[ [ <test-vreg-info> ] map ] tri@
|
||||
sets-interfere? [ sets-interfere? nip ] dip
|
||||
] unit-test
|
||||
|
||||
[ f ] [ 33 21 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 32 21 test-vregs-intersect? ] unit-test
|
||||
[ f ] [ 32 33 test-vregs-intersect? ] unit-test
|
||||
|
|
@ -1,92 +1,175 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! Copyright (C) 2009, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs combinators combinators.short-circuit fry
|
||||
kernel math math.order sorting namespaces sequences locals
|
||||
compiler.cfg.def-use compiler.cfg.dominance
|
||||
compiler.cfg.ssa.interference.live-ranges ;
|
||||
USING: accessors arrays assocs combinators
|
||||
combinators.short-circuit fry kernel math math.order sorting
|
||||
sorting.slots namespaces sequences locals compiler.cfg.def-use
|
||||
compiler.cfg.dominance compiler.cfg.ssa.interference.live-ranges ;
|
||||
IN: compiler.cfg.ssa.interference
|
||||
|
||||
! Interference testing using SSA properties. Actually the only SSA property
|
||||
! used here is that definitions dominate uses; because of this, the input
|
||||
! is allowed to have multiple definitions of each vreg as long as they're
|
||||
! all in the same basic block. This is needed because two-operand conversion
|
||||
! runs before coalescing, which uses SSA interference testing.
|
||||
! Interference testing using SSA properties.
|
||||
!
|
||||
! Based on:
|
||||
!
|
||||
! Revisiting Out-of-SSA Translation for Correctness, Code Quality, and Efficiency
|
||||
! http://hal.archives-ouvertes.fr/docs/00/34/99/25/PDF/OutSSA-RR.pdf
|
||||
|
||||
TUPLE: vreg-info vreg value def-index bb pre-of color equal-anc-in equal-anc-out ;
|
||||
|
||||
:: <vreg-info> ( vreg value bb -- info )
|
||||
vreg-info new
|
||||
vreg >>vreg
|
||||
bb >>bb
|
||||
value >>value
|
||||
bb pre-of >>pre-of
|
||||
vreg bb def-index >>def-index ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
:: kill-after-def? ( vreg1 vreg2 bb -- ? )
|
||||
! Our dominance pass computes dominance information on a
|
||||
! per-basic block level. Rig up a more fine-grained dominance
|
||||
! test here.
|
||||
: locally-dominates? ( vreg1 vreg2 -- ? )
|
||||
[ def-index>> ] bi@ < ;
|
||||
|
||||
:: vreg-dominates? ( vreg1 vreg2 -- ? )
|
||||
vreg1 bb>> :> bb1
|
||||
vreg2 bb>> :> bb2
|
||||
bb1 bb2 eq?
|
||||
[ vreg1 vreg2 locally-dominates? ] [ bb1 bb2 dominates? ] if ;
|
||||
|
||||
! Testing individual vregs for live range intersection.
|
||||
: kill-after-def? ( vreg1 vreg2 bb -- ? )
|
||||
! If first register is used after second one is defined, they interfere.
|
||||
! If they are used in the same instruction, no interference. If the
|
||||
! instruction is a def-is-use-insn, then there will be a use at +1
|
||||
! (instructions are 2 apart) and so outputs will interfere with
|
||||
! inputs.
|
||||
vreg1 bb kill-index
|
||||
vreg2 bb def-index > ;
|
||||
[ kill-index ] [ def-index ] bi-curry bi* > ;
|
||||
|
||||
:: interferes-same-block? ( vreg1 vreg2 bb1 bb2 -- ? )
|
||||
! If both are defined in the same basic block, they interfere if their
|
||||
! local live ranges intersect.
|
||||
vreg1 bb1 def-index
|
||||
vreg2 bb1 def-index <
|
||||
[ vreg1 vreg2 ] [ vreg2 vreg1 ] if
|
||||
bb1 kill-after-def? ;
|
||||
|
||||
: interferes-first-dominates? ( vreg1 vreg2 bb1 bb2 -- ? )
|
||||
: interferes-first-dominates? ( vreg1 vreg2 -- ? )
|
||||
! If vreg1 dominates vreg2, then they interfere if vreg2's definition
|
||||
! occurs before vreg1 is killed.
|
||||
nip
|
||||
kill-after-def? ;
|
||||
[ [ vreg>> ] bi@ ] [ nip bb>> ] 2bi kill-after-def? ;
|
||||
|
||||
: interferes-second-dominates? ( vreg1 vreg2 bb1 bb2 -- ? )
|
||||
: interferes-second-dominates? ( vreg1 vreg2 -- ? )
|
||||
! If vreg2 dominates vreg1, then they interfere if vreg1's definition
|
||||
! occurs before vreg2 is killed.
|
||||
drop
|
||||
swapd kill-after-def? ;
|
||||
swap interferes-first-dominates? ;
|
||||
|
||||
PRIVATE>
|
||||
: interferes-same-block? ( vreg1 vreg2 -- ? )
|
||||
! If both are defined in the same basic block, they interfere if their
|
||||
! local live ranges intersect.
|
||||
2dup locally-dominates? [ swap ] unless
|
||||
interferes-first-dominates? ;
|
||||
|
||||
: vregs-interfere? ( vreg1 vreg2 -- ? )
|
||||
2dup [ def-of ] bi@ {
|
||||
{ [ 2dup eq? ] [ interferes-same-block? ] }
|
||||
{ [ 2dup dominates? ] [ interferes-first-dominates? ] }
|
||||
{ [ 2dup swap dominates? ] [ interferes-second-dominates? ] }
|
||||
[ 2drop 2drop f ]
|
||||
:: vregs-intersect? ( vreg1 vreg2 -- ? )
|
||||
vreg1 bb>> :> bb1
|
||||
vreg2 bb>> :> bb2
|
||||
{
|
||||
{ [ bb1 bb2 eq? ] [ vreg1 vreg2 interferes-same-block? ] }
|
||||
{ [ bb1 bb2 dominates? ] [ vreg1 vreg2 interferes-first-dominates? ] }
|
||||
{ [ bb2 bb1 dominates? ] [ vreg1 vreg2 interferes-second-dominates? ] }
|
||||
[ f ]
|
||||
} cond ;
|
||||
|
||||
<PRIVATE
|
||||
! Value-based interference test.
|
||||
: chain-intersect ( vreg1 vreg2 -- vreg )
|
||||
[ 2dup { [ nip ] [ vregs-intersect? not ] } 2&& ]
|
||||
[ equal-anc-in>> ]
|
||||
while nip ;
|
||||
|
||||
! Debug this stuff later
|
||||
: update-equal-anc-out ( vreg1 vreg2 -- )
|
||||
dupd chain-intersect >>equal-anc-out drop ;
|
||||
|
||||
: quadratic-test? ( seq1 seq2 -- ? ) [ length ] bi@ + 10 < ;
|
||||
: same-sets? ( vreg1 vreg2 -- ? )
|
||||
[ color>> ] bi@ eq? ;
|
||||
|
||||
: quadratic-test ( seq1 seq2 -- ? )
|
||||
'[ _ [ vregs-interfere? ] with any? ] any? ;
|
||||
: same-values? ( vreg1 vreg2 -- ? )
|
||||
[ value>> ] bi@ eq? ;
|
||||
|
||||
: sort-vregs-by-bb ( vregs -- alist )
|
||||
defs get
|
||||
'[ dup _ at ] { } map>assoc
|
||||
[ second pre-of ] sort-with ;
|
||||
: vregs-interfere? ( vreg1 vreg2 -- ? )
|
||||
[ f >>equal-anc-out ] dip
|
||||
|
||||
: ?last ( seq -- elt/f ) [ f ] [ last ] if-empty ; inline
|
||||
2dup same-sets? [ equal-anc-out>> ] when
|
||||
|
||||
: find-parent ( dom current -- parent )
|
||||
2dup same-values?
|
||||
[ update-equal-anc-out f ] [ chain-intersect >boolean ] if ;
|
||||
|
||||
! Merging lists of vregs sorted by dominance.
|
||||
M: vreg-info <=> ( vreg1 vreg2 -- <=> )
|
||||
{ { pre-of>> <=> } { def-index>> <=> } } compare-slots ;
|
||||
|
||||
SYMBOLS: blue red ;
|
||||
|
||||
TUPLE: iterator seq n ;
|
||||
: <iterator> ( seq -- iterator ) 0 iterator boa ; inline
|
||||
: done? ( iterator -- ? ) [ seq>> length ] [ n>> ] bi = ; inline
|
||||
: this ( iterator -- obj ) [ n>> ] [ seq>> ] bi nth ; inline
|
||||
: ++ ( iterator -- ) [ 1 + ] change-n drop ; inline
|
||||
: take ( iterator -- obj ) [ this ] [ ++ ] bi ; inline
|
||||
|
||||
: blue-smaller? ( blue red -- ? )
|
||||
[ this ] bi@ before? ; inline
|
||||
|
||||
: take-blue? ( blue red -- ? )
|
||||
{
|
||||
[ nip done? ]
|
||||
[
|
||||
{
|
||||
[ drop done? not ]
|
||||
[ blue-smaller? ]
|
||||
} 2&&
|
||||
]
|
||||
} 2|| ; inline
|
||||
|
||||
: merge-sets ( blue red -- seq )
|
||||
[ <iterator> ] bi@
|
||||
[ 2dup [ done? ] both? not ]
|
||||
[
|
||||
2dup take-blue?
|
||||
[ over take blue >>color ]
|
||||
[ dup take red >>color ]
|
||||
if
|
||||
] produce 2nip ;
|
||||
|
||||
: update-for-merge ( seq -- )
|
||||
[
|
||||
dup [ equal-anc-in>> ] [ equal-anc-out>> ] bi
|
||||
2dup and [ [ vreg-dominates? ] most ] [ or ] if
|
||||
>>equal-anc-in
|
||||
drop
|
||||
] each ;
|
||||
|
||||
! Linear-time live range intersection test in a merged set.
|
||||
: find-parent ( dom current -- vreg )
|
||||
over empty? [ 2drop f ] [
|
||||
over last over dominates? [ drop last ] [
|
||||
over pop* find-parent
|
||||
] if
|
||||
over last over vreg-dominates?
|
||||
[ drop last ] [ over pop* find-parent ] if
|
||||
] if ;
|
||||
|
||||
:: linear-test ( seq1 seq2 -- ? )
|
||||
! Instead of sorting, SSA destruction should keep equivalence
|
||||
! classes sorted by merging them on append
|
||||
:: linear-interference-test ( seq -- ? )
|
||||
V{ } clone :> dom
|
||||
seq1 seq2 append sort-vregs-by-bb [| pair |
|
||||
pair first :> current
|
||||
dom current find-parent
|
||||
dup [ current vregs-interfere? ] when
|
||||
[ t ] [ current dom push f ] if
|
||||
seq [| vreg |
|
||||
dom vreg find-parent
|
||||
{ [ ] [ vreg same-sets? not ] [ vreg swap vregs-interfere? ] } 1&&
|
||||
[ t ] [ vreg dom push f ] if
|
||||
] any? ;
|
||||
|
||||
: sets-interfere-1? ( seq1 seq2 -- merged/f ? )
|
||||
[ first ] bi@
|
||||
2dup before? [ swap ] unless
|
||||
2dup same-values? [
|
||||
2dup equal-anc-in<<
|
||||
2array f
|
||||
] [
|
||||
2dup vregs-intersect?
|
||||
[ 2drop f t ] [ 2array f ] if
|
||||
] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: sets-interfere? ( seq1 seq2 -- ? )
|
||||
quadratic-test ;
|
||||
: sets-interfere? ( seq1 seq2 -- merged/f ? )
|
||||
2dup [ length 1 = ] both? [ sets-interfere-1? ] [
|
||||
merge-sets dup linear-interference-test
|
||||
[ drop f t ] [ dup update-for-merge f ] if
|
||||
] if ;
|
||||
|
|
@ -25,15 +25,23 @@ SYMBOLS: local-def-indices local-kill-indices ;
|
|||
[ 1 + ] dip [ local-kill-indices get set-at ] with each
|
||||
] if ;
|
||||
|
||||
: visit-insn ( insn n -- )
|
||||
2 * swap [ record-def ] [ record-uses ] 2bi ;
|
||||
GENERIC: record-insn ( n insn -- )
|
||||
|
||||
M: ##phi record-insn
|
||||
record-def ;
|
||||
|
||||
M: vreg-insn record-insn
|
||||
[ 2 * ] dip [ record-def ] [ record-uses ] 2bi ;
|
||||
|
||||
M: insn record-insn
|
||||
2drop ;
|
||||
|
||||
SYMBOLS: def-indices kill-indices ;
|
||||
|
||||
: compute-local-live-ranges ( bb -- )
|
||||
H{ } clone local-def-indices set
|
||||
H{ } clone local-kill-indices set
|
||||
[ instructions>> [ visit-insn ] each-index ]
|
||||
[ instructions>> [ swap record-insn ] each-index ]
|
||||
[ [ local-def-indices get ] dip def-indices get set-at ]
|
||||
[ [ local-kill-indices get ] dip kill-indices get set-at ]
|
||||
tri ;
|
||||
|
|
|
|||
|
|
@ -85,6 +85,9 @@ IN: compiler.tests.float
|
|||
[ t ] [ -0.0 [ dup 0.0 float= swap -0.0 float= or ] compile-call ] unit-test
|
||||
[ f ] [ 3.0 [ dup 0.0 float= swap -0.0 float= or ] compile-call ] unit-test
|
||||
|
||||
[ 313.0 ] [ 313 [ fixnum>float ] compile-call ] unit-test
|
||||
[ -313 ] [ -313.5 [ float>fixnum ] compile-call ] unit-test
|
||||
[ 313 ] [ 313.5 [ float>fixnum ] compile-call ] unit-test
|
||||
[ 315 315.0 ] [ 313 [ 2 fixnum+fast dup fixnum>float ] compile-call ] unit-test
|
||||
|
||||
[ t ] [ 0/0. 0/0. [ float-unordered? ] compile-call ] unit-test
|
||||
|
|
|
|||
|
|
@ -296,6 +296,8 @@ HOOK: %binary-float-function cpu ( dst src1 src2 func -- )
|
|||
HOOK: %single>double-float cpu ( dst src -- )
|
||||
HOOK: %double>single-float cpu ( dst src -- )
|
||||
|
||||
HOOK: integer-float-needs-stack-frame? cpu ( -- ? )
|
||||
|
||||
HOOK: %integer>float cpu ( dst src -- )
|
||||
HOOK: %float>integer cpu ( dst src -- )
|
||||
|
||||
|
|
|
|||
|
|
@ -190,6 +190,8 @@ M: ppc %sub-float FSUB ;
|
|||
M: ppc %mul-float FMUL ;
|
||||
M: ppc %div-float FDIV ;
|
||||
|
||||
M: ppc integer-float-needs-stack-frame? t ;
|
||||
|
||||
M:: ppc %integer>float ( dst src -- )
|
||||
HEX: 4330 scratch-reg LIS
|
||||
scratch-reg 1 0 scratch@ STW
|
||||
|
|
|
|||
|
|
@ -7,15 +7,20 @@ words compiler.constants compiler.codegen.fixup
|
|||
compiler.cfg.instructions compiler.cfg.builder
|
||||
compiler.cfg.builder.alien.boxing compiler.cfg.intrinsics
|
||||
compiler.cfg.stack-frame cpu.x86.assembler
|
||||
cpu.x86.assembler.operands cpu.x86 cpu.architecture vm ;
|
||||
cpu.x86.assembler.operands cpu.x86 cpu.architecture vm vocabs ;
|
||||
FROM: layouts => cell ;
|
||||
IN: cpu.x86.32
|
||||
|
||||
: x86-float-regs ( -- seq )
|
||||
"cpu.x86.sse" vocab
|
||||
{ XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 }
|
||||
{ ST0 ST1 ST2 ST3 ST4 ST5 ST6 }
|
||||
? ;
|
||||
|
||||
M: x86.32 machine-registers
|
||||
{
|
||||
{ int-regs { EAX ECX EDX EBP EBX } }
|
||||
{ float-regs { XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 } }
|
||||
} ;
|
||||
{ int-regs { EAX ECX EDX EBP EBX } }
|
||||
float-regs x86-float-regs 2array
|
||||
2array ;
|
||||
|
||||
M: x86.32 ds-reg ESI ;
|
||||
M: x86.32 rs-reg EDI ;
|
||||
|
|
@ -94,7 +99,7 @@ M: x86.32 param-regs
|
|||
M: x86.32 return-regs
|
||||
{
|
||||
{ int-regs { EAX EDX } }
|
||||
{ float-regs { f } }
|
||||
{ float-regs { ST0 } }
|
||||
} ;
|
||||
|
||||
M: x86.32 %prologue ( n -- )
|
||||
|
|
@ -105,11 +110,11 @@ M: x86.32 %prologue ( n -- )
|
|||
M: x86.32 %prepare-jump
|
||||
pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here ;
|
||||
|
||||
:: load-float-return ( dst x87-insn sse-insn -- )
|
||||
:: load-float-return ( dst x87-insn rep -- )
|
||||
dst register? [
|
||||
ESP 4 SUB
|
||||
ESP [] x87-insn execute
|
||||
dst ESP [] sse-insn execute
|
||||
dst ESP [] rep %copy
|
||||
ESP 4 ADD
|
||||
] [
|
||||
dst ?spill-slot x87-insn execute
|
||||
|
|
@ -118,14 +123,14 @@ M: x86.32 %prepare-jump
|
|||
M: x86.32 %load-reg-param ( dst reg rep -- )
|
||||
{
|
||||
{ int-rep [ int-rep %copy ] }
|
||||
{ float-rep [ drop \ FSTPS \ MOVSS load-float-return ] }
|
||||
{ double-rep [ drop \ FSTPL \ MOVSD load-float-return ] }
|
||||
{ float-rep [ drop \ FSTPS float-rep load-float-return ] }
|
||||
{ double-rep [ drop \ FSTPL double-rep load-float-return ] }
|
||||
} case ;
|
||||
|
||||
:: store-float-return ( src x87-insn sse-insn -- )
|
||||
:: store-float-return ( src x87-insn rep -- )
|
||||
src register? [
|
||||
ESP 4 SUB
|
||||
ESP [] src sse-insn execute
|
||||
ESP [] src rep %copy
|
||||
ESP [] x87-insn execute
|
||||
ESP 4 ADD
|
||||
] [
|
||||
|
|
@ -135,8 +140,8 @@ M: x86.32 %load-reg-param ( dst reg rep -- )
|
|||
M: x86.32 %store-reg-param ( src reg rep -- )
|
||||
{
|
||||
{ int-rep [ swap int-rep %copy ] }
|
||||
{ float-rep [ drop \ FLDS \ MOVSS store-float-return ] }
|
||||
{ double-rep [ drop \ FLDL \ MOVSD store-float-return ] }
|
||||
{ float-rep [ drop \ FLDS float-rep store-float-return ] }
|
||||
{ double-rep [ drop \ FLDL double-rep store-float-return ] }
|
||||
} case ;
|
||||
|
||||
:: call-unbox-func ( src func -- )
|
||||
|
|
|
|||
|
|
@ -179,3 +179,5 @@ USE: vocabs.loader
|
|||
{ [ os unix? ] [ "cpu.x86.64.unix" require ] }
|
||||
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
|
||||
} cond
|
||||
|
||||
check-sse
|
||||
|
|
|
|||
|
|
@ -496,6 +496,8 @@ PRIVATE>
|
|||
: FILDQ ( src -- ) { BIN: 101 f HEX: DF } 1-operand ;
|
||||
: FISTPD ( dst -- ) { BIN: 011 f HEX: DB } 1-operand ;
|
||||
: FISTPQ ( dst -- ) { BIN: 111 f HEX: DF } 1-operand ;
|
||||
: FISTTPD ( dst -- ) { BIN: 001 f HEX: DB } 1-operand ;
|
||||
: FISTTPQ ( dst -- ) { BIN: 001 f HEX: DF } 1-operand ;
|
||||
|
||||
: FLD ( dst src -- ) HEX: D9 0 x87-st0-op ;
|
||||
: FLD1 ( -- ) { HEX: D9 HEX: E8 } % ;
|
||||
|
|
|
|||
|
|
@ -15,15 +15,16 @@ REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ;
|
|||
|
||||
REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ;
|
||||
|
||||
REGISTERS: 64
|
||||
RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ;
|
||||
REGISTERS: 64 RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ;
|
||||
|
||||
REGISTERS: 128
|
||||
XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7
|
||||
XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ;
|
||||
|
||||
REGISTERS: 80
|
||||
ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 ;
|
||||
REGISTERS: 80 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 ;
|
||||
|
||||
: shuffle-down ( STn -- STn+1 )
|
||||
"register" word-prop 1 + 80 registers get at nth ;
|
||||
|
||||
PREDICATE: register < word
|
||||
"register" word-prop ;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
Slava Pestov
|
||||
Joe Groff
|
||||
|
|
@ -0,0 +1,913 @@
|
|||
! Copyright (C) 2009, 2010 Joe Groff, Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types arrays assocs combinators fry kernel locals
|
||||
macros math math.vectors namespaces quotations sequences system
|
||||
compiler.cfg.comparisons compiler.cfg.intrinsics
|
||||
compiler.codegen.fixup cpu.architecture cpu.x86
|
||||
cpu.x86.assembler cpu.x86.assembler.operands cpu.x86.features ;
|
||||
IN: cpu.x86.sse
|
||||
|
||||
! Scalar floating point with SSE2
|
||||
M: x86 %load-float <float> float-rep %load-vector ;
|
||||
M: x86 %load-double <double> double-rep %load-vector ;
|
||||
|
||||
M: float-rep copy-register* drop MOVAPS ;
|
||||
M: double-rep copy-register* drop MOVAPS ;
|
||||
|
||||
M: float-rep copy-memory* drop MOVSS ;
|
||||
M: double-rep copy-memory* drop MOVSD ;
|
||||
|
||||
M: x86 %add-float double-rep two-operand ADDSD ;
|
||||
M: x86 %sub-float double-rep two-operand SUBSD ;
|
||||
M: x86 %mul-float double-rep two-operand MULSD ;
|
||||
M: x86 %div-float double-rep two-operand DIVSD ;
|
||||
M: x86 %min-float double-rep two-operand MINSD ;
|
||||
M: x86 %max-float double-rep two-operand MAXSD ;
|
||||
M: x86 %sqrt SQRTSD ;
|
||||
|
||||
: %clear-unless-in-place ( dst src -- )
|
||||
over = [ drop ] [ dup XORPS ] if ;
|
||||
|
||||
M: x86 %single>double-float [ %clear-unless-in-place ] [ CVTSS2SD ] 2bi ;
|
||||
M: x86 %double>single-float [ %clear-unless-in-place ] [ CVTSD2SS ] 2bi ;
|
||||
|
||||
M: x86 integer-float-needs-stack-frame? f ;
|
||||
M: x86 %integer>float [ drop dup XORPS ] [ CVTSI2SD ] 2bi ;
|
||||
M: x86 %float>integer CVTTSD2SI ;
|
||||
|
||||
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
|
||||
[ COMISD ] (%compare-float) ;
|
||||
|
||||
M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
|
||||
[ UCOMISD ] (%compare-float) ;
|
||||
|
||||
M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
|
||||
[ COMISD ] (%compare-float-branch) ;
|
||||
|
||||
M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
|
||||
[ UCOMISD ] (%compare-float-branch) ;
|
||||
|
||||
! SIMD
|
||||
M: float-4-rep copy-register* drop MOVAPS ;
|
||||
M: double-2-rep copy-register* drop MOVAPS ;
|
||||
M: vector-rep copy-register* drop MOVDQA ;
|
||||
|
||||
MACRO: available-reps ( alist -- )
|
||||
! Each SSE version adds new representations and supports
|
||||
! all old ones
|
||||
unzip { } [ append ] accumulate rest swap suffix
|
||||
[ [ 1quotation ] map ] bi@ zip
|
||||
reverse [ { } ] suffix
|
||||
'[ _ cond ] ;
|
||||
|
||||
M: x86 %alien-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %zero-vector
|
||||
{
|
||||
{ double-2-rep [ dup XORPS ] }
|
||||
{ float-4-rep [ dup XORPS ] }
|
||||
[ drop dup PXOR ]
|
||||
} case ;
|
||||
|
||||
M: x86 %zero-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %fill-vector
|
||||
{
|
||||
{ double-2-rep [ dup [ XORPS ] [ CMPEQPS ] 2bi ] }
|
||||
{ float-4-rep [ dup [ XORPS ] [ CMPEQPS ] 2bi ] }
|
||||
[ drop dup PCMPEQB ]
|
||||
} case ;
|
||||
|
||||
M: x86 %fill-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||
rep signed-rep {
|
||||
{ float-4-rep [
|
||||
dst src1 float-4-rep %copy
|
||||
dst src2 UNPCKLPS
|
||||
src3 src4 UNPCKLPS
|
||||
dst src3 MOVLHPS
|
||||
] }
|
||||
{ int-4-rep [
|
||||
dst src1 int-4-rep %copy
|
||||
dst src2 PUNPCKLDQ
|
||||
src3 src4 PUNPCKLDQ
|
||||
dst src3 PUNPCKLQDQ
|
||||
] }
|
||||
} case ;
|
||||
|
||||
M: x86 %gather-vector-4-reps
|
||||
{
|
||||
! Can't do this with sse1 since it will want to unbox
|
||||
! double-precision floats and convert to single precision
|
||||
{ sse2? { float-4-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-int-vector-4 ( dst src1 src2 src3 src4 rep -- )
|
||||
dst rep %zero-vector
|
||||
dst src1 32-bit-version-of 0 PINSRD
|
||||
dst src2 32-bit-version-of 1 PINSRD
|
||||
dst src3 32-bit-version-of 2 PINSRD
|
||||
dst src4 32-bit-version-of 3 PINSRD ;
|
||||
|
||||
M: x86 %gather-int-vector-4-reps
|
||||
{
|
||||
{ sse4.1? { int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
|
||||
rep signed-rep {
|
||||
{ double-2-rep [
|
||||
dst src1 double-2-rep %copy
|
||||
dst src2 MOVLHPS
|
||||
] }
|
||||
{ longlong-2-rep [
|
||||
dst src1 longlong-2-rep %copy
|
||||
dst src2 PUNPCKLQDQ
|
||||
] }
|
||||
} case ;
|
||||
|
||||
M: x86 %gather-vector-2-reps
|
||||
{
|
||||
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86.64 %gather-int-vector-2 ( dst src1 src2 rep -- )
|
||||
dst rep %zero-vector
|
||||
dst src1 0 PINSRQ
|
||||
dst src2 1 PINSRQ ;
|
||||
|
||||
M: x86.64 %gather-int-vector-2-reps
|
||||
{
|
||||
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
:: %select-vector-32 ( dst src n rep -- )
|
||||
rep {
|
||||
{ char-16-rep [
|
||||
dst 32-bit-version-of src n PEXTRB
|
||||
dst dst 8-bit-version-of MOVSX
|
||||
] }
|
||||
{ uchar-16-rep [
|
||||
dst 32-bit-version-of src n PEXTRB
|
||||
] }
|
||||
{ short-8-rep [
|
||||
dst 32-bit-version-of src n PEXTRW
|
||||
dst dst 16-bit-version-of MOVSX
|
||||
] }
|
||||
{ ushort-8-rep [
|
||||
dst 32-bit-version-of src n PEXTRW
|
||||
] }
|
||||
{ int-4-rep [
|
||||
dst 32-bit-version-of src n PEXTRD
|
||||
dst dst 32-bit-version-of 2dup = [ 2drop ] [ MOVSX ] if
|
||||
] }
|
||||
{ uint-4-rep [
|
||||
dst 32-bit-version-of src n PEXTRD
|
||||
] }
|
||||
} case ;
|
||||
|
||||
M: x86.32 %select-vector
|
||||
%select-vector-32 ;
|
||||
|
||||
M: x86.32 %select-vector-reps
|
||||
{
|
||||
{ sse4.1? { uchar-16-rep char-16-rep ushort-8-rep short-8-rep uint-4-rep int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86.64 %select-vector
|
||||
{
|
||||
{ longlong-2-rep [ PEXTRQ ] }
|
||||
{ ulonglong-2-rep [ PEXTRQ ] }
|
||||
[ %select-vector-32 ]
|
||||
} case ;
|
||||
|
||||
M: x86.64 %select-vector-reps
|
||||
{
|
||||
{ sse4.1? { uchar-16-rep char-16-rep ushort-8-rep short-8-rep uint-4-rep int-4-rep ulonglong-2-rep longlong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
: sse1-float-4-shuffle ( dst shuffle -- )
|
||||
{
|
||||
{ { 0 1 2 3 } [ drop ] }
|
||||
{ { 0 1 0 1 } [ dup MOVLHPS ] }
|
||||
{ { 2 3 2 3 } [ dup MOVHLPS ] }
|
||||
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
|
||||
{ { 2 2 3 3 } [ dup UNPCKHPS ] }
|
||||
[ dupd SHUFPS ]
|
||||
} case ;
|
||||
|
||||
: float-4-shuffle ( dst shuffle -- )
|
||||
sse3? [
|
||||
{
|
||||
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
|
||||
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
|
||||
[ sse1-float-4-shuffle ]
|
||||
} case
|
||||
] [ sse1-float-4-shuffle ] if ;
|
||||
|
||||
: int-4-shuffle ( dst shuffle -- )
|
||||
{
|
||||
{ { 0 1 2 3 } [ drop ] }
|
||||
{ { 0 0 1 1 } [ dup PUNPCKLDQ ] }
|
||||
{ { 2 2 3 3 } [ dup PUNPCKHDQ ] }
|
||||
{ { 0 1 0 1 } [ dup PUNPCKLQDQ ] }
|
||||
{ { 2 3 2 3 } [ dup PUNPCKHQDQ ] }
|
||||
[ dupd PSHUFD ]
|
||||
} case ;
|
||||
|
||||
: longlong-2-shuffle ( dst shuffle -- )
|
||||
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
|
||||
|
||||
: >float-4-shuffle ( double-2-shuffle -- float-4-shuffle )
|
||||
[ 2 * { 0 1 } n+v ] map concat ;
|
||||
|
||||
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
|
||||
dst src rep %copy
|
||||
dst shuffle rep signed-rep {
|
||||
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
|
||||
{ float-4-rep [ float-4-shuffle ] }
|
||||
{ int-4-rep [ int-4-shuffle ] }
|
||||
{ longlong-2-rep [ longlong-2-shuffle ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shuffle-vector-imm-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M:: x86 %shuffle-vector-halves-imm ( dst src1 src2 shuffle rep -- )
|
||||
dst src1 src2 rep two-operand
|
||||
shuffle rep {
|
||||
{ double-2-rep [ >float-4-shuffle SHUFPS ] }
|
||||
{ float-4-rep [ SHUFPS ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shuffle-vector-halves-imm-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||
two-operand PSHUFB ;
|
||||
|
||||
M: x86 %shuffle-vector-reps
|
||||
{
|
||||
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %merge-vector-head
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
{ double-2-rep [ MOVLHPS ] }
|
||||
{ float-4-rep [ UNPCKLPS ] }
|
||||
{ longlong-2-rep [ PUNPCKLQDQ ] }
|
||||
{ int-4-rep [ PUNPCKLDQ ] }
|
||||
{ short-8-rep [ PUNPCKLWD ] }
|
||||
{ char-16-rep [ PUNPCKLBW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %merge-vector-tail
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
{ double-2-rep [ UNPCKHPD ] }
|
||||
{ float-4-rep [ UNPCKHPS ] }
|
||||
{ longlong-2-rep [ PUNPCKHQDQ ] }
|
||||
{ int-4-rep [ PUNPCKHDQ ] }
|
||||
{ short-8-rep [ PUNPCKHWD ] }
|
||||
{ char-16-rep [ PUNPCKHBW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %merge-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %signed-pack-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ int-4-rep [ PACKSSDW ] }
|
||||
{ short-8-rep [ PACKSSWB ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %signed-pack-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unsigned-pack-vector
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
{ int-4-rep [ PACKUSDW ] }
|
||||
{ short-8-rep [ PACKUSWB ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %unsigned-pack-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep } }
|
||||
{ sse4.1? { int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %tail>head-vector ( dst src rep -- )
|
||||
dup {
|
||||
{ float-4-rep [ drop UNPCKHPD ] }
|
||||
{ double-2-rep [ drop UNPCKHPD ] }
|
||||
[ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ]
|
||||
} case ;
|
||||
|
||||
M: x86 %unpack-vector-head ( dst src rep -- )
|
||||
{
|
||||
{ char-16-rep [ PMOVSXBW ] }
|
||||
{ uchar-16-rep [ PMOVZXBW ] }
|
||||
{ short-8-rep [ PMOVSXWD ] }
|
||||
{ ushort-8-rep [ PMOVZXWD ] }
|
||||
{ int-4-rep [ PMOVSXDQ ] }
|
||||
{ uint-4-rep [ PMOVZXDQ ] }
|
||||
{ float-4-rep [ CVTPS2PD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %unpack-vector-head-reps ( -- reps )
|
||||
{
|
||||
{ sse2? { float-4-rep } }
|
||||
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %integer>float-vector ( dst src rep -- )
|
||||
{
|
||||
{ int-4-rep [ CVTDQ2PS ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %integer>float-vector-reps
|
||||
{
|
||||
{ sse2? { int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %float>integer-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ CVTTPS2DQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %float>integer-vector-reps
|
||||
{
|
||||
{ sse2? { float-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
: (%compare-float-vector) ( dst src rep double single -- )
|
||||
[ double-2-rep eq? ] 2dip if ; inline
|
||||
|
||||
: %compare-float-vector ( dst src rep cc -- )
|
||||
{
|
||||
{ cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] }
|
||||
{ cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] }
|
||||
{ cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] }
|
||||
{ cc<>= [ [ CMPORDPD ] [ CMPORDPS ] (%compare-float-vector) ] }
|
||||
{ cc/< [ [ CMPNLTPD ] [ CMPNLTPS ] (%compare-float-vector) ] }
|
||||
{ cc/<= [ [ CMPNLEPD ] [ CMPNLEPS ] (%compare-float-vector) ] }
|
||||
{ cc/= [ [ CMPNEQPD ] [ CMPNEQPS ] (%compare-float-vector) ] }
|
||||
{ cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
|
||||
} case ;
|
||||
|
||||
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
|
||||
rep signed-rep :> rep'
|
||||
dst src rep' {
|
||||
{ longlong-2-rep [ int64 call ] }
|
||||
{ int-4-rep [ int32 call ] }
|
||||
{ short-8-rep [ int16 call ] }
|
||||
{ char-16-rep [ int8 call ] }
|
||||
} case ; inline
|
||||
|
||||
: %compare-int-vector ( dst src rep cc -- )
|
||||
{
|
||||
{ cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
|
||||
{ cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %compare-vector ( dst src1 src2 rep cc -- )
|
||||
[ [ two-operand ] keep ] dip
|
||||
over float-vector-rep?
|
||||
[ %compare-float-vector ]
|
||||
[ %compare-int-vector ] if ;
|
||||
|
||||
: %compare-vector-eq-reps ( -- reps )
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
: %compare-vector-ord-reps ( -- reps )
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep short-8-rep int-4-rep } }
|
||||
{ sse4.2? { longlong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %compare-vector-reps
|
||||
{
|
||||
{ [ dup { cc= cc/= cc/<>= cc<>= } member-eq? ] [ drop %compare-vector-eq-reps ] }
|
||||
[ drop %compare-vector-ord-reps ]
|
||||
} cond ;
|
||||
|
||||
: %compare-float-vector-ccs ( cc -- ccs not? )
|
||||
{
|
||||
{ cc< [ { { cc< f } } f ] }
|
||||
{ cc<= [ { { cc<= f } } f ] }
|
||||
{ cc> [ { { cc< t } } f ] }
|
||||
{ cc>= [ { { cc<= t } } f ] }
|
||||
{ cc= [ { { cc= f } } f ] }
|
||||
{ cc<> [ { { cc< f } { cc< t } } f ] }
|
||||
{ cc<>= [ { { cc<>= f } } f ] }
|
||||
{ cc/< [ { { cc/< f } } f ] }
|
||||
{ cc/<= [ { { cc/<= f } } f ] }
|
||||
{ cc/> [ { { cc/< t } } f ] }
|
||||
{ cc/>= [ { { cc/<= t } } f ] }
|
||||
{ cc/= [ { { cc/= f } } f ] }
|
||||
{ cc/<> [ { { cc/= f } { cc/<>= f } } f ] }
|
||||
{ cc/<>= [ { { cc/<>= f } } f ] }
|
||||
} case ;
|
||||
|
||||
: %compare-int-vector-ccs ( cc -- ccs not? )
|
||||
order-cc {
|
||||
{ cc< [ { { cc> t } } f ] }
|
||||
{ cc<= [ { { cc> f } } t ] }
|
||||
{ cc> [ { { cc> f } } f ] }
|
||||
{ cc>= [ { { cc> t } } t ] }
|
||||
{ cc= [ { { cc= f } } f ] }
|
||||
{ cc/= [ { { cc= f } } t ] }
|
||||
{ t [ { } t ] }
|
||||
{ f [ { } f ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %compare-vector-ccs
|
||||
swap float-vector-rep?
|
||||
[ %compare-float-vector-ccs ]
|
||||
[ %compare-int-vector-ccs ] if ;
|
||||
|
||||
:: %test-vector-mask ( dst temp mask vcc -- )
|
||||
vcc {
|
||||
{ vcc-any [ dst dst TEST dst temp \ CMOVNE (%boolean) ] }
|
||||
{ vcc-none [ dst dst TEST dst temp \ CMOVE (%boolean) ] }
|
||||
{ vcc-all [ dst mask CMP dst temp \ CMOVE (%boolean) ] }
|
||||
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
|
||||
} case ;
|
||||
|
||||
: %move-vector-mask ( dst src rep -- mask )
|
||||
{
|
||||
{ double-2-rep [ MOVMSKPS HEX: f ] }
|
||||
{ float-4-rep [ MOVMSKPS HEX: f ] }
|
||||
[ drop PMOVMSKB HEX: ffff ]
|
||||
} case ;
|
||||
|
||||
M:: x86 %test-vector ( dst src temp rep vcc -- )
|
||||
dst src rep %move-vector-mask :> mask
|
||||
dst temp mask vcc %test-vector-mask ;
|
||||
|
||||
:: %test-vector-mask-branch ( label temp mask vcc -- )
|
||||
vcc {
|
||||
{ vcc-any [ temp temp TEST label JNE ] }
|
||||
{ vcc-none [ temp temp TEST label JE ] }
|
||||
{ vcc-all [ temp mask CMP label JE ] }
|
||||
{ vcc-notall [ temp mask CMP label JNE ] }
|
||||
} case ;
|
||||
|
||||
M:: x86 %test-vector-branch ( label src temp rep vcc -- )
|
||||
temp src rep %move-vector-mask :> mask
|
||||
label temp mask vcc %test-vector-mask-branch ;
|
||||
|
||||
M: x86 %test-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ADDPS ] }
|
||||
{ double-2-rep [ ADDPD ] }
|
||||
{ char-16-rep [ PADDB ] }
|
||||
{ uchar-16-rep [ PADDB ] }
|
||||
{ short-8-rep [ PADDW ] }
|
||||
{ ushort-8-rep [ PADDW ] }
|
||||
{ int-4-rep [ PADDD ] }
|
||||
{ uint-4-rep [ PADDD ] }
|
||||
{ longlong-2-rep [ PADDQ ] }
|
||||
{ ulonglong-2-rep [ PADDQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %add-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ char-16-rep [ PADDSB ] }
|
||||
{ uchar-16-rep [ PADDUSB ] }
|
||||
{ short-8-rep [ PADDSW ] }
|
||||
{ ushort-8-rep [ PADDUSW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %saturated-add-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ADDSUBPS ] }
|
||||
{ double-2-rep [ ADDSUBPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %add-sub-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ SUBPS ] }
|
||||
{ double-2-rep [ SUBPD ] }
|
||||
{ char-16-rep [ PSUBB ] }
|
||||
{ uchar-16-rep [ PSUBB ] }
|
||||
{ short-8-rep [ PSUBW ] }
|
||||
{ ushort-8-rep [ PSUBW ] }
|
||||
{ int-4-rep [ PSUBD ] }
|
||||
{ uint-4-rep [ PSUBD ] }
|
||||
{ longlong-2-rep [ PSUBQ ] }
|
||||
{ ulonglong-2-rep [ PSUBQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %sub-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ char-16-rep [ PSUBSB ] }
|
||||
{ uchar-16-rep [ PSUBUSB ] }
|
||||
{ short-8-rep [ PSUBSW ] }
|
||||
{ ushort-8-rep [ PSUBUSW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %saturated-sub-vector-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ MULPS ] }
|
||||
{ double-2-rep [ MULPD ] }
|
||||
{ short-8-rep [ PMULLW ] }
|
||||
{ ushort-8-rep [ PMULLW ] }
|
||||
{ int-4-rep [ PMULLD ] }
|
||||
{ uint-4-rep [ PMULLD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %mul-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep short-8-rep ushort-8-rep } }
|
||||
{ sse4.1? { int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %mul-high-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ short-8-rep [ PMULHW ] }
|
||||
{ ushort-8-rep [ PMULHUW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %mul-high-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ char-16-rep [ PMADDUBSW ] }
|
||||
{ uchar-16-rep [ PMADDUBSW ] }
|
||||
{ short-8-rep [ PMADDWD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %mul-horizontal-add-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep } }
|
||||
{ ssse3? { char-16-rep uchar-16-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ DIVPS ] }
|
||||
{ double-2-rep [ DIVPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %div-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ char-16-rep [ PMINSB ] }
|
||||
{ uchar-16-rep [ PMINUB ] }
|
||||
{ short-8-rep [ PMINSW ] }
|
||||
{ ushort-8-rep [ PMINUW ] }
|
||||
{ int-4-rep [ PMINSD ] }
|
||||
{ uint-4-rep [ PMINUD ] }
|
||||
{ float-4-rep [ MINPS ] }
|
||||
{ double-2-rep [ MINPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %min-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ char-16-rep [ PMAXSB ] }
|
||||
{ uchar-16-rep [ PMAXUB ] }
|
||||
{ short-8-rep [ PMAXSW ] }
|
||||
{ ushort-8-rep [ PMAXUW ] }
|
||||
{ int-4-rep [ PMAXSD ] }
|
||||
{ uint-4-rep [ PMAXUD ] }
|
||||
{ float-4-rep [ MAXPS ] }
|
||||
{ double-2-rep [ MAXPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %max-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
|
||||
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %avg-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ uchar-16-rep [ PAVGB ] }
|
||||
{ ushort-8-rep [ PAVGW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %avg-vector-reps
|
||||
{
|
||||
{ sse2? { uchar-16-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %dot-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ HEX: ff DPPS ] }
|
||||
{ double-2-rep [ HEX: ff DPPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %dot-vector-reps
|
||||
{
|
||||
{ sse4.1? { float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sad-vector
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ uchar-16-rep [ PSADBW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %sad-vector-reps
|
||||
{
|
||||
{ sse2? { uchar-16-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
signed-rep {
|
||||
{ float-4-rep [ HADDPS ] }
|
||||
{ double-2-rep [ HADDPD ] }
|
||||
{ int-4-rep [ PHADDD ] }
|
||||
{ short-8-rep [ PHADDW ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %horizontal-add-vector-reps
|
||||
{
|
||||
{ sse3? { float-4-rep double-2-rep } }
|
||||
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
|
||||
two-operand PSLLDQ ;
|
||||
|
||||
M: x86 %horizontal-shl-vector-imm-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
|
||||
two-operand PSRLDQ ;
|
||||
|
||||
M: x86 %horizontal-shr-vector-imm-reps
|
||||
{
|
||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %abs-vector ( dst src rep -- )
|
||||
{
|
||||
{ char-16-rep [ PABSB ] }
|
||||
{ short-8-rep [ PABSW ] }
|
||||
{ int-4-rep [ PABSD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %abs-vector-reps
|
||||
{
|
||||
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %sqrt-vector ( dst src rep -- )
|
||||
{
|
||||
{ float-4-rep [ SQRTPS ] }
|
||||
{ double-2-rep [ SQRTPD ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %sqrt-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %and-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ANDPS ] }
|
||||
{ double-2-rep [ ANDPS ] }
|
||||
[ drop PAND ]
|
||||
} case ;
|
||||
|
||||
M: x86 %and-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %andn-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ANDNPS ] }
|
||||
{ double-2-rep [ ANDNPS ] }
|
||||
[ drop PANDN ]
|
||||
} case ;
|
||||
|
||||
M: x86 %andn-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ ORPS ] }
|
||||
{ double-2-rep [ ORPS ] }
|
||||
[ drop POR ]
|
||||
} case ;
|
||||
|
||||
M: x86 %or-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ float-4-rep [ XORPS ] }
|
||||
{ double-2-rep [ XORPS ] }
|
||||
[ drop PXOR ]
|
||||
} case ;
|
||||
|
||||
M: x86 %xor-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ short-8-rep [ PSLLW ] }
|
||||
{ ushort-8-rep [ PSLLW ] }
|
||||
{ int-4-rep [ PSLLD ] }
|
||||
{ uint-4-rep [ PSLLD ] }
|
||||
{ longlong-2-rep [ PSLLQ ] }
|
||||
{ ulonglong-2-rep [ PSLLQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shl-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %shr-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
{ short-8-rep [ PSRAW ] }
|
||||
{ ushort-8-rep [ PSRLW ] }
|
||||
{ int-4-rep [ PSRAD ] }
|
||||
{ uint-4-rep [ PSRLD ] }
|
||||
{ ulonglong-2-rep [ PSRLQ ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shr-vector-reps
|
||||
{
|
||||
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %shl-vector-imm %shl-vector ;
|
||||
M: x86 %shl-vector-imm-reps %shl-vector-reps ;
|
||||
M: x86 %shr-vector-imm %shr-vector ;
|
||||
M: x86 %shr-vector-imm-reps %shr-vector-reps ;
|
||||
|
||||
: scalar-sized-reg ( reg rep -- reg' )
|
||||
rep-size 8 * n-bit-version-of ;
|
||||
|
||||
M: x86 %integer>scalar drop MOVD ;
|
||||
|
||||
:: %scalar>integer-32 ( dst src rep -- )
|
||||
rep {
|
||||
{ int-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 32-bit-version-of
|
||||
2dup eq? [ 2drop ] [ MOVSX ] if
|
||||
] }
|
||||
{ uint-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
] }
|
||||
{ short-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVSX
|
||||
] }
|
||||
{ ushort-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVZX
|
||||
] }
|
||||
{ char-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVSX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
{ uchar-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVZX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
} case ;
|
||||
|
||||
M: x86.32 %scalar>integer ( dst src rep -- ) %scalar>integer-32 ;
|
||||
|
||||
M: x86.64 %scalar>integer ( dst src rep -- )
|
||||
{
|
||||
{ longlong-scalar-rep [ MOVD ] }
|
||||
{ ulonglong-scalar-rep [ MOVD ] }
|
||||
[ %scalar>integer-32 ]
|
||||
} case ;
|
||||
|
||||
M: x86 %vector>scalar %copy ;
|
||||
|
||||
M: x86 %scalar>vector %copy ;
|
||||
|
||||
enable-float-min/max
|
||||
|
|
@ -0,0 +1 @@
|
|||
not loaded
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
||||
|
|
@ -0,0 +1 @@
|
|||
not loaded
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
! Copyright (C) 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien.c-types combinators kernel locals system namespaces
|
||||
compiler.codegen.fixup compiler.constants
|
||||
compiler.cfg.comparisons cpu.architecture cpu.x86
|
||||
cpu.x86.assembler cpu.x86.assembler.operands ;
|
||||
IN: cpu.x86.x87
|
||||
|
||||
! x87 unit is only used if SSE2 is not available.
|
||||
|
||||
: FLD* ( src -- ) [ ST0 ] dip FLD ;
|
||||
: FSTP* ( dst -- ) ST0 FSTP ;
|
||||
|
||||
: copy-register-x87 ( dst src -- )
|
||||
2dup eq? [ 2drop ] [ FLD* shuffle-down FSTP* ] if ;
|
||||
|
||||
M: float-rep copy-register* drop copy-register-x87 ;
|
||||
M: double-rep copy-register* drop copy-register-x87 ;
|
||||
|
||||
: load-x87 ( dst src rep -- )
|
||||
{
|
||||
{ float-rep [ FLDS shuffle-down FSTP* ] }
|
||||
{ double-rep [ FLDL shuffle-down FSTP* ] }
|
||||
} case ;
|
||||
|
||||
: store-x87 ( dst src rep -- )
|
||||
{
|
||||
{ float-rep [ FLD* FSTPS ] }
|
||||
{ double-rep [ FLD* FSTPL ] }
|
||||
} case ;
|
||||
|
||||
: copy-memory-x87 ( dst src rep -- )
|
||||
{
|
||||
{ [ pick register? ] [ load-x87 ] }
|
||||
{ [ over register? ] [ store-x87 ] }
|
||||
} cond ;
|
||||
|
||||
M: float-rep copy-memory* copy-memory-x87 ;
|
||||
M: double-rep copy-memory* copy-memory-x87 ;
|
||||
|
||||
M: x86 %load-float
|
||||
0 [] FLDS
|
||||
<float> rc-absolute rel-binary-literal
|
||||
shuffle-down FSTP* ;
|
||||
|
||||
M: x86 %load-double
|
||||
0 [] FLDL
|
||||
<double> rc-absolute rel-binary-literal
|
||||
shuffle-down FSTP* ;
|
||||
|
||||
:: binary-op ( dst src1 src2 quot -- )
|
||||
src1 FLD*
|
||||
ST0 src2 shuffle-down quot call
|
||||
dst shuffle-down FSTP* ; inline
|
||||
|
||||
M: x86 %add-float [ FADD ] binary-op ;
|
||||
M: x86 %sub-float [ FSUB ] binary-op ;
|
||||
M: x86 %mul-float [ FMUL ] binary-op ;
|
||||
M: x86 %div-float [ FDIV ] binary-op ;
|
||||
|
||||
M: x86 %sqrt FLD* FSQRT shuffle-down FSTP* ;
|
||||
|
||||
M: x86 %single>double-float copy-register-x87 ;
|
||||
M: x86 %double>single-float copy-register-x87 ;
|
||||
|
||||
M: x86 integer-float-needs-stack-frame? t ;
|
||||
|
||||
M:: x86 %integer>float ( dst src -- )
|
||||
4 stack@ src MOV
|
||||
4 stack@ FILDD
|
||||
dst shuffle-down FSTP* ;
|
||||
|
||||
M:: x86 %float>integer ( dst src -- )
|
||||
src FLD*
|
||||
4 stack@ FISTTPD
|
||||
dst 4 stack@ MOV ;
|
||||
|
||||
: compare-op ( src1 src2 quot -- )
|
||||
[ ST0 ] 3dip binary-op ; inline
|
||||
|
||||
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
|
||||
[ [ FCOMI ] compare-op ] (%compare-float) ;
|
||||
|
||||
M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
|
||||
[ [ FUCOMI ] compare-op ] (%compare-float) ;
|
||||
|
||||
M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
|
||||
[ [ FCOMI ] compare-op ] (%compare-float-branch) ;
|
||||
|
||||
M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
|
||||
[ [ FUCOMI ] compare-op ] (%compare-float-branch) ;
|
||||
Loading…
Reference in New Issue