Merge branch 'master' of factorcode.org:/git/factor

db4
Joe Groff 2010-05-17 13:09:53 -07:00
commit d855d0a8a6
20 changed files with 1633 additions and 1105 deletions

View File

@ -38,10 +38,10 @@ M: ##spill compute-stack-frame* drop frame-required ;
M: ##reload compute-stack-frame* drop frame-required ;
M: ##float>integer compute-stack-frame*
drop cpu ppc? [ frame-required ] when ;
drop integer-float-needs-stack-frame? [ frame-required ] when ;
M: ##integer>float compute-stack-frame*
drop cpu ppc? [ frame-required ] when ;
drop integer-float-needs-stack-frame? [ frame-required ] when ;
M: insn compute-stack-frame* drop ;

View File

@ -40,9 +40,9 @@ IN: compiler.cfg.linear-scan
: admissible-registers ( cfg -- regs )
[ machine-registers ] dip
frame-pointer?>> [
[ int-regs ] dip clone
[ int-regs ] dip [ clone ] map
[ [ [ frame-reg ] dip remove ] change-at ] keep
] unless ;
] when ;
: linear-scan ( cfg -- cfg' )
dup dup admissible-registers (linear-scan) ;

View File

@ -1,6 +1,6 @@
! Copyright (C) 2009, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs fry kernel namespaces
USING: accessors arrays assocs fry locals kernel namespaces
sequences sequences.deep
sets vectors
cpu.architecture
@ -46,35 +46,39 @@ SYMBOL: class-element-map
! Sequence of vreg pairs
SYMBOL: copies
: value-of ( vreg -- value )
insn-of dup ##tagged>integer? [ src>> ] [ dst>> ] if ;
: init-coalescing ( -- )
defs get keys
[ [ dup ] H{ } map>assoc leader-map set ]
[ [ dup 1vector ] H{ } map>assoc class-element-map set ] bi
defs get
[ [ drop dup ] assoc-map leader-map set ]
[ [ [ dup dup value-of ] dip <vreg-info> 1array ] assoc-map class-element-map set ] bi
V{ } clone copies set ;
: classes-interfere? ( vreg1 vreg2 -- ? )
[ leader ] bi@ 2dup eq? [ 2drop f ] [
[ class-elements flatten ] bi@ sets-interfere?
] if ;
: update-leaders ( vreg1 vreg2 -- )
: coalesce-leaders ( vreg1 vreg2 -- )
! leader2 becomes the leader.
swap leader-map get set-at ;
: merge-classes ( vreg1 vreg2 -- )
[ [ class-elements ] bi@ push ]
[ drop class-element-map get delete-at ] 2bi ;
: coalesce-elements ( merged vreg1 vreg2 -- )
! delete leader1's class, and set leader2's class to merged.
class-element-map get [ delete-at ] [ set-at ] bi-curry bi* ;
: eliminate-copy ( vreg1 vreg2 -- )
[ leader ] bi@
2dup eq? [ 2drop ] [
[ update-leaders ]
[ merge-classes ]
2bi
] if ;
: coalesce-vregs ( merged leader1 leader2 -- )
[ coalesce-leaders ] [ coalesce-elements ] 2bi ;
:: maybe-eliminate-copy ( vreg1 vreg2 -- )
! Eliminate a copy of possible.
vreg1 leader :> vreg1
vreg2 leader :> vreg2
vreg1 vreg2 eq? [
vreg1 class-elements vreg2 class-elements sets-interfere?
[ drop ] [ vreg1 vreg2 coalesce-vregs ] if
] unless ;
GENERIC: prepare-insn ( insn -- )
: try-to-coalesce ( dst src -- ) 2array copies get push ;
: maybe-eliminate-copy-later ( dst src -- )
2array copies get push ;
M: insn prepare-insn drop ;
@ -85,19 +89,19 @@ M: vreg-insn prepare-insn
2dup empty? not and [
first
2dup [ rep-of reg-class-of ] bi@ eq?
[ try-to-coalesce ] [ 2drop ] if
[ maybe-eliminate-copy-later ] [ 2drop ] if
] [ 2drop ] if
] bi ;
M: ##copy prepare-insn
[ dst>> ] [ src>> ] bi try-to-coalesce ;
[ dst>> ] [ src>> ] bi maybe-eliminate-copy-later ;
M: ##tagged>integer prepare-insn
[ dst>> ] [ src>> ] bi eliminate-copy ;
[ dst>> ] [ src>> ] bi maybe-eliminate-copy ;
M: ##phi prepare-insn
[ dst>> ] [ inputs>> values ] bi
[ eliminate-copy ] with each ;
[ maybe-eliminate-copy ] with each ;
: prepare-block ( bb -- )
instructions>> [ prepare-insn ] each ;
@ -107,10 +111,7 @@ M: ##phi prepare-insn
[ prepare-block ] each-basic-block ;
: process-copies ( -- )
copies get [
2dup classes-interfere?
[ 2drop ] [ eliminate-copy ] if
] assoc-each ;
copies get [ maybe-eliminate-copy ] assoc-each ;
GENERIC: useful-insn? ( insn -- ? )
@ -135,6 +136,7 @@ PRIVATE>
dup construct-cssa
dup compute-defs
dup compute-insns
dup compute-ssa-live-sets
dup compute-live-ranges
dup prepare-coalescing

View File

@ -2,17 +2,35 @@ USING: accessors compiler.cfg compiler.cfg.debugger
compiler.cfg.def-use compiler.cfg.dominance
compiler.cfg.instructions compiler.cfg.liveness.ssa
compiler.cfg.registers compiler.cfg.predecessors
compiler.cfg.ssa.interference
compiler.cfg.ssa.interference.live-ranges cpu.architecture
kernel namespaces tools.test ;
compiler.cfg.comparisons compiler.cfg.ssa.interference
compiler.cfg.ssa.interference.private
compiler.cfg.ssa.interference.live-ranges
cpu.architecture kernel namespaces tools.test alien.c-types
arrays sequences slots ;
IN: compiler.cfg.ssa.interference.tests
: test-interference ( -- )
cfg new 0 get >>entry
dup compute-ssa-live-sets
dup compute-defs
dup compute-insns
compute-live-ranges ;
: <test-vreg-info> ( vreg -- info )
[ ] [ insn-of dup ##tagged>integer? [ src>> ] [ dst>> ] if ] [ def-of ] tri
<vreg-info> ;
: test-vregs-intersect? ( vreg1 vreg2 -- ? )
[ <test-vreg-info> ] bi@ vregs-intersect? ;
: test-vregs-interfere? ( vreg1 vreg2 -- ? )
[ <test-vreg-info> ] bi@
[ blue >>color ] [ red >>color ] bi*
vregs-interfere? ;
: test-sets-interfere? ( seq1 seq2 -- merged ? )
[ [ <test-vreg-info> ] map ] bi@ sets-interfere? ;
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 2 D 0 }
@ -34,17 +52,310 @@ V{
[ ] [ test-interference ] unit-test
[ f ] [ 0 1 vregs-interfere? ] unit-test
[ f ] [ 1 0 vregs-interfere? ] unit-test
[ f ] [ 2 3 vregs-interfere? ] unit-test
[ f ] [ 3 2 vregs-interfere? ] unit-test
[ t ] [ 0 2 vregs-interfere? ] unit-test
[ t ] [ 2 0 vregs-interfere? ] unit-test
[ f ] [ 1 3 vregs-interfere? ] unit-test
[ f ] [ 3 1 vregs-interfere? ] unit-test
[ t ] [ 3 4 vregs-interfere? ] unit-test
[ t ] [ 4 3 vregs-interfere? ] unit-test
[ t ] [ 3 5 vregs-interfere? ] unit-test
[ t ] [ 5 3 vregs-interfere? ] unit-test
[ f ] [ 3 6 vregs-interfere? ] unit-test
[ f ] [ 6 3 vregs-interfere? ] unit-test
[ f ] [ 0 1 test-vregs-intersect? ] unit-test
[ f ] [ 1 0 test-vregs-intersect? ] unit-test
[ f ] [ 2 3 test-vregs-intersect? ] unit-test
[ f ] [ 3 2 test-vregs-intersect? ] unit-test
[ t ] [ 0 2 test-vregs-intersect? ] unit-test
[ t ] [ 2 0 test-vregs-intersect? ] unit-test
[ f ] [ 1 3 test-vregs-intersect? ] unit-test
[ f ] [ 3 1 test-vregs-intersect? ] unit-test
[ t ] [ 3 4 test-vregs-intersect? ] unit-test
[ t ] [ 4 3 test-vregs-intersect? ] unit-test
[ t ] [ 3 5 test-vregs-intersect? ] unit-test
[ t ] [ 5 3 test-vregs-intersect? ] unit-test
[ f ] [ 3 6 test-vregs-intersect? ] unit-test
[ f ] [ 6 3 test-vregs-intersect? ] unit-test
V{
T{ ##prologue }
T{ ##branch }
} 0 test-bb
V{
T{ ##inc-d f -3 }
T{ ##peek f 12 D -2 }
T{ ##peek f 23 D -1 }
T{ ##sar-imm f 13 23 4 }
T{ ##peek f 24 D -3 }
T{ ##sar-imm f 14 24 4 }
T{ ##mul f 15 13 13 }
T{ ##mul f 16 15 15 }
T{ ##tagged>integer f 17 12 }
T{ ##store-memory f 16 17 14 0 7 int-rep uchar }
T{ ##branch }
} 1 test-bb
V{
T{ ##epilogue }
T{ ##return }
} 2 test-bb
0 1 edge
1 2 edge
[ ] [ test-interference ] unit-test
[ t ] [ { 15 } { 23 13 } test-sets-interfere? nip ] unit-test
V{
T{ ##prologue f }
T{ ##branch f }
} 0 test-bb
V{
T{ ##inc-d f 2 }
T{ ##peek f 32 D 2 }
T{ ##load-reference f 33 ##check-nursery-branch }
T{ ##load-integer f 34 11 }
T{ ##tagged>integer f 35 32 }
T{ ##and-imm f 36 35 15 }
T{ ##compare-integer-imm-branch f 36 7 cc= }
} 1 test-bb
V{
T{ ##slot-imm f 48 32 1 7 }
T{ ##slot-imm f 50 48 1 2 }
T{ ##sar-imm f 65 50 4 }
T{ ##compare-integer-branch f 34 65 cc<= }
} 2 test-bb
V{
T{ ##inc-d f -2 }
T{ ##slot-imm f 57 48 11 2 }
T{ ##compare f 58 33 57 cc= 20 }
T{ ##replace f 58 D 0 }
T{ ##branch f }
} 3 test-bb
V{
T{ ##epilogue f }
T{ ##return f }
} 4 test-bb
V{
T{ ##inc-d f -2 }
T{ ##replace-imm f f D 0 }
T{ ##branch f }
} 5 test-bb
V{
T{ ##epilogue f }
T{ ##return f }
} 6 test-bb
V{
T{ ##inc-d f -2 }
T{ ##replace-imm f f D 0 }
T{ ##branch f }
} 7 test-bb
V{
T{ ##epilogue f }
T{ ##return f }
} 8 test-bb
0 1 edge
1 { 2 7 } edges
2 { 3 5 } edges
3 4 edge
5 6 edge
7 8 edge
[ ] [ test-interference ] unit-test
[ f ] [ { 48 } { 32 35 } test-sets-interfere? nip ] unit-test
TUPLE: bab ;
TUPLE: gfg { x bab } ;
: bah ( -- x ) f ;
V{
T{ ##prologue }
T{ ##branch }
} 0 test-bb
V{
T{ ##check-nursery-branch f 16 cc<= 75 76 }
} 1 test-bb
V{
T{ ##save-context f 77 78 }
T{ ##call-gc f { } }
T{ ##branch }
} 2 test-bb
V{
T{ ##inc-d f 1 }
T{ ##load-reference f 37 T{ bab } }
T{ ##load-reference f 38 { gfg 1 1 tuple 57438726 gfg 7785907 } }
T{ ##allot f 40 12 tuple 4 }
T{ ##set-slot-imm f 38 40 1 7 }
T{ ##set-slot-imm f 37 40 2 7 }
T{ ##replace f 40 D 0 }
T{ ##branch }
} 3 test-bb
V{
T{ ##call f bah }
T{ ##branch }
} 4 test-bb
V{
T{ ##inc-r f 1 }
T{ ##inc-d f 1 }
T{ ##peek f 43 D 1 }
T{ ##peek f 44 D 2 }
T{ ##tagged>integer f 45 43 }
T{ ##and-imm f 46 45 15 }
T{ ##compare-integer-imm-branch f 46 7 cc= }
} 5 test-bb
V{
T{ ##inc-d f -1 }
T{ ##slot-imm f 58 43 1 7 }
T{ ##slot-imm f 60 58 7 2 }
T{ ##compare-imm-branch f 60 bab cc= }
} 6 test-bb
V{
T{ ##branch }
} 7 test-bb
V{
T{ ##inc-r f -1 }
T{ ##inc-d f -1 }
T{ ##set-slot-imm f 43 44 2 7 }
T{ ##write-barrier-imm f 44 2 7 34 35 }
T{ ##branch }
} 8 test-bb
V{
T{ ##epilogue }
T{ ##return }
} 9 test-bb
V{
T{ ##inc-d f 1 }
T{ ##replace f 44 R 0 }
T{ ##replace-imm f bab D 0 }
T{ ##branch }
} 10 test-bb
V{
T{ ##call f bad-slot-value }
T{ ##branch }
} 11 test-bb
V{
T{ ##no-tco }
} 12 test-bb
V{
T{ ##inc-d f -1 }
T{ ##branch }
} 13 test-bb
V{
T{ ##inc-d f 1 }
T{ ##replace f 44 R 0 }
T{ ##replace-imm f bab D 0 }
T{ ##branch }
} 14 test-bb
V{
T{ ##call f bad-slot-value }
T{ ##branch }
} 15 test-bb
V{
T{ ##no-tco }
} 16 test-bb
0 1 edge
1 { 3 2 } edges
2 3 edge
3 4 edge
4 5 edge
5 { 6 13 } edges
6 { 7 10 } edges
7 8 edge
8 9 edge
10 11 edge
11 12 edge
13 14 edge
14 15 edge
15 16 edge
[ ] [ test-interference ] unit-test
[ t ] [ 43 45 test-vregs-intersect? ] unit-test
[ f ] [ 43 45 test-vregs-interfere? ] unit-test
[ t ] [ 43 46 test-vregs-intersect? ] unit-test
[ t ] [ 43 46 test-vregs-interfere? ] unit-test
[ f ] [ 45 46 test-vregs-intersect? ] unit-test
[ f ] [ 45 46 test-vregs-interfere? ] unit-test
[ f ] [ { 43 } { 45 } test-sets-interfere? nip ] unit-test
[ t f ] [
{ 46 } { 43 } { 45 }
[ [ <test-vreg-info> ] map ] tri@
sets-interfere? [ sets-interfere? nip ] dip
] unit-test
V{
T{ ##prologue f }
T{ ##branch f }
} 0 test-bb
V{
T{ ##inc-d f 1 }
T{ ##peek f 31 D 1 }
T{ ##sar-imm f 16 31 4 }
T{ ##load-integer f 17 0 }
T{ ##copy f 33 17 int-rep }
T{ ##branch f }
} 1 test-bb
V{
T{ ##phi f 21 H{ { 1 33 } { 3 32 } } }
T{ ##compare-integer-branch f 21 16 cc< }
} 2 test-bb
V{
T{ ##add-imm f 27 21 1 }
T{ ##copy f 32 27 int-rep }
T{ ##branch f }
} 3 test-bb
V{
T{ ##inc-d f -2 }
T{ ##branch f }
} 4 test-bb
V{
T{ ##epilogue f }
T{ ##return f }
} 5 test-bb
0 1 edge
1 2 edge
2 { 3 4 } edges
3 2 edge
4 5 edge
[ ] [ test-interference ] unit-test
[ f f ] [
{ 33 } { 21 } { 32 }
[ [ <test-vreg-info> ] map ] tri@
sets-interfere? [ sets-interfere? nip ] dip
] unit-test
[ f ] [ 33 21 test-vregs-intersect? ] unit-test
[ f ] [ 32 21 test-vregs-intersect? ] unit-test
[ f ] [ 32 33 test-vregs-intersect? ] unit-test

View File

@ -1,92 +1,175 @@
! Copyright (C) 2009 Slava Pestov.
! Copyright (C) 2009, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs combinators combinators.short-circuit fry
kernel math math.order sorting namespaces sequences locals
compiler.cfg.def-use compiler.cfg.dominance
compiler.cfg.ssa.interference.live-ranges ;
USING: accessors arrays assocs combinators
combinators.short-circuit fry kernel math math.order sorting
sorting.slots namespaces sequences locals compiler.cfg.def-use
compiler.cfg.dominance compiler.cfg.ssa.interference.live-ranges ;
IN: compiler.cfg.ssa.interference
! Interference testing using SSA properties. Actually the only SSA property
! used here is that definitions dominate uses; because of this, the input
! is allowed to have multiple definitions of each vreg as long as they're
! all in the same basic block. This is needed because two-operand conversion
! runs before coalescing, which uses SSA interference testing.
! Interference testing using SSA properties.
!
! Based on:
!
! Revisiting Out-of-SSA Translation for Correctness, Code Quality, and Efficiency
! http://hal.archives-ouvertes.fr/docs/00/34/99/25/PDF/OutSSA-RR.pdf
TUPLE: vreg-info vreg value def-index bb pre-of color equal-anc-in equal-anc-out ;
:: <vreg-info> ( vreg value bb -- info )
vreg-info new
vreg >>vreg
bb >>bb
value >>value
bb pre-of >>pre-of
vreg bb def-index >>def-index ;
<PRIVATE
:: kill-after-def? ( vreg1 vreg2 bb -- ? )
! Our dominance pass computes dominance information on a
! per-basic block level. Rig up a more fine-grained dominance
! test here.
: locally-dominates? ( vreg1 vreg2 -- ? )
[ def-index>> ] bi@ < ;
:: vreg-dominates? ( vreg1 vreg2 -- ? )
vreg1 bb>> :> bb1
vreg2 bb>> :> bb2
bb1 bb2 eq?
[ vreg1 vreg2 locally-dominates? ] [ bb1 bb2 dominates? ] if ;
! Testing individual vregs for live range intersection.
: kill-after-def? ( vreg1 vreg2 bb -- ? )
! If first register is used after second one is defined, they interfere.
! If they are used in the same instruction, no interference. If the
! instruction is a def-is-use-insn, then there will be a use at +1
! (instructions are 2 apart) and so outputs will interfere with
! inputs.
vreg1 bb kill-index
vreg2 bb def-index > ;
[ kill-index ] [ def-index ] bi-curry bi* > ;
:: interferes-same-block? ( vreg1 vreg2 bb1 bb2 -- ? )
! If both are defined in the same basic block, they interfere if their
! local live ranges intersect.
vreg1 bb1 def-index
vreg2 bb1 def-index <
[ vreg1 vreg2 ] [ vreg2 vreg1 ] if
bb1 kill-after-def? ;
: interferes-first-dominates? ( vreg1 vreg2 bb1 bb2 -- ? )
: interferes-first-dominates? ( vreg1 vreg2 -- ? )
! If vreg1 dominates vreg2, then they interfere if vreg2's definition
! occurs before vreg1 is killed.
nip
kill-after-def? ;
[ [ vreg>> ] bi@ ] [ nip bb>> ] 2bi kill-after-def? ;
: interferes-second-dominates? ( vreg1 vreg2 bb1 bb2 -- ? )
: interferes-second-dominates? ( vreg1 vreg2 -- ? )
! If vreg2 dominates vreg1, then they interfere if vreg1's definition
! occurs before vreg2 is killed.
drop
swapd kill-after-def? ;
swap interferes-first-dominates? ;
PRIVATE>
: interferes-same-block? ( vreg1 vreg2 -- ? )
! If both are defined in the same basic block, they interfere if their
! local live ranges intersect.
2dup locally-dominates? [ swap ] unless
interferes-first-dominates? ;
: vregs-interfere? ( vreg1 vreg2 -- ? )
2dup [ def-of ] bi@ {
{ [ 2dup eq? ] [ interferes-same-block? ] }
{ [ 2dup dominates? ] [ interferes-first-dominates? ] }
{ [ 2dup swap dominates? ] [ interferes-second-dominates? ] }
[ 2drop 2drop f ]
:: vregs-intersect? ( vreg1 vreg2 -- ? )
vreg1 bb>> :> bb1
vreg2 bb>> :> bb2
{
{ [ bb1 bb2 eq? ] [ vreg1 vreg2 interferes-same-block? ] }
{ [ bb1 bb2 dominates? ] [ vreg1 vreg2 interferes-first-dominates? ] }
{ [ bb2 bb1 dominates? ] [ vreg1 vreg2 interferes-second-dominates? ] }
[ f ]
} cond ;
<PRIVATE
! Value-based interference test.
: chain-intersect ( vreg1 vreg2 -- vreg )
[ 2dup { [ nip ] [ vregs-intersect? not ] } 2&& ]
[ equal-anc-in>> ]
while nip ;
! Debug this stuff later
: update-equal-anc-out ( vreg1 vreg2 -- )
dupd chain-intersect >>equal-anc-out drop ;
: quadratic-test? ( seq1 seq2 -- ? ) [ length ] bi@ + 10 < ;
: same-sets? ( vreg1 vreg2 -- ? )
[ color>> ] bi@ eq? ;
: quadratic-test ( seq1 seq2 -- ? )
'[ _ [ vregs-interfere? ] with any? ] any? ;
: same-values? ( vreg1 vreg2 -- ? )
[ value>> ] bi@ eq? ;
: sort-vregs-by-bb ( vregs -- alist )
defs get
'[ dup _ at ] { } map>assoc
[ second pre-of ] sort-with ;
: vregs-interfere? ( vreg1 vreg2 -- ? )
[ f >>equal-anc-out ] dip
: ?last ( seq -- elt/f ) [ f ] [ last ] if-empty ; inline
2dup same-sets? [ equal-anc-out>> ] when
: find-parent ( dom current -- parent )
2dup same-values?
[ update-equal-anc-out f ] [ chain-intersect >boolean ] if ;
! Merging lists of vregs sorted by dominance.
M: vreg-info <=> ( vreg1 vreg2 -- <=> )
{ { pre-of>> <=> } { def-index>> <=> } } compare-slots ;
SYMBOLS: blue red ;
TUPLE: iterator seq n ;
: <iterator> ( seq -- iterator ) 0 iterator boa ; inline
: done? ( iterator -- ? ) [ seq>> length ] [ n>> ] bi = ; inline
: this ( iterator -- obj ) [ n>> ] [ seq>> ] bi nth ; inline
: ++ ( iterator -- ) [ 1 + ] change-n drop ; inline
: take ( iterator -- obj ) [ this ] [ ++ ] bi ; inline
: blue-smaller? ( blue red -- ? )
[ this ] bi@ before? ; inline
: take-blue? ( blue red -- ? )
{
[ nip done? ]
[
{
[ drop done? not ]
[ blue-smaller? ]
} 2&&
]
} 2|| ; inline
: merge-sets ( blue red -- seq )
[ <iterator> ] bi@
[ 2dup [ done? ] both? not ]
[
2dup take-blue?
[ over take blue >>color ]
[ dup take red >>color ]
if
] produce 2nip ;
: update-for-merge ( seq -- )
[
dup [ equal-anc-in>> ] [ equal-anc-out>> ] bi
2dup and [ [ vreg-dominates? ] most ] [ or ] if
>>equal-anc-in
drop
] each ;
! Linear-time live range intersection test in a merged set.
: find-parent ( dom current -- vreg )
over empty? [ 2drop f ] [
over last over dominates? [ drop last ] [
over pop* find-parent
] if
over last over vreg-dominates?
[ drop last ] [ over pop* find-parent ] if
] if ;
:: linear-test ( seq1 seq2 -- ? )
! Instead of sorting, SSA destruction should keep equivalence
! classes sorted by merging them on append
:: linear-interference-test ( seq -- ? )
V{ } clone :> dom
seq1 seq2 append sort-vregs-by-bb [| pair |
pair first :> current
dom current find-parent
dup [ current vregs-interfere? ] when
[ t ] [ current dom push f ] if
seq [| vreg |
dom vreg find-parent
{ [ ] [ vreg same-sets? not ] [ vreg swap vregs-interfere? ] } 1&&
[ t ] [ vreg dom push f ] if
] any? ;
: sets-interfere-1? ( seq1 seq2 -- merged/f ? )
[ first ] bi@
2dup before? [ swap ] unless
2dup same-values? [
2dup equal-anc-in<<
2array f
] [
2dup vregs-intersect?
[ 2drop f t ] [ 2array f ] if
] if ;
PRIVATE>
: sets-interfere? ( seq1 seq2 -- ? )
quadratic-test ;
: sets-interfere? ( seq1 seq2 -- merged/f ? )
2dup [ length 1 = ] both? [ sets-interfere-1? ] [
merge-sets dup linear-interference-test
[ drop f t ] [ dup update-for-merge f ] if
] if ;

View File

@ -25,15 +25,23 @@ SYMBOLS: local-def-indices local-kill-indices ;
[ 1 + ] dip [ local-kill-indices get set-at ] with each
] if ;
: visit-insn ( insn n -- )
2 * swap [ record-def ] [ record-uses ] 2bi ;
GENERIC: record-insn ( n insn -- )
M: ##phi record-insn
record-def ;
M: vreg-insn record-insn
[ 2 * ] dip [ record-def ] [ record-uses ] 2bi ;
M: insn record-insn
2drop ;
SYMBOLS: def-indices kill-indices ;
: compute-local-live-ranges ( bb -- )
H{ } clone local-def-indices set
H{ } clone local-kill-indices set
[ instructions>> [ visit-insn ] each-index ]
[ instructions>> [ swap record-insn ] each-index ]
[ [ local-def-indices get ] dip def-indices get set-at ]
[ [ local-kill-indices get ] dip kill-indices get set-at ]
tri ;

View File

@ -85,6 +85,9 @@ IN: compiler.tests.float
[ t ] [ -0.0 [ dup 0.0 float= swap -0.0 float= or ] compile-call ] unit-test
[ f ] [ 3.0 [ dup 0.0 float= swap -0.0 float= or ] compile-call ] unit-test
[ 313.0 ] [ 313 [ fixnum>float ] compile-call ] unit-test
[ -313 ] [ -313.5 [ float>fixnum ] compile-call ] unit-test
[ 313 ] [ 313.5 [ float>fixnum ] compile-call ] unit-test
[ 315 315.0 ] [ 313 [ 2 fixnum+fast dup fixnum>float ] compile-call ] unit-test
[ t ] [ 0/0. 0/0. [ float-unordered? ] compile-call ] unit-test

View File

@ -296,6 +296,8 @@ HOOK: %binary-float-function cpu ( dst src1 src2 func -- )
HOOK: %single>double-float cpu ( dst src -- )
HOOK: %double>single-float cpu ( dst src -- )
HOOK: integer-float-needs-stack-frame? cpu ( -- ? )
HOOK: %integer>float cpu ( dst src -- )
HOOK: %float>integer cpu ( dst src -- )

View File

@ -190,6 +190,8 @@ M: ppc %sub-float FSUB ;
M: ppc %mul-float FMUL ;
M: ppc %div-float FDIV ;
M: ppc integer-float-needs-stack-frame? t ;
M:: ppc %integer>float ( dst src -- )
HEX: 4330 scratch-reg LIS
scratch-reg 1 0 scratch@ STW

View File

@ -7,15 +7,20 @@ words compiler.constants compiler.codegen.fixup
compiler.cfg.instructions compiler.cfg.builder
compiler.cfg.builder.alien.boxing compiler.cfg.intrinsics
compiler.cfg.stack-frame cpu.x86.assembler
cpu.x86.assembler.operands cpu.x86 cpu.architecture vm ;
cpu.x86.assembler.operands cpu.x86 cpu.architecture vm vocabs ;
FROM: layouts => cell ;
IN: cpu.x86.32
: x86-float-regs ( -- seq )
"cpu.x86.sse" vocab
{ XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 }
{ ST0 ST1 ST2 ST3 ST4 ST5 ST6 }
? ;
M: x86.32 machine-registers
{
{ int-regs { EAX ECX EDX EBP EBX } }
{ float-regs { XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 } }
} ;
{ int-regs { EAX ECX EDX EBP EBX } }
float-regs x86-float-regs 2array
2array ;
M: x86.32 ds-reg ESI ;
M: x86.32 rs-reg EDI ;
@ -94,7 +99,7 @@ M: x86.32 param-regs
M: x86.32 return-regs
{
{ int-regs { EAX EDX } }
{ float-regs { f } }
{ float-regs { ST0 } }
} ;
M: x86.32 %prologue ( n -- )
@ -105,11 +110,11 @@ M: x86.32 %prologue ( n -- )
M: x86.32 %prepare-jump
pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here ;
:: load-float-return ( dst x87-insn sse-insn -- )
:: load-float-return ( dst x87-insn rep -- )
dst register? [
ESP 4 SUB
ESP [] x87-insn execute
dst ESP [] sse-insn execute
dst ESP [] rep %copy
ESP 4 ADD
] [
dst ?spill-slot x87-insn execute
@ -118,14 +123,14 @@ M: x86.32 %prepare-jump
M: x86.32 %load-reg-param ( dst reg rep -- )
{
{ int-rep [ int-rep %copy ] }
{ float-rep [ drop \ FSTPS \ MOVSS load-float-return ] }
{ double-rep [ drop \ FSTPL \ MOVSD load-float-return ] }
{ float-rep [ drop \ FSTPS float-rep load-float-return ] }
{ double-rep [ drop \ FSTPL double-rep load-float-return ] }
} case ;
:: store-float-return ( src x87-insn sse-insn -- )
:: store-float-return ( src x87-insn rep -- )
src register? [
ESP 4 SUB
ESP [] src sse-insn execute
ESP [] src rep %copy
ESP [] x87-insn execute
ESP 4 ADD
] [
@ -135,8 +140,8 @@ M: x86.32 %load-reg-param ( dst reg rep -- )
M: x86.32 %store-reg-param ( src reg rep -- )
{
{ int-rep [ swap int-rep %copy ] }
{ float-rep [ drop \ FLDS \ MOVSS store-float-return ] }
{ double-rep [ drop \ FLDL \ MOVSD store-float-return ] }
{ float-rep [ drop \ FLDS float-rep store-float-return ] }
{ double-rep [ drop \ FLDL double-rep store-float-return ] }
} case ;
:: call-unbox-func ( src func -- )

View File

@ -179,3 +179,5 @@ USE: vocabs.loader
{ [ os unix? ] [ "cpu.x86.64.unix" require ] }
{ [ os winnt? ] [ "cpu.x86.64.winnt" require ] }
} cond
check-sse

View File

@ -496,6 +496,8 @@ PRIVATE>
: FILDQ ( src -- ) { BIN: 101 f HEX: DF } 1-operand ;
: FISTPD ( dst -- ) { BIN: 011 f HEX: DB } 1-operand ;
: FISTPQ ( dst -- ) { BIN: 111 f HEX: DF } 1-operand ;
: FISTTPD ( dst -- ) { BIN: 001 f HEX: DB } 1-operand ;
: FISTTPQ ( dst -- ) { BIN: 001 f HEX: DF } 1-operand ;
: FLD ( dst src -- ) HEX: D9 0 x87-st0-op ;
: FLD1 ( -- ) { HEX: D9 HEX: E8 } % ;

View File

@ -15,15 +15,16 @@ REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ;
REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ;
REGISTERS: 64
RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ;
REGISTERS: 64 RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ;
REGISTERS: 128
XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7
XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ;
REGISTERS: 80
ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 ;
REGISTERS: 80 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 ;
: shuffle-down ( STn -- STn+1 )
"register" word-prop 1 + 80 registers get at nth ;
PREDICATE: register < word
"register" word-prop ;

View File

@ -0,0 +1,2 @@
Slava Pestov
Joe Groff

View File

@ -0,0 +1,913 @@
! Copyright (C) 2009, 2010 Joe Groff, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien.c-types arrays assocs combinators fry kernel locals
macros math math.vectors namespaces quotations sequences system
compiler.cfg.comparisons compiler.cfg.intrinsics
compiler.codegen.fixup cpu.architecture cpu.x86
cpu.x86.assembler cpu.x86.assembler.operands cpu.x86.features ;
IN: cpu.x86.sse
! Scalar floating point with SSE2
M: x86 %load-float <float> float-rep %load-vector ;
M: x86 %load-double <double> double-rep %load-vector ;
M: float-rep copy-register* drop MOVAPS ;
M: double-rep copy-register* drop MOVAPS ;
M: float-rep copy-memory* drop MOVSS ;
M: double-rep copy-memory* drop MOVSD ;
M: x86 %add-float double-rep two-operand ADDSD ;
M: x86 %sub-float double-rep two-operand SUBSD ;
M: x86 %mul-float double-rep two-operand MULSD ;
M: x86 %div-float double-rep two-operand DIVSD ;
M: x86 %min-float double-rep two-operand MINSD ;
M: x86 %max-float double-rep two-operand MAXSD ;
M: x86 %sqrt SQRTSD ;
: %clear-unless-in-place ( dst src -- )
over = [ drop ] [ dup XORPS ] if ;
M: x86 %single>double-float [ %clear-unless-in-place ] [ CVTSS2SD ] 2bi ;
M: x86 %double>single-float [ %clear-unless-in-place ] [ CVTSD2SS ] 2bi ;
M: x86 integer-float-needs-stack-frame? f ;
M: x86 %integer>float [ drop dup XORPS ] [ CVTSI2SD ] 2bi ;
M: x86 %float>integer CVTTSD2SI ;
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
[ COMISD ] (%compare-float) ;
M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
[ UCOMISD ] (%compare-float) ;
M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
[ COMISD ] (%compare-float-branch) ;
M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
[ UCOMISD ] (%compare-float-branch) ;
! SIMD
M: float-4-rep copy-register* drop MOVAPS ;
M: double-2-rep copy-register* drop MOVAPS ;
M: vector-rep copy-register* drop MOVDQA ;
MACRO: available-reps ( alist -- )
! Each SSE version adds new representations and supports
! all old ones
unzip { } [ append ] accumulate rest swap suffix
[ [ 1quotation ] map ] bi@ zip
reverse [ { } ] suffix
'[ _ cond ] ;
M: x86 %alien-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %zero-vector
{
{ double-2-rep [ dup XORPS ] }
{ float-4-rep [ dup XORPS ] }
[ drop dup PXOR ]
} case ;
M: x86 %zero-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %fill-vector
{
{ double-2-rep [ dup [ XORPS ] [ CMPEQPS ] 2bi ] }
{ float-4-rep [ dup [ XORPS ] [ CMPEQPS ] 2bi ] }
[ drop dup PCMPEQB ]
} case ;
M: x86 %fill-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
rep signed-rep {
{ float-4-rep [
dst src1 float-4-rep %copy
dst src2 UNPCKLPS
src3 src4 UNPCKLPS
dst src3 MOVLHPS
] }
{ int-4-rep [
dst src1 int-4-rep %copy
dst src2 PUNPCKLDQ
src3 src4 PUNPCKLDQ
dst src3 PUNPCKLQDQ
] }
} case ;
M: x86 %gather-vector-4-reps
{
! Can't do this with sse1 since it will want to unbox
! double-precision floats and convert to single precision
{ sse2? { float-4-rep int-4-rep uint-4-rep } }
} available-reps ;
M:: x86 %gather-int-vector-4 ( dst src1 src2 src3 src4 rep -- )
dst rep %zero-vector
dst src1 32-bit-version-of 0 PINSRD
dst src2 32-bit-version-of 1 PINSRD
dst src3 32-bit-version-of 2 PINSRD
dst src4 32-bit-version-of 3 PINSRD ;
M: x86 %gather-int-vector-4-reps
{
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
M:: x86 %gather-vector-2 ( dst src1 src2 rep -- )
rep signed-rep {
{ double-2-rep [
dst src1 double-2-rep %copy
dst src2 MOVLHPS
] }
{ longlong-2-rep [
dst src1 longlong-2-rep %copy
dst src2 PUNPCKLQDQ
] }
} case ;
M: x86 %gather-vector-2-reps
{
{ sse2? { double-2-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M:: x86.64 %gather-int-vector-2 ( dst src1 src2 rep -- )
dst rep %zero-vector
dst src1 0 PINSRQ
dst src2 1 PINSRQ ;
M: x86.64 %gather-int-vector-2-reps
{
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
} available-reps ;
:: %select-vector-32 ( dst src n rep -- )
rep {
{ char-16-rep [
dst 32-bit-version-of src n PEXTRB
dst dst 8-bit-version-of MOVSX
] }
{ uchar-16-rep [
dst 32-bit-version-of src n PEXTRB
] }
{ short-8-rep [
dst 32-bit-version-of src n PEXTRW
dst dst 16-bit-version-of MOVSX
] }
{ ushort-8-rep [
dst 32-bit-version-of src n PEXTRW
] }
{ int-4-rep [
dst 32-bit-version-of src n PEXTRD
dst dst 32-bit-version-of 2dup = [ 2drop ] [ MOVSX ] if
] }
{ uint-4-rep [
dst 32-bit-version-of src n PEXTRD
] }
} case ;
M: x86.32 %select-vector
%select-vector-32 ;
M: x86.32 %select-vector-reps
{
{ sse4.1? { uchar-16-rep char-16-rep ushort-8-rep short-8-rep uint-4-rep int-4-rep } }
} available-reps ;
M: x86.64 %select-vector
{
{ longlong-2-rep [ PEXTRQ ] }
{ ulonglong-2-rep [ PEXTRQ ] }
[ %select-vector-32 ]
} case ;
M: x86.64 %select-vector-reps
{
{ sse4.1? { uchar-16-rep char-16-rep ushort-8-rep short-8-rep uint-4-rep int-4-rep ulonglong-2-rep longlong-2-rep } }
} available-reps ;
: sse1-float-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
{ { 0 1 0 1 } [ dup MOVLHPS ] }
{ { 2 3 2 3 } [ dup MOVHLPS ] }
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
{ { 2 2 3 3 } [ dup UNPCKHPS ] }
[ dupd SHUFPS ]
} case ;
: float-4-shuffle ( dst shuffle -- )
sse3? [
{
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
[ sse1-float-4-shuffle ]
} case
] [ sse1-float-4-shuffle ] if ;
: int-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
{ { 0 0 1 1 } [ dup PUNPCKLDQ ] }
{ { 2 2 3 3 } [ dup PUNPCKHDQ ] }
{ { 0 1 0 1 } [ dup PUNPCKLQDQ ] }
{ { 2 3 2 3 } [ dup PUNPCKHQDQ ] }
[ dupd PSHUFD ]
} case ;
: longlong-2-shuffle ( dst shuffle -- )
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
: >float-4-shuffle ( double-2-shuffle -- float-4-shuffle )
[ 2 * { 0 1 } n+v ] map concat ;
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
dst shuffle rep signed-rep {
{ double-2-rep [ >float-4-shuffle float-4-shuffle ] }
{ float-4-rep [ float-4-shuffle ] }
{ int-4-rep [ int-4-shuffle ] }
{ longlong-2-rep [ longlong-2-shuffle ] }
} case ;
M: x86 %shuffle-vector-imm-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M:: x86 %shuffle-vector-halves-imm ( dst src1 src2 shuffle rep -- )
dst src1 src2 rep two-operand
shuffle rep {
{ double-2-rep [ >float-4-shuffle SHUFPS ] }
{ float-4-rep [ SHUFPS ] }
} case ;
M: x86 %shuffle-vector-halves-imm-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %shuffle-vector ( dst src shuffle rep -- )
two-operand PSHUFB ;
M: x86 %shuffle-vector-reps
{
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %merge-vector-head
[ two-operand ] keep
signed-rep {
{ double-2-rep [ MOVLHPS ] }
{ float-4-rep [ UNPCKLPS ] }
{ longlong-2-rep [ PUNPCKLQDQ ] }
{ int-4-rep [ PUNPCKLDQ ] }
{ short-8-rep [ PUNPCKLWD ] }
{ char-16-rep [ PUNPCKLBW ] }
} case ;
M: x86 %merge-vector-tail
[ two-operand ] keep
signed-rep {
{ double-2-rep [ UNPCKHPD ] }
{ float-4-rep [ UNPCKHPS ] }
{ longlong-2-rep [ PUNPCKHQDQ ] }
{ int-4-rep [ PUNPCKHDQ ] }
{ short-8-rep [ PUNPCKHWD ] }
{ char-16-rep [ PUNPCKHBW ] }
} case ;
M: x86 %merge-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %signed-pack-vector
[ two-operand ] keep
{
{ int-4-rep [ PACKSSDW ] }
{ short-8-rep [ PACKSSWB ] }
} case ;
M: x86 %signed-pack-vector-reps
{
{ sse2? { short-8-rep int-4-rep } }
} available-reps ;
M: x86 %unsigned-pack-vector
[ two-operand ] keep
signed-rep {
{ int-4-rep [ PACKUSDW ] }
{ short-8-rep [ PACKUSWB ] }
} case ;
M: x86 %unsigned-pack-vector-reps
{
{ sse2? { short-8-rep } }
{ sse4.1? { int-4-rep } }
} available-reps ;
M: x86 %tail>head-vector ( dst src rep -- )
dup {
{ float-4-rep [ drop UNPCKHPD ] }
{ double-2-rep [ drop UNPCKHPD ] }
[ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ]
} case ;
M: x86 %unpack-vector-head ( dst src rep -- )
{
{ char-16-rep [ PMOVSXBW ] }
{ uchar-16-rep [ PMOVZXBW ] }
{ short-8-rep [ PMOVSXWD ] }
{ ushort-8-rep [ PMOVZXWD ] }
{ int-4-rep [ PMOVSXDQ ] }
{ uint-4-rep [ PMOVZXDQ ] }
{ float-4-rep [ CVTPS2PD ] }
} case ;
M: x86 %unpack-vector-head-reps ( -- reps )
{
{ sse2? { float-4-rep } }
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %integer>float-vector ( dst src rep -- )
{
{ int-4-rep [ CVTDQ2PS ] }
} case ;
M: x86 %integer>float-vector-reps
{
{ sse2? { int-4-rep } }
} available-reps ;
M: x86 %float>integer-vector ( dst src rep -- )
{
{ float-4-rep [ CVTTPS2DQ ] }
} case ;
M: x86 %float>integer-vector-reps
{
{ sse2? { float-4-rep } }
} available-reps ;
: (%compare-float-vector) ( dst src rep double single -- )
[ double-2-rep eq? ] 2dip if ; inline
: %compare-float-vector ( dst src rep cc -- )
{
{ cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] }
{ cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] }
{ cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] }
{ cc<>= [ [ CMPORDPD ] [ CMPORDPS ] (%compare-float-vector) ] }
{ cc/< [ [ CMPNLTPD ] [ CMPNLTPS ] (%compare-float-vector) ] }
{ cc/<= [ [ CMPNLEPD ] [ CMPNLEPS ] (%compare-float-vector) ] }
{ cc/= [ [ CMPNEQPD ] [ CMPNEQPS ] (%compare-float-vector) ] }
{ cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
} case ;
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
rep signed-rep :> rep'
dst src rep' {
{ longlong-2-rep [ int64 call ] }
{ int-4-rep [ int32 call ] }
{ short-8-rep [ int16 call ] }
{ char-16-rep [ int8 call ] }
} case ; inline
: %compare-int-vector ( dst src rep cc -- )
{
{ cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
{ cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
} case ;
M: x86 %compare-vector ( dst src1 src2 rep cc -- )
[ [ two-operand ] keep ] dip
over float-vector-rep?
[ %compare-float-vector ]
[ %compare-int-vector ] if ;
: %compare-vector-eq-reps ( -- reps )
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
} available-reps ;
: %compare-vector-ord-reps ( -- reps )
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep short-8-rep int-4-rep } }
{ sse4.2? { longlong-2-rep } }
} available-reps ;
M: x86 %compare-vector-reps
{
{ [ dup { cc= cc/= cc/<>= cc<>= } member-eq? ] [ drop %compare-vector-eq-reps ] }
[ drop %compare-vector-ord-reps ]
} cond ;
: %compare-float-vector-ccs ( cc -- ccs not? )
{
{ cc< [ { { cc< f } } f ] }
{ cc<= [ { { cc<= f } } f ] }
{ cc> [ { { cc< t } } f ] }
{ cc>= [ { { cc<= t } } f ] }
{ cc= [ { { cc= f } } f ] }
{ cc<> [ { { cc< f } { cc< t } } f ] }
{ cc<>= [ { { cc<>= f } } f ] }
{ cc/< [ { { cc/< f } } f ] }
{ cc/<= [ { { cc/<= f } } f ] }
{ cc/> [ { { cc/< t } } f ] }
{ cc/>= [ { { cc/<= t } } f ] }
{ cc/= [ { { cc/= f } } f ] }
{ cc/<> [ { { cc/= f } { cc/<>= f } } f ] }
{ cc/<>= [ { { cc/<>= f } } f ] }
} case ;
: %compare-int-vector-ccs ( cc -- ccs not? )
order-cc {
{ cc< [ { { cc> t } } f ] }
{ cc<= [ { { cc> f } } t ] }
{ cc> [ { { cc> f } } f ] }
{ cc>= [ { { cc> t } } t ] }
{ cc= [ { { cc= f } } f ] }
{ cc/= [ { { cc= f } } t ] }
{ t [ { } t ] }
{ f [ { } f ] }
} case ;
M: x86 %compare-vector-ccs
swap float-vector-rep?
[ %compare-float-vector-ccs ]
[ %compare-int-vector-ccs ] if ;
:: %test-vector-mask ( dst temp mask vcc -- )
vcc {
{ vcc-any [ dst dst TEST dst temp \ CMOVNE (%boolean) ] }
{ vcc-none [ dst dst TEST dst temp \ CMOVE (%boolean) ] }
{ vcc-all [ dst mask CMP dst temp \ CMOVE (%boolean) ] }
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
} case ;
: %move-vector-mask ( dst src rep -- mask )
{
{ double-2-rep [ MOVMSKPS HEX: f ] }
{ float-4-rep [ MOVMSKPS HEX: f ] }
[ drop PMOVMSKB HEX: ffff ]
} case ;
M:: x86 %test-vector ( dst src temp rep vcc -- )
dst src rep %move-vector-mask :> mask
dst temp mask vcc %test-vector-mask ;
:: %test-vector-mask-branch ( label temp mask vcc -- )
vcc {
{ vcc-any [ temp temp TEST label JNE ] }
{ vcc-none [ temp temp TEST label JE ] }
{ vcc-all [ temp mask CMP label JE ] }
{ vcc-notall [ temp mask CMP label JNE ] }
} case ;
M:: x86 %test-vector-branch ( label src temp rep vcc -- )
temp src rep %move-vector-mask :> mask
label temp mask vcc %test-vector-mask-branch ;
M: x86 %test-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ ADDPS ] }
{ double-2-rep [ ADDPD ] }
{ char-16-rep [ PADDB ] }
{ uchar-16-rep [ PADDB ] }
{ short-8-rep [ PADDW ] }
{ ushort-8-rep [ PADDW ] }
{ int-4-rep [ PADDD ] }
{ uint-4-rep [ PADDD ] }
{ longlong-2-rep [ PADDQ ] }
{ ulonglong-2-rep [ PADDQ ] }
} case ;
M: x86 %add-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PADDSB ] }
{ uchar-16-rep [ PADDUSB ] }
{ short-8-rep [ PADDSW ] }
{ ushort-8-rep [ PADDUSW ] }
} case ;
M: x86 %saturated-add-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ ADDSUBPS ] }
{ double-2-rep [ ADDSUBPD ] }
} case ;
M: x86 %add-sub-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %sub-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ SUBPS ] }
{ double-2-rep [ SUBPD ] }
{ char-16-rep [ PSUBB ] }
{ uchar-16-rep [ PSUBB ] }
{ short-8-rep [ PSUBW ] }
{ ushort-8-rep [ PSUBW ] }
{ int-4-rep [ PSUBD ] }
{ uint-4-rep [ PSUBD ] }
{ longlong-2-rep [ PSUBQ ] }
{ ulonglong-2-rep [ PSUBQ ] }
} case ;
M: x86 %sub-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PSUBSB ] }
{ uchar-16-rep [ PSUBUSB ] }
{ short-8-rep [ PSUBSW ] }
{ ushort-8-rep [ PSUBUSW ] }
} case ;
M: x86 %saturated-sub-vector-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %mul-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ MULPS ] }
{ double-2-rep [ MULPD ] }
{ short-8-rep [ PMULLW ] }
{ ushort-8-rep [ PMULLW ] }
{ int-4-rep [ PMULLD ] }
{ uint-4-rep [ PMULLD ] }
} case ;
M: x86 %mul-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep short-8-rep ushort-8-rep } }
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %mul-high-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ short-8-rep [ PMULHW ] }
{ ushort-8-rep [ PMULHUW ] }
} case ;
M: x86 %mul-high-vector-reps
{
{ sse2? { short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %mul-horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PMADDUBSW ] }
{ uchar-16-rep [ PMADDUBSW ] }
{ short-8-rep [ PMADDWD ] }
} case ;
M: x86 %mul-horizontal-add-vector-reps
{
{ sse2? { short-8-rep } }
{ ssse3? { char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ DIVPS ] }
{ double-2-rep [ DIVPD ] }
} case ;
M: x86 %div-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %min-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PMINSB ] }
{ uchar-16-rep [ PMINUB ] }
{ short-8-rep [ PMINSW ] }
{ ushort-8-rep [ PMINUW ] }
{ int-4-rep [ PMINSD ] }
{ uint-4-rep [ PMINUD ] }
{ float-4-rep [ MINPS ] }
{ double-2-rep [ MINPD ] }
} case ;
M: x86 %min-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %max-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ char-16-rep [ PMAXSB ] }
{ uchar-16-rep [ PMAXUB ] }
{ short-8-rep [ PMAXSW ] }
{ ushort-8-rep [ PMAXUW ] }
{ int-4-rep [ PMAXSD ] }
{ uint-4-rep [ PMAXUD ] }
{ float-4-rep [ MAXPS ] }
{ double-2-rep [ MAXPD ] }
} case ;
M: x86 %max-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { uchar-16-rep short-8-rep double-2-rep } }
{ sse4.1? { char-16-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %avg-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ uchar-16-rep [ PAVGB ] }
{ ushort-8-rep [ PAVGW ] }
} case ;
M: x86 %avg-vector-reps
{
{ sse2? { uchar-16-rep ushort-8-rep } }
} available-reps ;
M: x86 %dot-vector
[ two-operand ] keep
{
{ float-4-rep [ HEX: ff DPPS ] }
{ double-2-rep [ HEX: ff DPPD ] }
} case ;
M: x86 %dot-vector-reps
{
{ sse4.1? { float-4-rep double-2-rep } }
} available-reps ;
M: x86 %sad-vector
[ two-operand ] keep
{
{ uchar-16-rep [ PSADBW ] }
} case ;
M: x86 %sad-vector-reps
{
{ sse2? { uchar-16-rep } }
} available-reps ;
M: x86 %horizontal-add-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
signed-rep {
{ float-4-rep [ HADDPS ] }
{ double-2-rep [ HADDPD ] }
{ int-4-rep [ PHADDD ] }
{ short-8-rep [ PHADDW ] }
} case ;
M: x86 %horizontal-add-vector-reps
{
{ sse3? { float-4-rep double-2-rep } }
{ ssse3? { int-4-rep uint-4-rep short-8-rep ushort-8-rep } }
} available-reps ;
M: x86 %horizontal-shl-vector-imm ( dst src1 src2 rep -- )
two-operand PSLLDQ ;
M: x86 %horizontal-shl-vector-imm-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %horizontal-shr-vector-imm ( dst src1 src2 rep -- )
two-operand PSRLDQ ;
M: x86 %horizontal-shr-vector-imm-reps
{
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep float-4-rep double-2-rep } }
} available-reps ;
M: x86 %abs-vector ( dst src rep -- )
{
{ char-16-rep [ PABSB ] }
{ short-8-rep [ PABSW ] }
{ int-4-rep [ PABSD ] }
} case ;
M: x86 %abs-vector-reps
{
{ ssse3? { char-16-rep short-8-rep int-4-rep } }
} available-reps ;
M: x86 %sqrt-vector ( dst src rep -- )
{
{ float-4-rep [ SQRTPS ] }
{ double-2-rep [ SQRTPD ] }
} case ;
M: x86 %sqrt-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep } }
} available-reps ;
M: x86 %and-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ ANDPS ] }
{ double-2-rep [ ANDPS ] }
[ drop PAND ]
} case ;
M: x86 %and-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %andn-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ ANDNPS ] }
{ double-2-rep [ ANDNPS ] }
[ drop PANDN ]
} case ;
M: x86 %andn-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %or-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ ORPS ] }
{ double-2-rep [ ORPS ] }
[ drop POR ]
} case ;
M: x86 %or-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %xor-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ float-4-rep [ XORPS ] }
{ double-2-rep [ XORPS ] }
[ drop PXOR ]
} case ;
M: x86 %xor-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %shl-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ short-8-rep [ PSLLW ] }
{ ushort-8-rep [ PSLLW ] }
{ int-4-rep [ PSLLD ] }
{ uint-4-rep [ PSLLD ] }
{ longlong-2-rep [ PSLLQ ] }
{ ulonglong-2-rep [ PSLLQ ] }
} case ;
M: x86 %shl-vector-reps
{
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %shr-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
{ short-8-rep [ PSRAW ] }
{ ushort-8-rep [ PSRLW ] }
{ int-4-rep [ PSRAD ] }
{ uint-4-rep [ PSRLD ] }
{ ulonglong-2-rep [ PSRLQ ] }
} case ;
M: x86 %shr-vector-reps
{
{ sse2? { short-8-rep ushort-8-rep int-4-rep uint-4-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %shl-vector-imm %shl-vector ;
M: x86 %shl-vector-imm-reps %shl-vector-reps ;
M: x86 %shr-vector-imm %shr-vector ;
M: x86 %shr-vector-imm-reps %shr-vector-reps ;
: scalar-sized-reg ( reg rep -- reg' )
rep-size 8 * n-bit-version-of ;
M: x86 %integer>scalar drop MOVD ;
:: %scalar>integer-32 ( dst src rep -- )
rep {
{ int-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 32-bit-version-of
2dup eq? [ 2drop ] [ MOVSX ] if
] }
{ uint-scalar-rep [
dst 32-bit-version-of src MOVD
] }
{ short-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVSX
] }
{ ushort-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVZX
] }
{ char-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVSX
dst tmp-dst int-rep %copy
] with-small-register
] }
{ uchar-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVZX
dst tmp-dst int-rep %copy
] with-small-register
] }
} case ;
M: x86.32 %scalar>integer ( dst src rep -- ) %scalar>integer-32 ;
M: x86.64 %scalar>integer ( dst src rep -- )
{
{ longlong-scalar-rep [ MOVD ] }
{ ulonglong-scalar-rep [ MOVD ] }
[ %scalar>integer-32 ]
} case ;
M: x86 %vector>scalar %copy ;
M: x86 %scalar>vector %copy ;
enable-float-min/max

View File

@ -0,0 +1 @@
not loaded

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
not loaded

View File

@ -0,0 +1,91 @@
! Copyright (C) 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien.c-types combinators kernel locals system namespaces
compiler.codegen.fixup compiler.constants
compiler.cfg.comparisons cpu.architecture cpu.x86
cpu.x86.assembler cpu.x86.assembler.operands ;
IN: cpu.x86.x87
! x87 unit is only used if SSE2 is not available.
: FLD* ( src -- ) [ ST0 ] dip FLD ;
: FSTP* ( dst -- ) ST0 FSTP ;
: copy-register-x87 ( dst src -- )
2dup eq? [ 2drop ] [ FLD* shuffle-down FSTP* ] if ;
M: float-rep copy-register* drop copy-register-x87 ;
M: double-rep copy-register* drop copy-register-x87 ;
: load-x87 ( dst src rep -- )
{
{ float-rep [ FLDS shuffle-down FSTP* ] }
{ double-rep [ FLDL shuffle-down FSTP* ] }
} case ;
: store-x87 ( dst src rep -- )
{
{ float-rep [ FLD* FSTPS ] }
{ double-rep [ FLD* FSTPL ] }
} case ;
: copy-memory-x87 ( dst src rep -- )
{
{ [ pick register? ] [ load-x87 ] }
{ [ over register? ] [ store-x87 ] }
} cond ;
M: float-rep copy-memory* copy-memory-x87 ;
M: double-rep copy-memory* copy-memory-x87 ;
M: x86 %load-float
0 [] FLDS
<float> rc-absolute rel-binary-literal
shuffle-down FSTP* ;
M: x86 %load-double
0 [] FLDL
<double> rc-absolute rel-binary-literal
shuffle-down FSTP* ;
:: binary-op ( dst src1 src2 quot -- )
src1 FLD*
ST0 src2 shuffle-down quot call
dst shuffle-down FSTP* ; inline
M: x86 %add-float [ FADD ] binary-op ;
M: x86 %sub-float [ FSUB ] binary-op ;
M: x86 %mul-float [ FMUL ] binary-op ;
M: x86 %div-float [ FDIV ] binary-op ;
M: x86 %sqrt FLD* FSQRT shuffle-down FSTP* ;
M: x86 %single>double-float copy-register-x87 ;
M: x86 %double>single-float copy-register-x87 ;
M: x86 integer-float-needs-stack-frame? t ;
M:: x86 %integer>float ( dst src -- )
4 stack@ src MOV
4 stack@ FILDD
dst shuffle-down FSTP* ;
M:: x86 %float>integer ( dst src -- )
src FLD*
4 stack@ FISTTPD
dst 4 stack@ MOV ;
: compare-op ( src1 src2 quot -- )
[ ST0 ] 3dip binary-op ; inline
M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- )
[ [ FCOMI ] compare-op ] (%compare-float) ;
M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- )
[ [ FUCOMI ] compare-op ] (%compare-float) ;
M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- )
[ [ FCOMI ] compare-op ] (%compare-float-branch) ;
M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- )
[ [ FUCOMI ] compare-op ] (%compare-float-branch) ;