diff --git a/README.txt b/README.txt index a33a85b218..016d60e68c 100755 --- a/README.txt +++ b/README.txt @@ -55,10 +55,13 @@ For X11 support, you need recent development libraries for libc, Pango, X11, and OpenGL. On a Debian-derived Linux distribution (like Ubuntu), you can use the following line to grab everything: - sudo apt-get install libc6-dev libpango-1.0-dev libx11-dev + sudo apt-get install libc6-dev libpango1.0-dev libx11-dev libgl1-mesa-dev + +Note that if you are using a proprietary OpenGL driver, you should +probably leave out the last package in the list. If your DISPLAY environment variable is set, the UI will start -automatically: +automatically when you run Factor: ./factor diff --git a/basis/bootstrap/compiler/compiler.factor b/basis/bootstrap/compiler/compiler.factor index 0505dcb184..4394535b8d 100755 --- a/basis/bootstrap/compiler/compiler.factor +++ b/basis/bootstrap/compiler/compiler.factor @@ -6,11 +6,14 @@ classes.private arrays hashtables vectors classes.tuple sbufs hashtables.private sequences.private math classes.tuple.private growable namespaces.private assocs words command-line vocabs io io.encodings.string libc splitting math.parser memory compiler.units -math.order compiler.tree.builder compiler.tree.optimizer -compiler.cfg.optimizer ; -FROM: compiler => enable-optimizer compile-word ; +math.order quotations quotations.private assocs.private ; +FROM: compiler => enable-optimizer ; IN: bootstrap.compiler +"profile-compiler" get [ + "bootstrap.compiler.timing" require +] when + ! Don't bring this in when deploying, since it will store a ! reference to 'eval' in a global variable "deploy-vocab" get "staging" get or [ @@ -42,16 +45,24 @@ nl ! which are also quick to compile are replaced by ! compiled definitions as soon as possible. { - not + not ? + + 2over roll -roll array? hashtable? vector? tuple? sbuf? tombstone? + curry? compose? callable? + quotation? - array-nth set-array-nth + curry compose uncurry + + array-nth set-array-nth length>> wrap probe namestack* + + layout-of } compile-unoptimized "." write flush @@ -75,7 +86,7 @@ nl "." write flush { - hashcode* = get set + hashcode* = equal? assoc-stack (assoc-stack) get set } compile-unoptimized "." write flush @@ -100,22 +111,6 @@ nl "." write flush -{ build-tree } compile-unoptimized - -"." write flush - -{ optimize-tree } compile-unoptimized - -"." write flush - -{ optimize-cfg } compile-unoptimized - -"." write flush - -{ compile-word } compile-unoptimized - -"." write flush - vocabs [ words compile-unoptimized "." write flush ] each " done" print flush diff --git a/basis/bootstrap/compiler/timing/timing.factor b/basis/bootstrap/compiler/timing/timing.factor new file mode 100644 index 0000000000..e1466e3409 --- /dev/null +++ b/basis/bootstrap/compiler/timing/timing.factor @@ -0,0 +1,38 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors compiler.cfg.builder compiler.cfg.linear-scan +compiler.cfg.liveness compiler.cfg.mr compiler.cfg.optimizer +compiler.cfg.stacks.finalize compiler.cfg.stacks.global +compiler.codegen compiler.tree.builder compiler.tree.optimizer +kernel make sequences tools.annotations tools.crossref ; +IN: bootstrap.compiler.timing + +: passes ( word -- seq ) + def>> uses [ vocabulary>> "compiler." head? ] filter ; + +: high-level-passes ( -- seq ) \ optimize-tree passes ; + +: low-level-passes ( -- seq ) \ optimize-cfg passes ; + +: machine-passes ( -- seq ) \ build-mr passes ; + +: linear-scan-passes ( -- seq ) \ (linear-scan) passes ; + +: all-passes ( -- seq ) + [ + \ build-tree , + \ optimize-tree , + high-level-passes % + \ build-cfg , + \ compute-global-sets , + \ finalize-stack-shuffling , + \ optimize-cfg , + low-level-passes % + \ compute-live-sets , + \ build-mr , + machine-passes % + linear-scan-passes % + \ generate , + ] { } make ; + +all-passes [ [ reset ] [ add-timing ] bi ] each \ No newline at end of file diff --git a/basis/compiler/cfg/gc-checks/gc-checks-tests.factor b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor new file mode 100644 index 0000000000..7b3e07faf8 --- /dev/null +++ b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor @@ -0,0 +1,26 @@ +IN: compiler.cfg.gc-checks.tests +USING: compiler.cfg.gc-checks compiler.cfg.debugger +compiler.cfg.registers compiler.cfg.instructions compiler.cfg +compiler.cfg.predecessors cpu.architecture tools.test kernel vectors +namespaces accessors sequences ; + +: test-gc-checks ( -- ) + cfg new 0 get >>entry + compute-predecessors + insert-gc-checks + drop ; + +V{ + T{ ##inc-d f 3 } + T{ ##replace f V int-regs 0 D 1 } +} 0 test-bb + +V{ + T{ ##box-float f V int-regs 0 V int-regs 1 } +} 1 test-bb + +0 get 1 get 1vector >>successors drop + +[ ] [ test-gc-checks ] unit-test + +[ V{ D 0 D 2 } ] [ 1 get instructions>> first uninitialized-locs>> ] unit-test \ No newline at end of file diff --git a/basis/compiler/cfg/gc-checks/gc-checks.factor b/basis/compiler/cfg/gc-checks/gc-checks.factor index 8435a231e6..c34f2c42a3 100644 --- a/basis/compiler/cfg/gc-checks/gc-checks.factor +++ b/basis/compiler/cfg/gc-checks/gc-checks.factor @@ -1,17 +1,27 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel sequences assocs -compiler.cfg.rpo compiler.cfg.instructions -compiler.cfg.hats ; +USING: accessors kernel sequences assocs fry +compiler.cfg.rpo +compiler.cfg.hats +compiler.cfg.registers +compiler.cfg.instructions +compiler.cfg.stacks.uninitialized ; IN: compiler.cfg.gc-checks -: gc? ( bb -- ? ) +: insert-gc-check? ( bb -- ? ) instructions>> [ ##allocation? ] any? ; -: insert-gc-check ( basic-block -- ) - dup gc? [ - [ i i f \ ##gc new-insn prefix ] change-instructions drop - ] [ drop ] if ; +: blocks-with-gc ( cfg -- bbs ) + post-order [ insert-gc-check? ] filter ; + +: insert-gc-check ( bb -- ) + dup '[ + i i f _ uninitialized-locs \ ##gc new-insn + prefix + ] change-instructions drop ; : insert-gc-checks ( cfg -- cfg' ) - dup [ insert-gc-check ] each-basic-block ; \ No newline at end of file + dup blocks-with-gc [ + over compute-uninitialized-sets + [ insert-gc-check ] each + ] unless-empty ; \ No newline at end of file diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index e08b3b25bb..0a52f1aa94 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -190,7 +190,7 @@ INSN: ##fixnum-add < ##fixnum-overflow ; INSN: ##fixnum-sub < ##fixnum-overflow ; INSN: ##fixnum-mul < ##fixnum-overflow ; -INSN: ##gc temp1 temp2 live-values ; +INSN: ##gc temp1 temp2 live-values uninitialized-locs ; ! Instructions used by machine IR only. INSN: _prologue stack-frame ; @@ -219,7 +219,7 @@ INSN: _fixnum-mul < _fixnum-overflow ; TUPLE: spill-slot n ; C: spill-slot -INSN: _gc temp1 temp2 gc-roots gc-root-count gc-root-size ; +INSN: _gc temp1 temp2 gc-roots gc-root-count gc-root-size uninitialized-locs ; ! These instructions operate on machine registers and not ! virtual registers diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index c6642d8ad9..2618db0904 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -48,11 +48,11 @@ IN: compiler.cfg.intrinsics slots.private:set-slot strings.private:string-nth strings.private:set-string-nth-fast - ! classes.tuple.private: - ! arrays: - ! byte-arrays: - ! byte-arrays:(byte-array) - ! kernel: + classes.tuple.private: + arrays: + byte-arrays: + byte-arrays:(byte-array) + kernel: alien.accessors:alien-unsigned-1 alien.accessors:set-alien-unsigned-1 alien.accessors:alien-signed-1 @@ -61,7 +61,7 @@ IN: compiler.cfg.intrinsics alien.accessors:set-alien-unsigned-2 alien.accessors:alien-signed-2 alien.accessors:set-alien-signed-2 - ! alien.accessors:alien-cell + alien.accessors:alien-cell alien.accessors:set-alien-cell } [ t "intrinsic" set-word-prop ] each @@ -90,7 +90,7 @@ IN: compiler.cfg.intrinsics alien.accessors:set-alien-float alien.accessors:alien-double alien.accessors:set-alien-double - } drop f [ t "intrinsic" set-word-prop ] each ; + } [ t "intrinsic" set-word-prop ] each ; : enable-fixnum-log2 ( -- ) \ math.integers.private:fixnum-log2 t "intrinsic" set-word-prop ; diff --git a/basis/compiler/cfg/linear-scan/allocation/allocation.factor b/basis/compiler/cfg/linear-scan/allocation/allocation.factor index c197da9814..d55266e6e4 100644 --- a/basis/compiler/cfg/linear-scan/allocation/allocation.factor +++ b/basis/compiler/cfg/linear-scan/allocation/allocation.factor @@ -28,29 +28,11 @@ IN: compiler.cfg.linear-scan.allocation : no-free-registers? ( result -- ? ) second 0 = ; inline -: split-to-fit ( new n -- before after ) - split-interval - [ [ compute-start/end ] bi@ ] - [ >>split-next drop ] - [ ] - 2tri ; - -: register-partially-available ( new result -- ) - { - { [ 2dup second 1 - spill-live-out? ] [ drop spill-live-out ] } - { [ 2dup second 1 - spill-live-in? ] [ drop spill-live-in ] } - [ - [ second 1 - split-to-fit ] keep - '[ _ register-available ] [ add-unhandled ] bi* - ] - } cond ; - : assign-register ( new -- ) dup coalesce? [ coalesce ] [ dup register-status { { [ dup no-free-registers? ] [ drop assign-blocked-register ] } { [ 2dup register-available? ] [ register-available ] } - ! [ register-partially-available ] [ drop assign-blocked-register ] } cond ] if ; diff --git a/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor b/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor index 14046a91f1..4dd3c8176c 100644 --- a/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor +++ b/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor @@ -28,23 +28,42 @@ ERROR: bad-live-ranges interval ; [ swap first (>>from) ] 2bi ; -: split-for-spill ( live-interval n -- before after ) - split-interval - { - [ [ trim-before-ranges ] [ trim-after-ranges ] bi* ] - [ [ compute-start/end ] bi@ ] - [ [ check-ranges ] bi@ ] - [ ] - } 2cleave ; - : assign-spill ( live-interval -- ) - dup vreg>> assign-spill-slot >>spill-to f >>split-next drop ; + dup vreg>> assign-spill-slot >>spill-to drop ; + +: spill-before ( before -- before/f ) + ! If the interval does not have any usages before the spill location, + ! then it is the second child of an interval that was split. We reload + ! the value and let the resolve pass insert a split later. + dup uses>> empty? [ drop f ] [ + { + [ ] + [ assign-spill ] + [ trim-before-ranges ] + [ compute-start/end ] + [ check-ranges ] + } cleave + ] if ; : assign-reload ( live-interval -- ) dup vreg>> assign-spill-slot >>reload-from drop ; -: split-and-spill ( live-interval n -- before after ) - split-for-spill 2dup [ assign-spill ] [ assign-reload ] bi* ; +: spill-after ( after -- after/f ) + ! If the interval has no more usages after the spill location, + ! then it is the first child of an interval that was split. We + ! spill the value and let the resolve pass insert a reload later. + dup uses>> empty? [ drop f ] [ + { + [ ] + [ assign-reload ] + [ trim-after-ranges ] + [ compute-start/end ] + [ check-ranges ] + } cleave + ] if ; + +: split-for-spill ( live-interval n -- before after ) + split-interval [ spill-before ] [ spill-after ] bi* ; : find-use-position ( live-interval new -- n ) [ uses>> ] [ start>> '[ _ >= ] ] bi* find nip 1/0. or ; @@ -72,47 +91,12 @@ ERROR: bad-live-ranges interval ; [ uses>> first ] [ second ] bi* > ; : spill-new ( new pair -- ) - drop - { - [ trim-after-ranges ] - [ compute-start/end ] - [ assign-reload ] - [ add-unhandled ] - } cleave ; - -: spill-live-out? ( live-interval n -- ? ) [ uses>> last ] dip < ; - -: spill-live-out ( live-interval -- ) - ! The interval has no more usages after the spill location. This - ! means it is the first child of an interval that was split. We - ! spill the value and let the resolve pass insert a reload later. - { - [ trim-before-ranges ] - [ compute-start/end ] - [ assign-spill ] - [ add-handled ] - } cleave ; - -: spill-live-in? ( live-interval n -- ? ) [ uses>> first ] dip > ; - -: spill-live-in ( live-interval -- ) - ! The interval does not have any usages before the spill location. - ! This means it is the second child of an interval that was - ! split. We reload the value and let the resolve pass insert a - ! split later. - { - [ trim-after-ranges ] - [ compute-start/end ] - [ assign-reload ] - [ add-unhandled ] - } cleave ; + drop spill-after add-unhandled ; : spill ( live-interval n -- ) - { - { [ 2dup spill-live-out? ] [ drop spill-live-out ] } - { [ 2dup spill-live-in? ] [ drop spill-live-in ] } - [ split-and-spill [ add-handled ] [ add-unhandled ] bi* ] - } cond ; + split-for-spill + [ [ add-handled ] when* ] + [ [ add-unhandled ] when* ] bi* ; :: spill-intersecting-active ( new reg -- ) ! If there is an active interval using 'reg' (there should be at @@ -149,8 +133,8 @@ ERROR: bad-live-ranges interval ; ! A register would be available for part of the new ! interval's lifetime if all active and inactive intervals ! using that register were split and spilled. - [ second 1 - split-and-spill add-unhandled ] keep - spill-available ; + [ second 1 - split-for-spill [ add-unhandled ] when* ] keep + '[ _ spill-available ] when* ; : assign-blocked-register ( new -- ) dup spill-status { diff --git a/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor b/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor index 0a67710bc8..874523d70a 100644 --- a/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor +++ b/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor @@ -27,9 +27,6 @@ IN: compiler.cfg.linear-scan.allocation.splitting : split-uses ( uses n -- before after ) '[ _ <= ] partition ; -: record-split ( live-interval before after -- ) - [ >>split-before ] [ >>split-after ] bi* drop ; inline - ERROR: splitting-too-early ; ERROR: splitting-too-late ; @@ -56,7 +53,6 @@ ERROR: splitting-atomic-interval ; live-interval clone :> after live-interval uses>> n split-uses before after [ (>>uses) ] bi-curry@ bi* live-interval ranges>> n split-ranges before after [ (>>ranges) ] bi-curry@ bi* - live-interval before after record-split before split-before after split-after ; diff --git a/basis/compiler/cfg/linear-scan/debugger/debugger.factor b/basis/compiler/cfg/linear-scan/debugger/debugger.factor index a350ee5f43..c9c1b77a0d 100644 --- a/basis/compiler/cfg/linear-scan/debugger/debugger.factor +++ b/basis/compiler/cfg/linear-scan/debugger/debugger.factor @@ -5,25 +5,12 @@ namespaces prettyprint compiler.cfg.linear-scan.live-intervals compiler.cfg.linear-scan.allocation compiler.cfg assocs ; IN: compiler.cfg.linear-scan.debugger -: check-assigned ( live-intervals -- ) - [ - reg>> - [ "Not all intervals have registers" throw ] unless - ] each ; - -: split-children ( live-interval -- seq ) - dup split-before>> [ - [ split-before>> ] [ split-after>> ] bi - [ split-children ] bi@ - append - ] [ 1array ] if ; - : check-linear-scan ( live-intervals machine-registers -- ) [ [ clone ] map dup [ [ vreg>> ] keep ] H{ } map>assoc live-intervals set - ] dip allocate-registers - [ split-children ] map concat check-assigned ; + ] dip + allocate-registers drop ; : picture ( uses -- str ) dup last 1 + CHAR: space diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor index f38946f8e2..df91109e78 100644 --- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor +++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor @@ -75,6 +75,9 @@ check-numbering? on { T{ live-range f 0 5 } } 0 split-ranges ] unit-test +H{ { int-regs 10 } { float-regs 20 } } clone spill-counts set +H{ } spill-slots set + [ T{ live-interval { vreg T{ vreg { reg-class int-regs } { n 1 } } } @@ -82,6 +85,7 @@ check-numbering? on { end 2 } { uses V{ 0 1 } } { ranges V{ T{ live-range f 0 2 } } } + { spill-to 10 } } T{ live-interval { vreg T{ vreg { reg-class int-regs } { n 1 } } } @@ -89,6 +93,7 @@ check-numbering? on { end 5 } { uses V{ 5 } } { ranges V{ T{ live-range f 5 5 } } } + { reload-from 10 } } ] [ T{ live-interval @@ -97,82 +102,61 @@ check-numbering? on { end 5 } { uses V{ 0 1 5 } } { ranges V{ T{ live-range f 0 5 } } } - } 2 split-for-spill [ f >>split-next ] bi@ + } 2 split-for-spill ] unit-test [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 0 } { end 1 } { uses V{ 0 } } { ranges V{ T{ live-range f 0 1 } } } + { spill-to 11 } } T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 1 } { end 5 } { uses V{ 1 5 } } { ranges V{ T{ live-range f 1 5 } } } + { reload-from 11 } } ] [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 0 } { end 5 } { uses V{ 0 1 5 } } { ranges V{ T{ live-range f 0 5 } } } - } 0 split-for-spill [ f >>split-next ] bi@ + } 0 split-for-spill ] unit-test [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 0 } { end 1 } { uses V{ 0 } } { ranges V{ T{ live-range f 0 1 } } } + { spill-to 12 } } T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 20 } { end 30 } { uses V{ 20 30 } } { ranges V{ T{ live-range f 20 30 } } } + { reload-from 12 } } ] [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 0 } { end 30 } { uses V{ 0 20 30 } } { ranges V{ T{ live-range f 0 8 } T{ live-range f 10 18 } T{ live-range f 20 30 } } } - } 10 split-for-spill [ f >>split-next ] bi@ -] unit-test - -[ - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 0 } - { end 4 } - { uses V{ 0 1 4 } } - { ranges V{ T{ live-range f 0 4 } } } - } - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 5 } - { end 10 } - { uses V{ 5 10 } } - { ranges V{ T{ live-range f 5 10 } } } - } -] [ - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 0 } - { end 10 } - { uses V{ 0 1 4 5 10 } } - { ranges V{ T{ live-range f 0 10 } } } - } 4 split-to-fit [ f >>split-next ] bi@ + } 10 split-for-spill ] unit-test [ @@ -352,6 +336,78 @@ check-numbering? on check-linear-scan ] must-fail +! Problem with spilling intervals with no more usages after the spill location + +[ ] [ + { + T{ live-interval + { vreg T{ vreg { n 1 } { reg-class int-regs } } } + { start 0 } + { end 20 } + { uses V{ 0 10 20 } } + { ranges V{ T{ live-range f 0 2 } T{ live-range f 10 20 } } } + } + T{ live-interval + { vreg T{ vreg { n 2 } { reg-class int-regs } } } + { start 0 } + { end 20 } + { uses V{ 0 10 20 } } + { ranges V{ T{ live-range f 0 2 } T{ live-range f 10 20 } } } + } + T{ live-interval + { vreg T{ vreg { n 3 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 6 } } + { ranges V{ T{ live-range f 4 8 } } } + } + T{ live-interval + { vreg T{ vreg { n 4 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 4 8 } } } + } + + ! This guy will invoke the 'spill partially available' code path + T{ live-interval + { vreg T{ vreg { n 5 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 4 8 } } } + } + } + H{ { int-regs { "A" "B" } } } + check-linear-scan +] unit-test + + +! Test spill-new code path + +[ ] [ + { + T{ live-interval + { vreg T{ vreg { n 1 } { reg-class int-regs } } } + { start 0 } + { end 10 } + { uses V{ 0 6 10 } } + { ranges V{ T{ live-range f 0 10 } } } + } + + ! This guy will invoke the 'spill new' code path + T{ live-interval + { vreg T{ vreg { n 5 } { reg-class int-regs } } } + { start 2 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 2 8 } } } + } + } + H{ { int-regs { "A" } } } + check-linear-scan +] unit-test + SYMBOL: available SYMBOL: taken diff --git a/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor b/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor index 77aae14503..48bef197e6 100644 --- a/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor +++ b/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor @@ -13,7 +13,6 @@ C: live-range TUPLE: live-interval vreg reg spill-to reload-from -split-before split-after split-next start end ranges uses copy-from ; diff --git a/basis/compiler/cfg/linearization/linearization.factor b/basis/compiler/cfg/linearization/linearization.factor index 97fb3205c2..cbeb301901 100755 --- a/basis/compiler/cfg/linearization/linearization.factor +++ b/basis/compiler/cfg/linearization/linearization.factor @@ -98,15 +98,18 @@ M: ##dispatch linearize-insn M: ##gc linearize-insn nip - [ temp1>> ] - [ temp2>> ] - [ - live-values>> - [ compute-gc-roots ] - [ count-gc-roots ] - [ gc-roots-size ] - tri - ] tri + { + [ temp1>> ] + [ temp2>> ] + [ + live-values>> + [ compute-gc-roots ] + [ count-gc-roots ] + [ gc-roots-size ] + tri + ] + [ uninitialized-locs>> ] + } cleave _gc ; : linearize-basic-blocks ( cfg -- insns ) diff --git a/basis/compiler/cfg/liveness/ssa/ssa.factor b/basis/compiler/cfg/liveness/ssa/ssa.factor index 9fa22d22b1..dbfe2d70b4 100644 --- a/basis/compiler/cfg/liveness/ssa/ssa.factor +++ b/basis/compiler/cfg/liveness/ssa/ssa.factor @@ -55,3 +55,7 @@ SYMBOL: work-list H{ } clone live-outs set dup post-order add-to-work-list work-list get [ liveness-step ] slurp-deque ; + +: live-in? ( vreg bb -- ? ) live-in key? ; + +: live-out? ( vreg bb -- ? ) live-out key? ; \ No newline at end of file diff --git a/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor b/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor index 536f5e1e68..01aebd7e1c 100644 --- a/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor +++ b/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs fry kernel namespaces sequences math arrays compiler.cfg.def-use compiler.cfg.instructions -compiler.cfg.liveness compiler.cfg.rpo ; +compiler.cfg.liveness.ssa compiler.cfg.rpo ; IN: compiler.cfg.ssa.destruction.live-ranges ! Live ranges for interference testing @@ -52,9 +52,9 @@ PRIVATE> ERROR: bad-kill-index vreg bb ; : kill-index ( vreg bb -- n ) - 2dup live-out key? [ 2drop 1/0. ] [ + 2dup live-out? [ 2drop 1/0. ] [ 2dup kill-indices get at at* [ 2nip ] [ - drop 2dup live-in key? + drop 2dup live-in? [ bad-kill-index ] [ 2drop -1/0. ] if ] if ] if ; diff --git a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor index f8c8a4d8b2..ce2aa1c5d7 100644 --- a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor +++ b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor @@ -4,7 +4,7 @@ USING: accessors assocs fry kernel locals math math.order arrays namespaces sequences sorting sets combinators combinators.short-circuit make compiler.cfg.def-use compiler.cfg.instructions -compiler.cfg.liveness +compiler.cfg.liveness.ssa compiler.cfg.dominance compiler.cfg.ssa.destruction.state compiler.cfg.ssa.destruction.forest @@ -19,13 +19,13 @@ IN: compiler.cfg.ssa.destruction.process-blocks SYMBOLS: phi-union unioned-blocks ; :: operand-live-into-phi-node's-block? ( bb src dst -- ? ) - src bb live-in key? ; + src bb live-in? ; :: phi-node-is-live-out-of-operand's-block? ( bb src dst -- ? ) - dst src def-of live-out key? ; + dst src def-of live-out? ; :: operand-is-phi-node-and-live-into-operand's-block? ( bb src dst -- ? ) - { [ src insn-of ##phi? ] [ src src def-of live-in key? ] } 0&& ; + { [ src insn-of ##phi? ] [ src src def-of live-in? ] } 0&& ; :: operand-being-renamed? ( bb src dst -- ? ) src processed-names get key? ; @@ -61,10 +61,10 @@ SYMBOLS: phi-union unioned-blocks ; } cond ; : node-is-live-in-of-child? ( node child -- ? ) - [ vreg>> ] [ bb>> live-in ] bi* key? ; + [ vreg>> ] [ bb>> ] bi* live-in? ; : node-is-live-out-of-child? ( node child -- ? ) - [ vreg>> ] [ bb>> live-out ] bi* key? ; + [ vreg>> ] [ bb>> ] bi* live-out? ; :: insert-copy ( bb src dst -- ) bb src dst trivial-interference diff --git a/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor b/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor new file mode 100644 index 0000000000..6f3e35994a --- /dev/null +++ b/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor @@ -0,0 +1,61 @@ +IN: compiler.cfg.stacks.uninitialized.tests +USING: compiler.cfg.stacks.uninitialized compiler.cfg.debugger +compiler.cfg.registers compiler.cfg.instructions compiler.cfg +compiler.cfg.predecessors cpu.architecture tools.test kernel vectors +namespaces accessors sequences ; + +: test-uninitialized ( -- ) + cfg new 0 get >>entry + compute-predecessors + compute-uninitialized-sets ; + +V{ + T{ ##inc-d f 3 } +} 0 test-bb + +V{ + T{ ##replace f V int-regs 0 D 0 } + T{ ##replace f V int-regs 0 D 1 } + T{ ##replace f V int-regs 0 D 2 } + T{ ##inc-r f 1 } +} 1 test-bb + +V{ + T{ ##peek f V int-regs 0 D 0 } + T{ ##inc-d f 1 } +} 2 test-bb + +0 get 1 get 1vector >>successors drop +1 get 2 get 1vector >>successors drop + +[ ] [ test-uninitialized ] unit-test + +[ V{ D 0 D 1 D 2 } ] [ 1 get uninitialized-locs ] unit-test +[ V{ R 0 } ] [ 2 get uninitialized-locs ] unit-test + +! When merging, if a location is uninitialized in one branch and +! initialized in another, we have to consider it uninitialized, +! since it cannot be safely read from by a ##peek, or traced by GC. + +V{ } 0 test-bb + +V{ + T{ ##inc-d f 1 } +} 1 test-bb + +V{ + T{ ##call f namestack } + T{ ##branch } +} 2 test-bb + +V{ + T{ ##return } +} 3 test-bb + +0 get 1 get 2 get V{ } 2sequence >>successors drop +1 get 3 get 1vector >>successors drop +2 get 3 get 1vector >>successors drop + +[ ] [ test-uninitialized ] unit-test + +[ V{ D 0 } ] [ 3 get uninitialized-locs ] unit-test diff --git a/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor new file mode 100644 index 0000000000..ee60c4bd7a --- /dev/null +++ b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor @@ -0,0 +1,76 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences byte-arrays namespaces accessors classes math +math.order fry arrays combinators compiler.cfg.registers +compiler.cfg.instructions compiler.cfg.dataflow-analysis ; +IN: compiler.cfg.stacks.uninitialized + +! Uninitialized stack location analysis. + +! Consider the following sequence of instructions: +! ##inc-d 2 +! _gc +! ##replace ... D 0 +! ##replace ... D 1 +! The GC check runs before stack locations 0 and 1 have been initialized, +! and it needs to zero them out so that GC doesn't try to trace them. + + ] [ prepend ] } + } cond + ] change ; + +M: ##inc-d visit-insn n>> ds-loc handle-inc ; + +M: ##inc-r visit-insn n>> rs-loc handle-inc ; + +ERROR: uninitialized-peek insn ; + +M: ##peek visit-insn + dup loc>> [ n>> ] [ class get ] bi ?nth 0 = + [ uninitialized-peek ] [ drop ] if ; + +M: ##replace visit-insn + loc>> [ n>> ] [ class get ] bi + 2dup length < [ [ 1 ] 2dip set-nth ] [ 2drop ] if ; + +M: insn visit-insn drop ; + +: prepare ( pair -- ) + [ first2 [ [ clone ] [ B{ } ] if* ] bi@ ] [ B{ } B{ } ] if* + [ ds-loc set ] [ rs-loc set ] bi* ; + +: visit-block ( bb -- ) instructions>> [ visit-insn ] each ; + +: finish ( -- pair ) ds-loc get rs-loc get 2array ; + +: (join-sets) ( seq1 seq2 -- seq ) + 2dup [ length ] bi@ max '[ _ 1 pad-tail ] bi@ [ min ] 2map ; + +: (uninitialized-locs) ( seq quot -- seq' ) + [ dup length [ drop 0 = ] pusher [ 2each ] dip ] dip map ; inline + +PRIVATE> + +FORWARD-ANALYSIS: uninitialized + +M: uninitialized-analysis transfer-set ( pair bb analysis -- pair' ) + drop [ prepare ] dip visit-block finish ; + +M: uninitialized-analysis join-sets ( sets analysis -- pair ) + drop sift [ f ] [ [ ] [ [ (join-sets) ] 2map ] map-reduce ] if-empty ; + +: uninitialized-locs ( bb -- locs ) + uninitialized-in dup [ + first2 + [ [ ] (uninitialized-locs) ] + [ [ ] (uninitialized-locs) ] + bi* append + ] when ; \ No newline at end of file diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index f9a4786eb5..672ed9ce02 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -4,7 +4,7 @@ USING: namespaces make math math.order math.parser sequences accessors kernel kernel.private layouts assocs words summary arrays combinators classes.algebra alien alien.c-types alien.structs alien.strings alien.arrays alien.complex alien.libraries sets libc -continuations.private fry cpu.architecture classes +continuations.private fry cpu.architecture classes locals source-files.errors compiler.errors compiler.alien @@ -215,13 +215,44 @@ M: ##write-barrier generate-insn [ table>> ] tri %write-barrier ; +! GC checks +: wipe-locs ( locs temp -- ) + '[ + _ + [ 0 %load-immediate ] + [ swap [ %replace ] with each ] bi + ] unless-empty ; + +GENERIC# save-gc-root 1 ( gc-root operand temp -- ) + +M:: spill-slot save-gc-root ( gc-root operand temp -- ) + temp operand n>> %reload-integer + gc-root temp %save-gc-root ; + +M: object save-gc-root drop %save-gc-root ; + +: save-gc-roots ( gc-roots temp -- ) '[ _ save-gc-root ] assoc-each ; + +GENERIC# load-gc-root 1 ( gc-root operand temp -- ) + +M:: spill-slot load-gc-root ( gc-root operand temp -- ) + gc-root temp %load-gc-root + temp operand n>> %spill-integer ; + +M: object load-gc-root drop %load-gc-root ; + +: load-gc-roots ( gc-roots temp -- ) '[ _ load-gc-root ] assoc-each ; + M: _gc generate-insn + "no-gc" define-label { - [ temp1>> ] - [ temp2>> ] - [ gc-roots>> ] - [ gc-root-count>> ] - } cleave %gc ; + [ [ "no-gc" get ] dip [ temp1>> ] [ temp2>> ] bi %check-nursery ] + [ [ uninitialized-locs>> ] [ temp1>> ] bi wipe-locs ] + [ [ gc-roots>> ] [ temp1>> ] bi save-gc-roots ] + [ gc-root-count>> %call-gc ] + [ [ gc-roots>> ] [ temp1>> ] bi load-gc-roots ] + } cleave + "no-gc" resolve-label ; M: _loop-entry generate-insn drop %loop-entry ; diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index deb44db41a..e4c8f3246d 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -128,7 +128,12 @@ HOOK: %alien-global cpu ( dst symbol library -- ) HOOK: %allot cpu ( dst size class temp -- ) HOOK: %write-barrier cpu ( src card# table -- ) -HOOK: %gc cpu ( temp1 temp2 live-registers live-spill-slots -- ) + +! GC checks +HOOK: %check-nursery cpu ( label temp1 temp2 -- ) +HOOK: %save-gc-root cpu ( gc-root register -- ) +HOOK: %load-gc-root cpu ( gc-root register -- ) +HOOK: %call-gc cpu ( gc-root-count -- ) HOOK: %prologue cpu ( n -- ) HOOK: %epilogue cpu ( n -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 003eccfa18..7ce73d2c4b 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -76,8 +76,12 @@ HOOK: reserved-area-size os ( -- n ) : xt-save ( n -- i ) 2 cells - ; ! Next, we have the spill area as well as the FFI parameter area. -! They overlap, since basic blocks with FFI calls will never -! spill. +! It is safe for them to overlap, since basic blocks with FFI calls +! will never spill -- indeed, basic blocks with FFI calls do not +! use vregs at all, and the FFI call is a stack analysis sync point. +! In the future this will change and the stack frame logic will +! need to be untangled somewhat. + : param@ ( n -- x ) reserved-area-size + ; inline : param-save-size ( -- n ) 8 cells ; foldable @@ -85,32 +89,30 @@ HOOK: reserved-area-size os ( -- n ) : local@ ( n -- x ) reserved-area-size param-save-size + + ; inline -: spill-integer-base ( -- n ) - stack-frame get spill-counts>> double-float-regs swap at - double-float-regs reg-size * ; - : spill-integer@ ( n -- offset ) - cells spill-integer-base + param@ ; + spill-integer-offset param@ ; : spill-float@ ( n -- offset ) - double-float-regs reg-size * param@ ; + spill-float-offset param@ ; ! Some FP intrinsics need a temporary scratch area in the stack -! frame, 8 bytes in size +! frame, 8 bytes in size. This is in the param-save area so it +! should not overlap with spill slots. : scratch@ ( n -- offset ) stack-frame get total-size>> factor-area-size - param-save-size - + ; +! GC root area +: gc-root@ ( n -- offset ) + gc-root-offset param@ ; + ! Finally we have the linkage area HOOK: lr-save os ( -- n ) M: ppc stack-frame-size ( stack-frame -- i ) - [ spill-counts>> [ swap reg-size * ] { } assoc>map sum ] - [ params>> ] - [ return>> ] - tri + + + (stack-frame-size) param-save-size + reserved-area-size + factor-area-size + @@ -176,95 +178,28 @@ M: ppc %or OR ; M: ppc %or-imm ORI ; M: ppc %xor XOR ; M: ppc %xor-imm XORI ; +M: ppc %shl SLW ; M: ppc %shl-imm swapd SLWI ; +M: ppc %shr-imm SRW ; M: ppc %shr-imm swapd SRWI ; +M: ppc %sar SRAW ; M: ppc %sar-imm SRAWI ; M: ppc %not NOT ; -: %alien-invoke-tail ( func dll -- ) - [ scratch-reg ] 2dip %alien-global scratch-reg MTCTR BCTR ; - -:: exchange-regs ( r1 r2 -- ) - scratch-reg r1 MR - r1 r2 MR - r2 scratch-reg MR ; - -: ?MR ( r1 r2 -- ) 2dup = [ 2drop ] [ MR ] if ; - -:: move>args ( src1 src2 -- ) - { - { [ src1 4 = ] [ 3 src2 ?MR 3 4 exchange-regs ] } - { [ src1 3 = ] [ 4 src2 ?MR ] } - { [ src2 3 = ] [ 4 src1 ?MR 3 4 exchange-regs ] } - { [ src2 4 = ] [ 3 src1 ?MR ] } - [ 3 src1 MR 4 src2 MR ] - } cond ; - -: clear-xer ( -- ) +:: overflow-template ( label dst src1 src2 insn -- ) 0 0 LI - 0 MTXER ; inline + 0 MTXER + dst src2 src1 insn call + label BNO ; inline -:: overflow-template ( src1 src2 insn func -- ) - "no-overflow" define-label - clear-xer - scratch-reg src2 src1 insn call - scratch-reg ds-reg 0 STW - "no-overflow" get BNO - src1 src2 move>args - %prepare-alien-invoke - func f %alien-invoke - "no-overflow" resolve-label ; inline +M: ppc %fixnum-add ( label dst src1 src2 -- ) + [ ADDO. ] overflow-template ; -:: overflow-template-tail ( src1 src2 insn func -- ) - "overflow" define-label - clear-xer - scratch-reg src2 src1 insn call - "overflow" get BO - scratch-reg ds-reg 0 STW - BLR - "overflow" resolve-label - src1 src2 move>args - %prepare-alien-invoke - func f %alien-invoke-tail ; inline +M: ppc %fixnum-sub ( label dst src1 src2 -- ) + [ SUBFO. ] overflow-template ; -M: ppc %fixnum-add ( src1 src2 -- ) - [ ADDO. ] "overflow_fixnum_add" overflow-template ; - -M: ppc %fixnum-add-tail ( src1 src2 -- ) - [ ADDO. ] "overflow_fixnum_add" overflow-template-tail ; - -M: ppc %fixnum-sub ( src1 src2 -- ) - [ SUBFO. ] "overflow_fixnum_subtract" overflow-template ; - -M: ppc %fixnum-sub-tail ( src1 src2 -- ) - [ SUBFO. ] "overflow_fixnum_subtract" overflow-template-tail ; - -M:: ppc %fixnum-mul ( src1 src2 temp1 temp2 -- ) - "no-overflow" define-label - clear-xer - temp1 src1 tag-bits get SRAWI - temp2 temp1 src2 MULLWO. - temp2 ds-reg 0 STW - "no-overflow" get BNO - src2 src2 tag-bits get SRAWI - temp1 src2 move>args - %prepare-alien-invoke - "overflow_fixnum_multiply" f %alien-invoke - "no-overflow" resolve-label ; - -M:: ppc %fixnum-mul-tail ( src1 src2 temp1 temp2 -- ) - "overflow" define-label - clear-xer - temp1 src1 tag-bits get SRAWI - temp2 temp1 src2 MULLWO. - "overflow" get BO - temp2 ds-reg 0 STW - BLR - "overflow" resolve-label - src2 src2 tag-bits get SRAWI - temp1 src2 move>args - %prepare-alien-invoke - "overflow_fixnum_multiply" f %alien-invoke-tail ; +M:: ppc %fixnum-mul ( label dst src1 src2 -- ) + [ MULLWO. ] overflow-template ; : bignum@ ( n -- offset ) cells bignum tag-number - ; inline @@ -462,17 +397,26 @@ M:: ppc %write-barrier ( src card# table -- ) src card# deck-bits SRWI table scratch-reg card# STBX ; -M:: ppc %gc ( temp1 temp2 gc-roots gc-root-count -- ) - "end" define-label +M:: ppc %check-nursery ( label temp1 temp2 -- ) temp2 load-zone-ptr temp1 temp2 cell LWZ temp2 temp2 3 cells LWZ - temp1 temp1 1024 ADDI ! add ALLOT_BUFFER_ZONE to here - temp1 0 temp2 CMP ! is here >= end? - "end" get BLE + ! add ALLOT_BUFFER_ZONE to here + temp1 temp1 1024 ADDI + ! is here >= end? + temp1 0 temp2 CMP + label BLE ; + +M:: ppc %save-gc-root ( gc-root register -- ) + register 1 gc-root gc-root@ STW ; + +M:: ppc %load-gc-root ( gc-root register -- ) + register 1 gc-root gc-root@ LWZ ; + +M:: ppc %call-gc ( gc-root-count -- ) %prepare-alien-invoke - 0 3 LI - 0 4 LI + 3 1 gc-root-base param@ ADDI + gc-root-count 4 LI "inline_gc" f %alien-invoke "end" resolve-label ; diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 727131aa25..76699c1306 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -1,13 +1,12 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: locals alien.c-types alien.syntax arrays kernel fry -math namespaces sequences system layouts io vocabs.loader -accessors init combinators command-line cpu.x86.assembler -cpu.x86 cpu.architecture make compiler compiler.units +USING: locals alien.c-types alien.syntax arrays kernel fry math +namespaces sequences system layouts io vocabs.loader accessors init +combinators command-line make compiler compiler.units compiler.constants compiler.alien compiler.codegen -compiler.codegen.fixup compiler.cfg.instructions -compiler.cfg.builder compiler.cfg.intrinsics -compiler.cfg.stack-frame ; +compiler.codegen.fixup compiler.cfg.instructions compiler.cfg.builder +compiler.cfg.intrinsics compiler.cfg.stack-frame cpu.x86.assembler +cpu.x86.assembler.operands cpu.x86 cpu.architecture ; IN: cpu.x86.32 ! We implement the FFI for Linux, OS X and Windows all at once. diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor index 490d37ccbc..674cc817d7 100644 --- a/basis/cpu/x86/32/bootstrap.factor +++ b/basis/cpu/x86/32/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser compiler.constants ; +cpu.x86.assembler cpu.x86.assembler.operands layouts +vocabs parser compiler.constants ; IN: bootstrap.x86 4 \ cell set diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index 8eb04eb2b5..f837c7de73 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -1,12 +1,11 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays kernel math namespaces make sequences -system layouts alien alien.c-types alien.accessors alien.structs -slots splitting assocs combinators locals cpu.x86.assembler -cpu.x86 cpu.architecture compiler.constants -compiler.codegen compiler.codegen.fixup -compiler.cfg.instructions compiler.cfg.builder -compiler.cfg.intrinsics compiler.cfg.stack-frame ; +USING: accessors arrays kernel math namespaces make sequences system +layouts alien alien.c-types alien.accessors alien.structs slots +splitting assocs combinators locals compiler.constants +compiler.codegen compiler.codegen.fixup compiler.cfg.instructions +compiler.cfg.builder compiler.cfg.intrinsics compiler.cfg.stack-frame +cpu.x86.assembler cpu.x86.assembler.operands cpu.x86 cpu.architecture ; IN: cpu.x86.64 M: x86.64 machine-registers diff --git a/basis/cpu/x86/64/bootstrap.factor b/basis/cpu/x86/64/bootstrap.factor index c5c7e63dbc..8b0d53cda5 100644 --- a/basis/cpu/x86/64/bootstrap.factor +++ b/basis/cpu/x86/64/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser compiler.constants math ; +layouts vocabs parser compiler.constants math +cpu.x86.assembler cpu.x86.assembler.operands ; IN: bootstrap.x86 8 \ cell set diff --git a/basis/cpu/x86/64/unix/bootstrap.factor b/basis/cpu/x86/64/unix/bootstrap.factor index e48a20a9de..b6d56840e2 100644 --- a/basis/cpu/x86/64/unix/bootstrap.factor +++ b/basis/cpu/x86/64/unix/bootstrap.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser ; +cpu.x86.assembler cpu.x86.assembler.operands layouts vocabs parser ; IN: bootstrap.x86 : stack-frame-size ( -- n ) 4 bootstrap-cells ; diff --git a/basis/cpu/x86/64/unix/unix.factor b/basis/cpu/x86/64/unix/unix.factor index eea960d03d..7ab25b6d3f 100644 --- a/basis/cpu/x86/64/unix/unix.factor +++ b/basis/cpu/x86/64/unix/unix.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays sequences math splitting make assocs -kernel layouts system alien.c-types alien.structs -cpu.architecture cpu.x86.assembler cpu.x86 -compiler.codegen compiler.cfg.registers ; +USING: accessors arrays sequences math splitting make assocs kernel +layouts system alien.c-types alien.structs cpu.architecture +cpu.x86.assembler cpu.x86.assembler.operands cpu.x86 compiler.codegen +compiler.cfg.registers ; IN: cpu.x86.64.unix M: int-regs param-regs drop { RDI RSI RDX RCX R8 R9 } ; diff --git a/basis/cpu/x86/64/winnt/bootstrap.factor b/basis/cpu/x86/64/winnt/bootstrap.factor index ff15ef27af..0228082956 100644 --- a/basis/cpu/x86/64/winnt/bootstrap.factor +++ b/basis/cpu/x86/64/winnt/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser ; +layouts vocabs parser cpu.x86.assembler +cpu.x86.assembler.operands ; IN: bootstrap.x86 : stack-frame-size ( -- n ) 8 bootstrap-cells ; diff --git a/basis/cpu/x86/assembler/assembler-tests.factor b/basis/cpu/x86/assembler/assembler-tests.factor index 66adee6bf6..47d6434279 100644 --- a/basis/cpu/x86/assembler/assembler-tests.factor +++ b/basis/cpu/x86/assembler/assembler-tests.factor @@ -1,6 +1,9 @@ -USING: cpu.x86.assembler kernel tools.test namespaces make ; +USING: cpu.x86.assembler cpu.x86.assembler.operands +kernel tools.test namespaces make ; IN: cpu.x86.assembler.tests +[ { HEX: 40 HEX: 8a HEX: 2a } ] [ [ BPL RDX [] MOV ] { } make ] unit-test + [ { HEX: 49 HEX: 89 HEX: 04 HEX: 24 } ] [ [ R12 [] RAX MOV ] { } make ] unit-test [ { HEX: 49 HEX: 8b HEX: 06 } ] [ [ RAX R14 [] MOV ] { } make ] unit-test @@ -68,6 +71,7 @@ IN: cpu.x86.assembler.tests ! sse shift instructions [ { HEX: 66 HEX: 0f HEX: 71 HEX: d0 HEX: 05 } ] [ [ XMM0 5 PSRLW ] { } make ] unit-test +[ { HEX: 66 HEX: 0f HEX: d1 HEX: c1 } ] [ [ XMM0 XMM1 PSRLW ] { } make ] unit-test ! sse comparison instructions [ { HEX: 66 HEX: 0f HEX: c2 HEX: c1 HEX: 02 } ] [ [ XMM0 XMM1 CMPLEPD ] { } make ] unit-test diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index e91ebdcb1a..2b99513fc1 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -1,90 +1,15 @@ -! Copyright (C) 2005, 2009 Slava Pestov. +! Copyright (C) 2005, 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays io.binary kernel combinators kernel.private math +USING: arrays io.binary kernel combinators kernel.private math locals namespaces make sequences words system layouts math.order accessors -cpu.x86.assembler.syntax ; +cpu.x86.assembler.operands cpu.x86.assembler.operands.private ; QUALIFIED: sequences IN: cpu.x86.assembler ! A postfix assembler for x86-32 and x86-64. -! In 32-bit mode, { 1234 } is absolute indirect addressing. -! In 64-bit mode, { 1234 } is RIP-relative. -! Beware! - -! Register operands -- eg, ECX -REGISTERS: 8 AL CL DL BL SPL BPL SIL DIL R8B R9B R10B R11B R12B R13B R14B R15B ; - -ALIAS: AH SPL -ALIAS: CH BPL -ALIAS: DH SIL -ALIAS: BH DIL - -REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ; - -REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ; - -REGISTERS: 64 -RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ; - -REGISTERS: 128 -XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 -XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ; - -TUPLE: byte value ; - -C: byte - ; - -! Addressing modes -TUPLE: indirect base index scale displacement ; - -M: indirect extended? base>> extended? ; - -: canonicalize-EBP ( indirect -- indirect ) - #! { EBP } ==> { EBP 0 } - dup [ base>> { EBP RBP R13 } member? ] [ displacement>> not ] bi and - [ 0 >>displacement ] when ; - -ERROR: bad-index indirect ; - -: check-ESP ( indirect -- indirect ) - dup index>> { ESP RSP } memq? [ bad-index ] when ; - -: canonicalize ( indirect -- indirect ) - #! Modify the indirect to work around certain addressing mode - #! quirks. - canonicalize-EBP check-ESP ; - -: ( base index scale displacement -- indirect ) - indirect boa canonicalize ; - : reg-code ( reg -- n ) "register" word-prop 7 bitand ; : indirect-base* ( op -- n ) base>> EBP or reg-code ; @@ -159,27 +84,13 @@ M: indirect displacement, dup displacement>> dup [ swap base>> [ dup fits-in-byte? [ , ] [ 4, ] if ] [ 4, ] if - ] [ - 2drop - ] if ; + ] [ 2drop ] if ; M: register displacement, drop ; : addressing ( reg# indirect -- ) [ mod-r/m, ] [ sib, ] [ displacement, ] tri ; -! Utilities -UNION: operand register indirect ; - -GENERIC: operand-64? ( operand -- ? ) - -M: indirect operand-64? - [ base>> ] [ index>> ] bi [ operand-64? ] either? ; - -M: register-64 operand-64? drop t ; - -M: object operand-64? drop f ; - : rex.w? ( rex.w reg r/m -- ? ) { { [ dup register-128? ] [ drop operand-64? ] } @@ -192,22 +103,25 @@ M: object operand-64? drop f ; : rex.b ( m op -- n ) [ extended? [ BIN: 00000001 bitor ] when ] keep - dup indirect? [ - index>> extended? [ BIN: 00000010 bitor ] when - ] [ - drop - ] if ; + dup indirect? [ index>> extended? [ BIN: 00000010 bitor ] when ] [ drop ] if ; -: rex-prefix ( reg r/m rex.w -- ) +: no-prefix? ( prefix reg r/m -- ? ) + [ BIN: 01000000 = ] + [ extended-8-bit-register? not ] + [ extended-8-bit-register? not ] tri* + and and ; + +:: rex-prefix ( reg r/m rex.w -- ) #! Compile an AMD64 REX prefix. - 2over rex.w? BIN: 01001000 BIN: 01000000 ? - swap rex.r swap rex.b - dup BIN: 01000000 = [ drop ] [ , ] if ; + rex.w reg r/m rex.w? BIN: 01001000 BIN: 01000000 ? + r/m rex.r + reg rex.b + dup reg r/m no-prefix? [ drop ] [ , ] if ; : 16-prefix ( reg r/m -- ) [ register-16? ] either? [ HEX: 66 , ] when ; -: prefix ( reg r/m rex.w -- ) 2over 16-prefix rex-prefix ; +: prefix ( reg r/m rex.w -- ) [ drop 16-prefix ] [ rex-prefix ] 3bi ; : prefix-1 ( reg rex.w -- ) f swap prefix ; @@ -269,22 +183,10 @@ M: object operand-64? drop f ; : 2-operand ( dst src op -- ) #! Sets the opcode's direction bit. It is set if the #! destination is a direct register operand. - 2over 16-prefix - direction-bit - operand-size-bit - (2-operand) ; + [ drop 16-prefix ] [ direction-bit operand-size-bit (2-operand) ] 3bi ; PRIVATE> -: [] ( reg/displacement -- indirect ) - dup integer? [ [ f f f ] dip ] [ f f f ] if ; - -: [+] ( reg displacement -- indirect ) - dup integer? - [ dup zero? [ drop f ] when [ f f ] dip ] - [ f f ] if - ; - ! Moving stuff GENERIC: PUSH ( op -- ) M: register PUSH f HEX: 50 short-operand ; @@ -681,24 +583,57 @@ ALIAS: PINSRQ PINSRD : MAXPD ( dest src -- ) HEX: 5f HEX: 66 2-operand-rm-sse ; : MAXSD ( dest src -- ) HEX: 5f HEX: f2 2-operand-rm-sse ; : MAXSS ( dest src -- ) HEX: 5f HEX: f3 2-operand-rm-sse ; +: PUNPCKLBW ( dest src -- ) HEX: 60 HEX: 66 2-operand-rm-sse ; +: PUNPCKLWD ( dest src -- ) HEX: 61 HEX: 66 2-operand-rm-sse ; +: PUNPCKLDQ ( dest src -- ) HEX: 62 HEX: 66 2-operand-rm-sse ; +: PACKSSWB ( dest src -- ) HEX: 63 HEX: 66 2-operand-rm-sse ; +: PCMPGTB ( dest src -- ) HEX: 64 HEX: 66 2-operand-rm-sse ; +: PCMPGTW ( dest src -- ) HEX: 65 HEX: 66 2-operand-rm-sse ; +: PCMPGTD ( dest src -- ) HEX: 66 HEX: 66 2-operand-rm-sse ; +: PACKUSWB ( dest src -- ) HEX: 67 HEX: 66 2-operand-rm-sse ; +: PUNPCKHBW ( dest src -- ) HEX: 68 HEX: 66 2-operand-rm-sse ; +: PUNPCKHWD ( dest src -- ) HEX: 69 HEX: 66 2-operand-rm-sse ; +: PUNPCKHDQ ( dest src -- ) HEX: 6a HEX: 66 2-operand-rm-sse ; +: PACKSSDW ( dest src -- ) HEX: 6b HEX: 66 2-operand-rm-sse ; : PUNPCKLQDQ ( dest src -- ) HEX: 6c HEX: 66 2-operand-rm-sse ; : PUNPCKHQDQ ( dest src -- ) HEX: 6d HEX: 66 2-operand-rm-sse ; +: MOVD ( dest src -- ) { HEX: 6e HEX: 7e } HEX: 66 2-operand-rm-mr-sse ; : MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ; : MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ; : PSHUFD ( dest src imm -- ) HEX: 70 HEX: 66 3-operand-rm-sse ; : PSHUFLW ( dest src imm -- ) HEX: 70 HEX: f2 3-operand-rm-sse ; : PSHUFHW ( dest src imm -- ) HEX: 70 HEX: f3 3-operand-rm-sse ; -: PSRLW ( dest imm -- ) BIN: 010 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSRAW ( dest imm -- ) BIN: 100 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSLLW ( dest imm -- ) BIN: 110 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSRLD ( dest imm -- ) BIN: 010 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSRAD ( dest imm -- ) BIN: 100 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSLLD ( dest imm -- ) BIN: 110 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSRLQ ( dest imm -- ) BIN: 010 HEX: 73 HEX: 66 2-operand-sse-shift ; + +: (PSRLW-imm) ( dest imm -- ) BIN: 010 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSRAW-imm) ( dest imm -- ) BIN: 100 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSLLW-imm) ( dest imm -- ) BIN: 110 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSRLD-imm) ( dest imm -- ) BIN: 010 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSRAD-imm) ( dest imm -- ) BIN: 100 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSLLD-imm) ( dest imm -- ) BIN: 110 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSRLQ-imm) ( dest imm -- ) BIN: 010 HEX: 73 HEX: 66 2-operand-sse-shift ; +: (PSLLQ-imm) ( dest imm -- ) BIN: 110 HEX: 73 HEX: 66 2-operand-sse-shift ; + +: (PSRLW-reg) ( dest src -- ) HEX: d1 HEX: 66 2-operand-rm-sse ; +: (PSRLD-reg) ( dest src -- ) HEX: d2 HEX: 66 2-operand-rm-sse ; +: (PSRLQ-reg) ( dest src -- ) HEX: d3 HEX: 66 2-operand-rm-sse ; +: (PSRAW-reg) ( dest src -- ) HEX: e1 HEX: 66 2-operand-rm-sse ; +: (PSRAD-reg) ( dest src -- ) HEX: e2 HEX: 66 2-operand-rm-sse ; +: (PSLLW-reg) ( dest src -- ) HEX: f1 HEX: 66 2-operand-rm-sse ; +: (PSLLD-reg) ( dest src -- ) HEX: f2 HEX: 66 2-operand-rm-sse ; +: (PSLLQ-reg) ( dest src -- ) HEX: f3 HEX: 66 2-operand-rm-sse ; + +: PSRLW ( dest src -- ) dup integer? [ (PSRLW-imm) ] [ (PSRLW-reg) ] if ; +: PSRAW ( dest src -- ) dup integer? [ (PSRAW-imm) ] [ (PSRAW-reg) ] if ; +: PSLLW ( dest src -- ) dup integer? [ (PSLLW-imm) ] [ (PSLLW-reg) ] if ; +: PSRLD ( dest src -- ) dup integer? [ (PSRLD-imm) ] [ (PSRLD-reg) ] if ; +: PSRAD ( dest src -- ) dup integer? [ (PSRAD-imm) ] [ (PSRAD-reg) ] if ; +: PSLLD ( dest src -- ) dup integer? [ (PSLLD-imm) ] [ (PSLLD-reg) ] if ; +: PSRLQ ( dest src -- ) dup integer? [ (PSRLQ-imm) ] [ (PSRLQ-reg) ] if ; +: PSLLQ ( dest src -- ) dup integer? [ (PSLLQ-imm) ] [ (PSLLQ-reg) ] if ; + : PSRLDQ ( dest imm -- ) BIN: 011 HEX: 73 HEX: 66 2-operand-sse-shift ; -: PSLLQ ( dest imm -- ) BIN: 110 HEX: 73 HEX: 66 2-operand-sse-shift ; : PSLLDQ ( dest imm -- ) BIN: 111 HEX: 73 HEX: 66 2-operand-sse-shift ; : PCMPEQB ( dest src -- ) HEX: 74 HEX: 66 2-operand-rm-sse ; @@ -709,11 +644,14 @@ ALIAS: PINSRQ PINSRD : HSUBPD ( dest src -- ) HEX: 7d HEX: 66 2-operand-rm-sse ; : HSUBPS ( dest src -- ) HEX: 7d HEX: f2 2-operand-rm-sse ; +: FXSAVE ( dest -- ) { BIN: 000 f { HEX: 0f HEX: ae } } 1-operand ; +: FXRSTOR ( src -- ) { BIN: 001 f { HEX: 0f HEX: ae } } 1-operand ; : LDMXCSR ( src -- ) { BIN: 010 f { HEX: 0f HEX: ae } } 1-operand ; : STMXCSR ( dest -- ) { BIN: 011 f { HEX: 0f HEX: ae } } 1-operand ; : LFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 350 , ; : MFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 360 , ; : SFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 370 , ; +: CLFLUSH ( dest -- ) { BIN: 111 f { HEX: 0f HEX: ae } } 1-operand ; : POPCNT ( dest src -- ) HEX: b8 HEX: f3 2-operand-rm-sse ; @@ -762,26 +700,46 @@ ALIAS: PINSRQ PINSRD : ADDSUBPD ( dest src -- ) HEX: d0 HEX: 66 2-operand-rm-sse ; : ADDSUBPS ( dest src -- ) HEX: d0 HEX: f2 2-operand-rm-sse ; : PADDQ ( dest src -- ) HEX: d4 HEX: 66 2-operand-rm-sse ; +: PMULLW ( dest src -- ) HEX: d5 HEX: 66 2-operand-rm-sse ; +: PMOVMSKB ( dest src -- ) HEX: d7 HEX: 66 2-operand-rm-sse ; +: PSUBUSB ( dest src -- ) HEX: d8 HEX: 66 2-operand-rm-sse ; +: PSUBUSW ( dest src -- ) HEX: d9 HEX: 66 2-operand-rm-sse ; : PMINUB ( dest src -- ) HEX: da HEX: 66 2-operand-rm-sse ; +: PAND ( dest src -- ) HEX: db HEX: 66 2-operand-rm-sse ; +: PADDUSB ( dest src -- ) HEX: dc HEX: 66 2-operand-rm-sse ; +: PADDUSW ( dest src -- ) HEX: dd HEX: 66 2-operand-rm-sse ; : PMAXUB ( dest src -- ) HEX: de HEX: 66 2-operand-rm-sse ; +: PANDN ( dest src -- ) HEX: df HEX: 66 2-operand-rm-sse ; : PAVGB ( dest src -- ) HEX: e0 HEX: 66 2-operand-rm-sse ; : PAVGW ( dest src -- ) HEX: e3 HEX: 66 2-operand-rm-sse ; : PMULHUW ( dest src -- ) HEX: e4 HEX: 66 2-operand-rm-sse ; +: PMULHW ( dest src -- ) HEX: e5 HEX: 66 2-operand-rm-sse ; : CVTTPD2DQ ( dest src -- ) HEX: e6 HEX: 66 2-operand-rm-sse ; : CVTPD2DQ ( dest src -- ) HEX: e6 HEX: f2 2-operand-rm-sse ; : CVTDQ2PD ( dest src -- ) HEX: e6 HEX: f3 2-operand-rm-sse ; : MOVNTDQ ( dest src -- ) HEX: e7 HEX: 66 2-operand-mr-sse ; +: PSUBSB ( dest src -- ) HEX: e8 HEX: 66 2-operand-rm-sse ; +: PSUBSW ( dest src -- ) HEX: e9 HEX: 66 2-operand-rm-sse ; : PMINSW ( dest src -- ) HEX: ea HEX: 66 2-operand-rm-sse ; +: POR ( dest src -- ) HEX: eb HEX: 66 2-operand-rm-sse ; +: PADDSB ( dest src -- ) HEX: ec HEX: 66 2-operand-rm-sse ; +: PADDSW ( dest src -- ) HEX: ed HEX: 66 2-operand-rm-sse ; : PMAXSW ( dest src -- ) HEX: ee HEX: 66 2-operand-rm-sse ; +: PXOR ( dest src -- ) HEX: ef HEX: 66 2-operand-rm-sse ; : LDDQU ( dest src -- ) HEX: f0 HEX: f2 2-operand-rm-sse ; : PMULUDQ ( dest src -- ) HEX: f4 HEX: 66 2-operand-rm-sse ; +: PMADDWD ( dest src -- ) HEX: f5 HEX: 66 2-operand-rm-sse ; : PSADBW ( dest src -- ) HEX: f6 HEX: 66 2-operand-rm-sse ; - : MASKMOVDQU ( dest src -- ) HEX: f7 HEX: 66 2-operand-rm-sse ; - +: PSUBB ( dest src -- ) HEX: f8 HEX: 66 2-operand-rm-sse ; +: PSUBW ( dest src -- ) HEX: f9 HEX: 66 2-operand-rm-sse ; +: PSUBD ( dest src -- ) HEX: fa HEX: 66 2-operand-rm-sse ; : PSUBQ ( dest src -- ) HEX: fb HEX: 66 2-operand-rm-sse ; +: PADDB ( dest src -- ) HEX: fc HEX: 66 2-operand-rm-sse ; +: PADDW ( dest src -- ) HEX: fd HEX: 66 2-operand-rm-sse ; +: PADDD ( dest src -- ) HEX: fe HEX: 66 2-operand-rm-sse ; ! x86-64 branch prediction hints diff --git a/basis/cpu/x86/assembler/authors.txt b/basis/cpu/x86/assembler/authors.txt index 1901f27a24..580f882c8d 100755 --- a/basis/cpu/x86/assembler/authors.txt +++ b/basis/cpu/x86/assembler/authors.txt @@ -1 +1,2 @@ Slava Pestov +Joe Groff diff --git a/basis/cpu/x86/assembler/operands/operands.factor b/basis/cpu/x86/assembler/operands/operands.factor new file mode 100644 index 0000000000..d3cb66ff12 --- /dev/null +++ b/basis/cpu/x86/assembler/operands/operands.factor @@ -0,0 +1,118 @@ +! Copyright (C) 2008, 2009 Slava Pestov, Joe Groff. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel words math accessors sequences namespaces +assocs layouts cpu.x86.assembler.syntax ; +IN: cpu.x86.assembler.operands + +! In 32-bit mode, { 1234 } is absolute indirect addressing. +! In 64-bit mode, { 1234 } is RIP-relative. +! Beware! + +REGISTERS: 8 AL CL DL BL SPL BPL SIL DIL R8B R9B R10B R11B R12B R13B R14B R15B ; + +ALIAS: AH SPL +ALIAS: CH BPL +ALIAS: DH SIL +ALIAS: BH DIL + +REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ; + +REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ; + +REGISTERS: 64 +RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ; + +REGISTERS: 128 +XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 +XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ; + + ; + +! Addressing modes +TUPLE: indirect base index scale displacement ; + +M: indirect extended? base>> extended? ; + +: canonicalize-EBP ( indirect -- indirect ) + #! { EBP } ==> { EBP 0 } + dup [ base>> { EBP RBP R13 } member? ] [ displacement>> not ] bi and + [ 0 >>displacement ] when ; + +ERROR: bad-index indirect ; + +: check-ESP ( indirect -- indirect ) + dup index>> { ESP RSP } memq? [ bad-index ] when ; + +: canonicalize ( indirect -- indirect ) + #! Modify the indirect to work around certain addressing mode + #! quirks. + canonicalize-EBP check-ESP ; + +: ( base index scale displacement -- indirect ) + indirect boa canonicalize ; + +! Utilities +UNION: operand register indirect ; + +GENERIC: operand-64? ( operand -- ? ) + +M: indirect operand-64? + [ base>> ] [ index>> ] bi [ operand-64? ] either? ; + +M: register-64 operand-64? drop t ; + +M: object operand-64? drop f ; + +PRIVATE> + +: [] ( reg/displacement -- indirect ) + dup integer? [ [ f f f ] dip ] [ f f f ] if ; + +: [+] ( reg displacement -- indirect ) + dup integer? + [ dup zero? [ drop f ] when [ f f ] dip ] + [ f f ] if + ; + +TUPLE: byte value ; + +C: byte + +: extended-8-bit-register? ( register -- ? ) + { SPL BPL SIL DIL } memq? ; + +: n-bit-version-of ( register n -- register' ) + ! Certain 8-bit registers don't exist in 32-bit mode... + [ "register" word-prop ] dip registers get at nth + dup extended-8-bit-register? cell 4 = and + [ drop f ] when ; + +: 8-bit-version-of ( register -- register' ) 8 n-bit-version-of ; +: 16-bit-version-of ( register -- register' ) 16 n-bit-version-of ; +: 32-bit-version-of ( register -- register' ) 32 n-bit-version-of ; +: 64-bit-version-of ( register -- register' ) 64 n-bit-version-of ; +: native-version-of ( register -- register' ) cell-bits n-bit-version-of ; \ No newline at end of file diff --git a/basis/cpu/x86/assembler/syntax/syntax.factor b/basis/cpu/x86/assembler/syntax/syntax.factor index 631dcaa8f7..5b65c19155 100644 --- a/basis/cpu/x86/assembler/syntax/syntax.factor +++ b/basis/cpu/x86/assembler/syntax/syntax.factor @@ -1,14 +1,23 @@ -! Copyright (C) 2008 Slava Pestov. +! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel words words.symbol sequences lexer parser fry ; +USING: kernel words words.symbol sequences lexer parser fry +namespaces combinators assocs ; IN: cpu.x86.assembler.syntax -: define-register ( name num size -- ) - [ "cpu.x86.assembler" create dup define-symbol ] 2dip - [ dupd "register" set-word-prop ] dip - "register-size" set-word-prop ; +SYMBOL: registers -: define-registers ( names size -- ) - '[ _ define-register ] each-index ; +registers [ H{ } clone ] initialize -SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ; +: define-register ( name num size -- word ) + [ "cpu.x86.assembler.operands" create ] 2dip { + [ 2drop ] + [ 2drop define-symbol ] + [ drop "register" set-word-prop ] + [ nip "register-size" set-word-prop ] + } 3cleave ; + +: define-registers ( size names -- ) + [ swap '[ _ define-register ] map-index ] [ drop ] 2bi + registers get set-at ; + +SYNTAX: REGISTERS: scan-word ";" parse-tokens define-registers ; diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index 474ce2ea46..6363f17e48 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: bootstrap.image.private kernel kernel.private namespaces -system cpu.x86.assembler layouts compiler.units math -math.private compiler.constants vocabs slots.private words -locals.backend make sequences combinators arrays ; +USING: bootstrap.image.private kernel kernel.private namespaces system +layouts compiler.units math math.private compiler.constants vocabs +slots.private words locals.backend make sequences combinators arrays + cpu.x86.assembler cpu.x86.assembler.operands ; IN: bootstrap.x86 big-endian off diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 258f842598..34b1b63581 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs alien alien.c-types arrays strings -cpu.x86.assembler cpu.x86.assembler.private cpu.architecture -kernel kernel.private math memory namespaces make sequences -words system layouts combinators math.order fry locals +cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands +cpu.architecture kernel kernel.private math memory namespaces make +sequences words system layouts combinators math.order fry locals compiler.constants compiler.cfg.registers compiler.cfg.instructions @@ -264,93 +264,118 @@ M:: x86 %box-alien ( dst src temp -- ) "end" resolve-label ] with-scope ; -: small-reg-8 ( reg -- reg' ) - H{ - { EAX RAX } - { ECX RCX } - { EDX RDX } - { EBX RBX } - { ESP RSP } - { EBP RBP } - { ESI RSP } - { EDI RDI } +! The 'small-reg' mess is pretty crappy, but its only used on x86-32. +! On x86-64, all registers have 8-bit versions. However, a similar +! problem arises for shifts, where the shift count must be in CL, and +! so one day I will fix this properly by adding precoloring to the +! register allocator. - { RAX RAX } - { RCX RCX } - { RDX RDX } - { RBX RBX } - { RSP RSP } - { RBP RBP } - { RSI RSP } - { RDI RDI } - } at ; inline +HOOK: has-small-reg? cpu ( reg size -- ? ) -: small-reg-4 ( reg -- reg' ) - small-reg-8 H{ - { RAX EAX } - { RCX ECX } - { RDX EDX } - { RBX EBX } - { RSP ESP } - { RBP EBP } - { RSI ESP } - { RDI EDI } - } at ; inline +CONSTANT: have-byte-regs { EAX ECX EDX EBX } -: small-reg-2 ( reg -- reg' ) - small-reg-4 H{ - { EAX AX } - { ECX CX } - { EDX DX } - { EBX BX } - { ESP SP } - { EBP BP } - { ESI SI } - { EDI DI } - } at ; inline - -: small-reg-1 ( reg -- reg' ) - small-reg-4 { - { EAX AL } - { ECX CL } - { EDX DL } - { EBX BL } - } at ; inline - -: small-reg ( reg size -- reg' ) +M: x86.32 has-small-reg? { - { 1 [ small-reg-1 ] } - { 2 [ small-reg-2 ] } - { 4 [ small-reg-4 ] } - { 8 [ small-reg-8 ] } + { 8 [ have-byte-regs memq? ] } + { 16 [ drop t ] } + { 32 [ drop t ] } } case ; -HOOK: small-regs cpu ( -- regs ) - -M: x86.32 small-regs { EAX ECX EDX EBX } ; -M: x86.64 small-regs { RAX RCX RDX RBX } ; - -HOOK: small-reg-native cpu ( reg -- reg' ) - -M: x86.32 small-reg-native small-reg-4 ; -M: x86.64 small-reg-native small-reg-8 ; +M: x86.64 has-small-reg? 2drop t ; : small-reg-that-isn't ( exclude -- reg' ) - small-regs swap [ small-reg-native ] map '[ _ memq? not ] find nip ; + [ have-byte-regs ] dip + [ native-version-of ] map + '[ _ memq? not ] find nip ; : with-save/restore ( reg quot -- ) [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline -:: with-small-register ( dst exclude quot: ( new-dst -- ) -- ) - #! If the destination register overlaps a small register, we - #! call the quot with that. Otherwise, we find a small - #! register that is not in exclude, and call quot, saving - #! and restoring the small register. - dst small-reg-native small-regs memq? [ dst quot call ] [ +:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- ) + ! If the destination register overlaps a small register with + ! 'size' bits, we call the quot with that. Otherwise, we find a + ! small register that is not in exclude, and call quot, saving and + ! restoring the small register. + dst size has-small-reg? [ dst quot call ] [ exclude small-reg-that-isn't [ quot call ] with-save/restore ] if ; inline +M:: x86 %string-nth ( dst src index temp -- ) + ! We request a small-reg of size 8 since those of size 16 are + ! a superset. + "end" define-label + dst { src index temp } 8 [| new-dst | + ! Load the least significant 7 bits into new-dst. + ! 8th bit indicates whether we have to load from + ! the aux vector or not. + temp src index [+] LEA + new-dst 8-bit-version-of temp string-offset [+] MOV + new-dst new-dst 8-bit-version-of MOVZX + ! Do we have to look at the aux vector? + new-dst HEX: 80 CMP + "end" get JL + ! Yes, this is a non-ASCII character. Load aux vector + temp src string-aux-offset [+] MOV + new-dst temp XCHG + ! Compute index + new-dst index ADD + new-dst index ADD + ! Load high 16 bits + new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV + new-dst new-dst 16-bit-version-of MOVZX + new-dst 7 SHL + ! Compute code point + new-dst temp XOR + "end" resolve-label + dst new-dst ?MOV + ] with-small-register ; + +M:: x86 %set-string-nth-fast ( ch str index temp -- ) + ch { index str temp } 8 [| new-ch | + new-ch ch ?MOV + temp str index [+] LEA + temp string-offset [+] new-ch 8-bit-version-of MOV + ] with-small-register ; + +:: %alien-integer-getter ( dst src size quot -- ) + dst { src } size [| new-dst | + new-dst dup size n-bit-version-of dup src [] MOV + quot call + dst new-dst ?MOV + ] with-small-register ; inline + +: %alien-unsigned-getter ( dst src size -- ) + [ MOVZX ] %alien-integer-getter ; inline + +M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ; +M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ; +M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ; + +: %alien-signed-getter ( dst src size -- ) + [ MOVSX ] %alien-integer-getter ; inline + +M: x86 %alien-signed-1 8 %alien-signed-getter ; +M: x86 %alien-signed-2 16 %alien-signed-getter ; +M: x86 %alien-signed-4 32 %alien-signed-getter ; + +M: x86 %alien-cell [] MOV ; +M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ; +M: x86 %alien-double [] MOVSD ; + +:: %alien-integer-setter ( ptr value size -- ) + value { ptr } size [| new-value | + new-value value ?MOV + ptr [] new-value size n-bit-version-of MOV + ] with-small-register ; inline + +M: x86 %set-alien-integer-1 8 %alien-integer-setter ; +M: x86 %set-alien-integer-2 16 %alien-integer-setter ; +M: x86 %set-alien-integer-4 32 %alien-integer-setter ; +M: x86 %set-alien-cell [ [] ] dip MOV ; +M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ; +M: x86 %set-alien-double [ [] ] dip MOVSD ; + : shift-count? ( reg -- ? ) { ECX RCX } memq? ; :: emit-shift ( dst src1 src2 quot -- ) @@ -362,7 +387,7 @@ M: x86.64 small-reg-native small-reg-8 ; src2 CL quot call dst src2 XCHG ] [ - ECX small-reg-native [ + ECX native-version-of [ CL src2 MOV drop dst CL quot call ] with-save/restore @@ -373,80 +398,6 @@ M: x86 %shl [ SHL ] emit-shift ; M: x86 %shr [ SHR ] emit-shift ; M: x86 %sar [ SAR ] emit-shift ; -M:: x86 %string-nth ( dst src index temp -- ) - "end" define-label - dst { src index temp } [| new-dst | - ! Load the least significant 7 bits into new-dst. - ! 8th bit indicates whether we have to load from - ! the aux vector or not. - temp src index [+] LEA - new-dst 1 small-reg temp string-offset [+] MOV - new-dst new-dst 1 small-reg MOVZX - ! Do we have to look at the aux vector? - new-dst HEX: 80 CMP - "end" get JL - ! Yes, this is a non-ASCII character. Load aux vector - temp src string-aux-offset [+] MOV - new-dst temp XCHG - ! Compute index - new-dst index ADD - new-dst index ADD - ! Load high 16 bits - new-dst 2 small-reg new-dst byte-array-offset [+] MOV - new-dst new-dst 2 small-reg MOVZX - new-dst 7 SHL - ! Compute code point - new-dst temp XOR - "end" resolve-label - dst new-dst ?MOV - ] with-small-register ; - -M:: x86 %set-string-nth-fast ( ch str index temp -- ) - ch { index str temp } [| new-ch | - new-ch ch ?MOV - temp str index [+] LEA - temp string-offset [+] new-ch 1 small-reg MOV - ] with-small-register ; - -:: %alien-integer-getter ( dst src size quot -- ) - dst { src } [| new-dst | - new-dst dup size small-reg dup src [] MOV - quot call - dst new-dst ?MOV - ] with-small-register ; inline - -: %alien-unsigned-getter ( dst src size -- ) - [ MOVZX ] %alien-integer-getter ; inline - -M: x86 %alien-unsigned-1 1 %alien-unsigned-getter ; -M: x86 %alien-unsigned-2 2 %alien-unsigned-getter ; - -: %alien-signed-getter ( dst src size -- ) - [ MOVSX ] %alien-integer-getter ; inline - -M: x86 %alien-signed-1 1 %alien-signed-getter ; -M: x86 %alien-signed-2 2 %alien-signed-getter ; -M: x86 %alien-signed-4 4 %alien-signed-getter ; - -M: x86 %alien-unsigned-4 4 [ 2drop ] %alien-integer-getter ; - -M: x86 %alien-cell [] MOV ; -M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ; -M: x86 %alien-double [] MOVSD ; - -:: %alien-integer-setter ( ptr value size -- ) - value { ptr } [| new-value | - new-value value ?MOV - ptr [] new-value size small-reg MOV - ] with-small-register ; inline - -M: x86 %set-alien-integer-1 1 %alien-integer-setter ; -M: x86 %set-alien-integer-2 2 %alien-integer-setter ; -M: x86 %set-alien-integer-4 4 %alien-integer-setter ; -M: x86 %set-alien-cell [ [] ] dip MOV ; -M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ; -M: x86 %set-alien-double [ [] ] dip MOVSD ; - : load-zone-ptr ( reg -- ) #! Load pointer to start of zone array 0 MOV "nursery" f rc-absolute-cell rel-dlsym ; @@ -484,38 +435,19 @@ M:: x86 %write-barrier ( src card# table -- ) table table [] MOV table card# [+] card-mark MOV ; -:: check-nursery ( temp1 temp2 -- ) +M:: x86 %check-nursery ( label temp1 temp2 -- ) temp1 load-zone-ptr temp2 temp1 cell [+] MOV temp2 1024 ADD temp1 temp1 3 cells [+] MOV - temp2 temp1 CMP ; + temp2 temp1 CMP + label JLE ; -GENERIC# save-gc-root 1 ( gc-root operand temp -- ) +M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ; -M:: spill-slot save-gc-root ( gc-root spill-slot temp -- ) - temp spill-slot n>> spill-integer@ MOV - gc-root gc-root@ temp MOV ; +M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ; -M:: word save-gc-root ( gc-root register temp -- ) - gc-root gc-root@ register MOV ; - -: save-gc-roots ( gc-roots temp -- ) - '[ _ save-gc-root ] assoc-each ; - -GENERIC# load-gc-root 1 ( gc-root operand temp -- ) - -M:: spill-slot load-gc-root ( gc-root spill-slot temp -- ) - temp gc-root gc-root@ MOV - spill-slot n>> spill-integer@ temp MOV ; - -M:: word load-gc-root ( gc-root register temp -- ) - register gc-root gc-root@ MOV ; - -: load-gc-roots ( gc-roots temp -- ) - '[ _ load-gc-root ] assoc-each ; - -:: call-gc ( gc-root-count -- ) +M:: x86 %call-gc ( gc-root-count -- ) ! Pass pointer to start of GC roots as first parameter param-reg-1 gc-root-base param@ LEA ! Pass number of roots as second parameter @@ -524,15 +456,6 @@ M:: word load-gc-root ( gc-root register temp -- ) %prepare-alien-invoke "inline_gc" f %alien-invoke ; -M:: x86 %gc ( temp1 temp2 gc-roots gc-root-count -- ) - "end" define-label - temp1 temp2 check-nursery - "end" get JLE - gc-roots temp1 save-gc-roots - gc-root-count call-gc - gc-roots temp1 load-gc-roots - "end" resolve-label ; - M: x86 %alien-global [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ; diff --git a/extra/benchmark/pidigits/pidigits.factor b/extra/benchmark/pidigits/pidigits.factor index 0f8a98e6f9..d001d81a8c 100644 --- a/extra/benchmark/pidigits/pidigits.factor +++ b/extra/benchmark/pidigits/pidigits.factor @@ -54,6 +54,6 @@ IN: benchmark.pidigits [ 1 { { 1 0 } { 0 1 } } ] dip 0 0 (pidigits) ; : pidigits-main ( -- ) - 10000 pidigits ; + 2000 pidigits ; MAIN: pidigits-main diff --git a/extra/gpu/demos/bunny/sobel.f.glsl b/extra/gpu/demos/bunny/sobel.f.glsl index 16d2e408f2..7d21baf2d0 100644 --- a/extra/gpu/demos/bunny/sobel.f.glsl +++ b/extra/gpu/demos/bunny/sobel.f.glsl @@ -37,7 +37,7 @@ border_factor(vec2 texcoord) void main() { - gl_FragColor = /*vec4(border_factor(texcoord));*/ mix( + gl_FragColor = mix( texture2D(color_texture, texcoord), line_color, border_factor(texcoord) diff --git a/extra/gpu/render/render.factor b/extra/gpu/render/render.factor index ce6e0e25ff..8f1679bfa8 100644 --- a/extra/gpu/render/render.factor +++ b/extra/gpu/render/render.factor @@ -8,7 +8,7 @@ gpu.textures gpu.textures.private half-floats images kernel lexer locals math math.order math.parser namespaces opengl opengl.gl parser quotations sequences slots sorting specialized-arrays.alien specialized-arrays.float specialized-arrays.int -specialized-arrays.uint strings tr ui.gadgets.worlds variants +specialized-arrays.uint strings ui.gadgets.worlds variants vocabs.parser words ; IN: gpu.render @@ -338,8 +338,6 @@ DEFER: [bind-uniform-tuple] texture-unit' value>>-quot { value-cleave 2cleave } append ; -TR: hyphens>underscores "-" "_" ; - :: [bind-uniform] ( texture-unit uniform prefix -- texture-unit' quot ) prefix uniform name>> append hyphens>underscores :> name uniform uniform-type>> :> type diff --git a/extra/gpu/shaders/shaders.factor b/extra/gpu/shaders/shaders.factor index d2dd29595a..58633d4a71 100755 --- a/extra/gpu/shaders/shaders.factor +++ b/extra/gpu/shaders/shaders.factor @@ -8,7 +8,7 @@ io.encodings.ascii io.files io.pathnames kernel lexer literals locals math math.parser memoize multiline namespaces opengl opengl.gl opengl.shaders parser quotations sequences specialized-arrays.alien specialized-arrays.int splitting -strings ui.gadgets.worlds variants vectors vocabs vocabs.loader +strings tr ui.gadgets.worlds variants vectors vocabs vocabs.loader vocabs.parser words words.constant ; IN: gpu.shaders @@ -65,6 +65,8 @@ MEMO: output-index ( program-instance output-name -- index ) underscores "-" "_" ; + : gl-vertex-type ( component-type -- gl-type ) { { ubyte-components [ GL_UNSIGNED_BYTE ] } @@ -125,12 +127,12 @@ MEMO: output-index ( program-instance output-name -- index ) } 0&& [ vertex-attribute inaccurate-feedback-attribute-error ] unless ; :: [bind-vertex-attribute] ( stride offset vertex-attribute -- stride offset' quot ) - vertex-attribute name>> :> name - vertex-attribute component-type>> :> type - type gl-vertex-type :> gl-type - vertex-attribute dim>> :> dim - vertex-attribute normalize?>> >c-bool :> normalize? - vertex-attribute vertex-attribute-size :> size + vertex-attribute name>> hyphens>underscores :> name + vertex-attribute component-type>> :> type + type gl-vertex-type :> gl-type + vertex-attribute dim>> :> dim + vertex-attribute normalize?>> >c-bool :> normalize? + vertex-attribute vertex-attribute-size :> size stride offset size + { diff --git a/vm/debug.cpp b/vm/debug.cpp index 22e92809a7..5f78afb9db 100755 --- a/vm/debug.cpp +++ b/vm/debug.cpp @@ -173,10 +173,15 @@ void print_stack_frame(stack_frame *frame) print_string("\n"); print_obj(frame_scan(frame)); print_string("\n"); + print_string("word/quot addr: "); print_cell_hex((cell)frame_executing(frame)); - print_string(" "); + print_string("\n"); + print_string("word/quot xt: "); print_cell_hex((cell)frame->xt); print_string("\n"); + print_string("return address: "); + print_cell_hex((cell)FRAME_RETURN_ADDRESS(frame)); + print_string("\n"); } void print_callstack()