From 91e5c05f40a52dfacc5757c5c21c5416cb816973 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 19:28:58 -0500 Subject: [PATCH 01/21] debug.cpp: fep now prints return addresses in call stack dump --- vm/debug.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vm/debug.cpp b/vm/debug.cpp index 22e92809a7..5f78afb9db 100755 --- a/vm/debug.cpp +++ b/vm/debug.cpp @@ -173,10 +173,15 @@ void print_stack_frame(stack_frame *frame) print_string("\n"); print_obj(frame_scan(frame)); print_string("\n"); + print_string("word/quot addr: "); print_cell_hex((cell)frame_executing(frame)); - print_string(" "); + print_string("\n"); + print_string("word/quot xt: "); print_cell_hex((cell)frame->xt); print_string("\n"); + print_string("return address: "); + print_cell_hex((cell)FRAME_RETURN_ADDRESS(frame)); + print_string("\n"); } void print_callstack() From 1e389c921d6bede8c4e72b34431ed4493d49047e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 29 Jul 2009 20:06:28 -0500 Subject: [PATCH 02/21] remove some leftover debug code from bunny shader --- extra/gpu/demos/bunny/sobel.f.glsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/gpu/demos/bunny/sobel.f.glsl b/extra/gpu/demos/bunny/sobel.f.glsl index 16d2e408f2..7d21baf2d0 100644 --- a/extra/gpu/demos/bunny/sobel.f.glsl +++ b/extra/gpu/demos/bunny/sobel.f.glsl @@ -37,7 +37,7 @@ border_factor(vec2 texcoord) void main() { - gl_FragColor = /*vec4(border_factor(texcoord));*/ mix( + gl_FragColor = mix( texture2D(color_texture, texcoord), line_color, border_factor(texcoord) From 73862a9a03940999eac37d6374c74011ccc52e3d Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 21:44:08 -0500 Subject: [PATCH 03/21] cpu.x86.assembler: move operands to operands sub-vocabulary, clean up small-reg-* code in compiler backend --- basis/cpu/x86/32/32.factor | 13 +- basis/cpu/x86/64/64.factor | 13 +- basis/cpu/x86/64/unix/unix.factor | 8 +- .../cpu/x86/assembler/assembler-tests.factor | 3 +- basis/cpu/x86/assembler/assembler.factor | 98 +-------------- basis/cpu/x86/assembler/authors.txt | 1 + .../x86/assembler/operands/operands.factor | 118 ++++++++++++++++++ basis/cpu/x86/assembler/syntax/syntax.factor | 27 ++-- basis/cpu/x86/x86.factor | 87 ++----------- 9 files changed, 170 insertions(+), 198 deletions(-) create mode 100644 basis/cpu/x86/assembler/operands/operands.factor diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor index 727131aa25..76699c1306 100755 --- a/basis/cpu/x86/32/32.factor +++ b/basis/cpu/x86/32/32.factor @@ -1,13 +1,12 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: locals alien.c-types alien.syntax arrays kernel fry -math namespaces sequences system layouts io vocabs.loader -accessors init combinators command-line cpu.x86.assembler -cpu.x86 cpu.architecture make compiler compiler.units +USING: locals alien.c-types alien.syntax arrays kernel fry math +namespaces sequences system layouts io vocabs.loader accessors init +combinators command-line make compiler compiler.units compiler.constants compiler.alien compiler.codegen -compiler.codegen.fixup compiler.cfg.instructions -compiler.cfg.builder compiler.cfg.intrinsics -compiler.cfg.stack-frame ; +compiler.codegen.fixup compiler.cfg.instructions compiler.cfg.builder +compiler.cfg.intrinsics compiler.cfg.stack-frame cpu.x86.assembler +cpu.x86.assembler.operands cpu.x86 cpu.architecture ; IN: cpu.x86.32 ! We implement the FFI for Linux, OS X and Windows all at once. diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor index 8eb04eb2b5..f837c7de73 100644 --- a/basis/cpu/x86/64/64.factor +++ b/basis/cpu/x86/64/64.factor @@ -1,12 +1,11 @@ ! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays kernel math namespaces make sequences -system layouts alien alien.c-types alien.accessors alien.structs -slots splitting assocs combinators locals cpu.x86.assembler -cpu.x86 cpu.architecture compiler.constants -compiler.codegen compiler.codegen.fixup -compiler.cfg.instructions compiler.cfg.builder -compiler.cfg.intrinsics compiler.cfg.stack-frame ; +USING: accessors arrays kernel math namespaces make sequences system +layouts alien alien.c-types alien.accessors alien.structs slots +splitting assocs combinators locals compiler.constants +compiler.codegen compiler.codegen.fixup compiler.cfg.instructions +compiler.cfg.builder compiler.cfg.intrinsics compiler.cfg.stack-frame +cpu.x86.assembler cpu.x86.assembler.operands cpu.x86 cpu.architecture ; IN: cpu.x86.64 M: x86.64 machine-registers diff --git a/basis/cpu/x86/64/unix/unix.factor b/basis/cpu/x86/64/unix/unix.factor index eea960d03d..7ab25b6d3f 100644 --- a/basis/cpu/x86/64/unix/unix.factor +++ b/basis/cpu/x86/64/unix/unix.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays sequences math splitting make assocs -kernel layouts system alien.c-types alien.structs -cpu.architecture cpu.x86.assembler cpu.x86 -compiler.codegen compiler.cfg.registers ; +USING: accessors arrays sequences math splitting make assocs kernel +layouts system alien.c-types alien.structs cpu.architecture +cpu.x86.assembler cpu.x86.assembler.operands cpu.x86 compiler.codegen +compiler.cfg.registers ; IN: cpu.x86.64.unix M: int-regs param-regs drop { RDI RSI RDX RCX R8 R9 } ; diff --git a/basis/cpu/x86/assembler/assembler-tests.factor b/basis/cpu/x86/assembler/assembler-tests.factor index 66adee6bf6..962309c67e 100644 --- a/basis/cpu/x86/assembler/assembler-tests.factor +++ b/basis/cpu/x86/assembler/assembler-tests.factor @@ -1,4 +1,5 @@ -USING: cpu.x86.assembler kernel tools.test namespaces make ; +USING: cpu.x86.assembler cpu.x86.operands +kernel tools.test namespaces make ; IN: cpu.x86.assembler.tests [ { HEX: 49 HEX: 89 HEX: 04 HEX: 24 } ] [ [ R12 [] RAX MOV ] { } make ] unit-test diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index e91ebdcb1a..f15704a015 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -1,89 +1,16 @@ -! Copyright (C) 2005, 2009 Slava Pestov. +! Copyright (C) 2005, 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. USING: arrays io.binary kernel combinators kernel.private math namespaces make sequences words system layouts math.order accessors -cpu.x86.assembler.syntax ; +cpu.x86.assembler.operands cpu.x86.assembler.operands.private ; QUALIFIED: sequences IN: cpu.x86.assembler ! A postfix assembler for x86-32 and x86-64. -! In 32-bit mode, { 1234 } is absolute indirect addressing. -! In 64-bit mode, { 1234 } is RIP-relative. -! Beware! - -! Register operands -- eg, ECX -REGISTERS: 8 AL CL DL BL SPL BPL SIL DIL R8B R9B R10B R11B R12B R13B R14B R15B ; - -ALIAS: AH SPL -ALIAS: CH BPL -ALIAS: DH SIL -ALIAS: BH DIL - -REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ; - -REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ; - -REGISTERS: 64 -RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ; - -REGISTERS: 128 -XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 -XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ; - -TUPLE: byte value ; - -C: byte - ; - -! Addressing modes -TUPLE: indirect base index scale displacement ; - -M: indirect extended? base>> extended? ; - -: canonicalize-EBP ( indirect -- indirect ) - #! { EBP } ==> { EBP 0 } - dup [ base>> { EBP RBP R13 } member? ] [ displacement>> not ] bi and - [ 0 >>displacement ] when ; - -ERROR: bad-index indirect ; - -: check-ESP ( indirect -- indirect ) - dup index>> { ESP RSP } memq? [ bad-index ] when ; - -: canonicalize ( indirect -- indirect ) - #! Modify the indirect to work around certain addressing mode - #! quirks. - canonicalize-EBP check-ESP ; - -: ( base index scale displacement -- indirect ) - indirect boa canonicalize ; : reg-code ( reg -- n ) "register" word-prop 7 bitand ; @@ -168,18 +95,6 @@ M: register displacement, drop ; : addressing ( reg# indirect -- ) [ mod-r/m, ] [ sib, ] [ displacement, ] tri ; -! Utilities -UNION: operand register indirect ; - -GENERIC: operand-64? ( operand -- ? ) - -M: indirect operand-64? - [ base>> ] [ index>> ] bi [ operand-64? ] either? ; - -M: register-64 operand-64? drop t ; - -M: object operand-64? drop f ; - : rex.w? ( rex.w reg r/m -- ? ) { { [ dup register-128? ] [ drop operand-64? ] } @@ -276,15 +191,6 @@ M: object operand-64? drop f ; PRIVATE> -: [] ( reg/displacement -- indirect ) - dup integer? [ [ f f f ] dip ] [ f f f ] if ; - -: [+] ( reg displacement -- indirect ) - dup integer? - [ dup zero? [ drop f ] when [ f f ] dip ] - [ f f ] if - ; - ! Moving stuff GENERIC: PUSH ( op -- ) M: register PUSH f HEX: 50 short-operand ; diff --git a/basis/cpu/x86/assembler/authors.txt b/basis/cpu/x86/assembler/authors.txt index 1901f27a24..580f882c8d 100755 --- a/basis/cpu/x86/assembler/authors.txt +++ b/basis/cpu/x86/assembler/authors.txt @@ -1 +1,2 @@ Slava Pestov +Joe Groff diff --git a/basis/cpu/x86/assembler/operands/operands.factor b/basis/cpu/x86/assembler/operands/operands.factor new file mode 100644 index 0000000000..733c57689b --- /dev/null +++ b/basis/cpu/x86/assembler/operands/operands.factor @@ -0,0 +1,118 @@ +! Copyright (C) 2008, 2009 Slava Pestov, Joe Groff. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel words math accessors sequences cpu.x86.assembler.syntax ; +IN: cpu.x86.assembler.operands + +! In 32-bit mode, { 1234 } is absolute indirect addressing. +! In 64-bit mode, { 1234 } is RIP-relative. +! Beware! + +REGISTERS: 8 AL CL DL BL SPL BPL SIL DIL R8B R9B R10B R11B R12B R13B R14B R15B ; + +ALIAS: AH SPL +ALIAS: CH BPL +ALIAS: DH SIL +ALIAS: BH DIL + +REGISTERS: 16 AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W ; + +REGISTERS: 32 EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D ; + +REGISTERS: 64 +RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 ; + +REGISTERS: 128 +XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 +XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 ; + + ; + +! Addressing modes +TUPLE: indirect base index scale displacement ; + +M: indirect extended? base>> extended? ; + +: canonicalize-EBP ( indirect -- indirect ) + #! { EBP } ==> { EBP 0 } + dup [ base>> { EBP RBP R13 } member? ] [ displacement>> not ] bi and + [ 0 >>displacement ] when ; + +ERROR: bad-index indirect ; + +: check-ESP ( indirect -- indirect ) + dup index>> { ESP RSP } memq? [ bad-index ] when ; + +: canonicalize ( indirect -- indirect ) + #! Modify the indirect to work around certain addressing mode + #! quirks. + canonicalize-EBP check-ESP ; + +: ( base index scale displacement -- indirect ) + indirect boa canonicalize ; + +! Utilities +UNION: operand register indirect ; + +GENERIC: operand-64? ( operand -- ? ) + +M: indirect operand-64? + [ base>> ] [ index>> ] bi [ operand-64? ] either? ; + +M: register-64 operand-64? drop t ; + +M: object operand-64? drop f ; + +PRIVATE> + +: [] ( reg/displacement -- indirect ) + dup integer? [ [ f f f ] dip ] [ f f f ] if ; + +: [+] ( reg displacement -- indirect ) + dup integer? + [ dup zero? [ drop f ] when [ f f ] dip ] + [ f f ] if + ; + +TUPLE: byte value ; + +C: byte + + + +: 8-bit-version-of ( register -- register' ) 8 n-bit-version-of ; +: 16-bit-version-of ( register -- register' ) 16 n-bit-version-of ; +: 32-bit-version-of ( register -- register' ) 32 n-bit-version-of ; +: 64-bit-version-of ( register -- register' ) 64 n-bit-version-of ; +: native-version-of ( register -- register' ) cell-bits n-bit-version-of ; \ No newline at end of file diff --git a/basis/cpu/x86/assembler/syntax/syntax.factor b/basis/cpu/x86/assembler/syntax/syntax.factor index 631dcaa8f7..5b65c19155 100644 --- a/basis/cpu/x86/assembler/syntax/syntax.factor +++ b/basis/cpu/x86/assembler/syntax/syntax.factor @@ -1,14 +1,23 @@ -! Copyright (C) 2008 Slava Pestov. +! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel words words.symbol sequences lexer parser fry ; +USING: kernel words words.symbol sequences lexer parser fry +namespaces combinators assocs ; IN: cpu.x86.assembler.syntax -: define-register ( name num size -- ) - [ "cpu.x86.assembler" create dup define-symbol ] 2dip - [ dupd "register" set-word-prop ] dip - "register-size" set-word-prop ; +SYMBOL: registers -: define-registers ( names size -- ) - '[ _ define-register ] each-index ; +registers [ H{ } clone ] initialize -SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ; +: define-register ( name num size -- word ) + [ "cpu.x86.assembler.operands" create ] 2dip { + [ 2drop ] + [ 2drop define-symbol ] + [ drop "register" set-word-prop ] + [ nip "register-size" set-word-prop ] + } 3cleave ; + +: define-registers ( size names -- ) + [ swap '[ _ define-register ] map-index ] [ drop ] 2bi + registers get set-at ; + +SYNTAX: REGISTERS: scan-word ";" parse-tokens define-registers ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 258f842598..337232c259 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs alien alien.c-types arrays strings -cpu.x86.assembler cpu.x86.assembler.private cpu.architecture -kernel kernel.private math memory namespaces make sequences -words system layouts combinators math.order fry locals +cpu.x86.assembler cpu.x86.assembler.private cpu.x86.assembler.operands +cpu.architecture kernel kernel.private math memory namespaces make +sequences words system layouts combinators math.order fry locals compiler.constants compiler.cfg.registers compiler.cfg.instructions @@ -264,67 +264,6 @@ M:: x86 %box-alien ( dst src temp -- ) "end" resolve-label ] with-scope ; -: small-reg-8 ( reg -- reg' ) - H{ - { EAX RAX } - { ECX RCX } - { EDX RDX } - { EBX RBX } - { ESP RSP } - { EBP RBP } - { ESI RSP } - { EDI RDI } - - { RAX RAX } - { RCX RCX } - { RDX RDX } - { RBX RBX } - { RSP RSP } - { RBP RBP } - { RSI RSP } - { RDI RDI } - } at ; inline - -: small-reg-4 ( reg -- reg' ) - small-reg-8 H{ - { RAX EAX } - { RCX ECX } - { RDX EDX } - { RBX EBX } - { RSP ESP } - { RBP EBP } - { RSI ESP } - { RDI EDI } - } at ; inline - -: small-reg-2 ( reg -- reg' ) - small-reg-4 H{ - { EAX AX } - { ECX CX } - { EDX DX } - { EBX BX } - { ESP SP } - { EBP BP } - { ESI SI } - { EDI DI } - } at ; inline - -: small-reg-1 ( reg -- reg' ) - small-reg-4 { - { EAX AL } - { ECX CL } - { EDX DL } - { EBX BL } - } at ; inline - -: small-reg ( reg size -- reg' ) - { - { 1 [ small-reg-1 ] } - { 2 [ small-reg-2 ] } - { 4 [ small-reg-4 ] } - { 8 [ small-reg-8 ] } - } case ; - HOOK: small-regs cpu ( -- regs ) M: x86.32 small-regs { EAX ECX EDX EBX } ; @@ -336,7 +275,7 @@ M: x86.32 small-reg-native small-reg-4 ; M: x86.64 small-reg-native small-reg-8 ; : small-reg-that-isn't ( exclude -- reg' ) - small-regs swap [ small-reg-native ] map '[ _ memq? not ] find nip ; + small-regs swap [ native-version-of ] map '[ _ memq? not ] find nip ; : with-save/restore ( reg quot -- ) [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline @@ -346,7 +285,7 @@ M: x86.64 small-reg-native small-reg-8 ; #! call the quot with that. Otherwise, we find a small #! register that is not in exclude, and call quot, saving #! and restoring the small register. - dst small-reg-native small-regs memq? [ dst quot call ] [ + dst small-regs memq? [ dst quot call ] [ exclude small-reg-that-isn't [ quot call ] with-save/restore ] if ; inline @@ -362,7 +301,7 @@ M: x86.64 small-reg-native small-reg-8 ; src2 CL quot call dst src2 XCHG ] [ - ECX small-reg-native [ + ECX native-version-of [ CL src2 MOV drop dst CL quot call ] with-save/restore @@ -380,8 +319,8 @@ M:: x86 %string-nth ( dst src index temp -- ) ! 8th bit indicates whether we have to load from ! the aux vector or not. temp src index [+] LEA - new-dst 1 small-reg temp string-offset [+] MOV - new-dst new-dst 1 small-reg MOVZX + new-dst 8-bit-version-of temp string-offset [+] MOV + new-dst new-dst 8-bit-version-of MOVZX ! Do we have to look at the aux vector? new-dst HEX: 80 CMP "end" get JL @@ -392,8 +331,8 @@ M:: x86 %string-nth ( dst src index temp -- ) new-dst index ADD new-dst index ADD ! Load high 16 bits - new-dst 2 small-reg new-dst byte-array-offset [+] MOV - new-dst new-dst 2 small-reg MOVZX + new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV + new-dst new-dst 16-bit-version-of MOVZX new-dst 7 SHL ! Compute code point new-dst temp XOR @@ -405,12 +344,12 @@ M:: x86 %set-string-nth-fast ( ch str index temp -- ) ch { index str temp } [| new-ch | new-ch ch ?MOV temp str index [+] LEA - temp string-offset [+] new-ch 1 small-reg MOV + temp string-offset [+] new-ch 8-bit-version-of MOV ] with-small-register ; :: %alien-integer-getter ( dst src size quot -- ) dst { src } [| new-dst | - new-dst dup size small-reg dup src [] MOV + new-dst dup size 8 * n-bit-version-of dup src [] MOV quot call dst new-dst ?MOV ] with-small-register ; inline @@ -437,7 +376,7 @@ M: x86 %alien-double [] MOVSD ; :: %alien-integer-setter ( ptr value size -- ) value { ptr } [| new-value | new-value value ?MOV - ptr [] new-value size small-reg MOV + ptr [] new-value size 8 * n-bit-version-of MOV ] with-small-register ; inline M: x86 %set-alien-integer-1 1 %alien-integer-setter ; From 8ca17d053c51312a43bfaac6c8161738be8bbcae Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 21:56:37 -0500 Subject: [PATCH 04/21] cpu.x86: use full set of 8-bit, 16-bit and 32-bit registers on x86-64 to avoid clumsy save/restore logic --- basis/cpu/x86/assembler/operands/operands.factor | 7 ++----- basis/cpu/x86/x86.factor | 15 +++++---------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/basis/cpu/x86/assembler/operands/operands.factor b/basis/cpu/x86/assembler/operands/operands.factor index 733c57689b..b931fcfd87 100644 --- a/basis/cpu/x86/assembler/operands/operands.factor +++ b/basis/cpu/x86/assembler/operands/operands.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2008, 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel words math accessors sequences cpu.x86.assembler.syntax ; +USING: kernel words math accessors sequences namespaces +assocs layouts cpu.x86.assembler.syntax ; IN: cpu.x86.assembler.operands ! In 32-bit mode, { 1234 } is absolute indirect addressing. @@ -101,16 +102,12 @@ TUPLE: byte value ; C: byte - - : 8-bit-version-of ( register -- register' ) 8 n-bit-version-of ; : 16-bit-version-of ( register -- register' ) 16 n-bit-version-of ; : 32-bit-version-of ( register -- register' ) 32 n-bit-version-of ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 337232c259..5dc3ef2e0a 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -264,18 +264,13 @@ M:: x86 %box-alien ( dst src temp -- ) "end" resolve-label ] with-scope ; -HOOK: small-regs cpu ( -- regs ) +HOOK: small-reg? cpu ( reg -- regs ) -M: x86.32 small-regs { EAX ECX EDX EBX } ; -M: x86.64 small-regs { RAX RCX RDX RBX } ; - -HOOK: small-reg-native cpu ( reg -- reg' ) - -M: x86.32 small-reg-native small-reg-4 ; -M: x86.64 small-reg-native small-reg-8 ; +M: x86.32 small-reg? { EAX ECX EDX EBX } memq? ; +M: x86.64 small-reg? drop t ; : small-reg-that-isn't ( exclude -- reg' ) - small-regs swap [ native-version-of ] map '[ _ memq? not ] find nip ; + [ native-version-of ] map [ small-reg? not ] find nip ; : with-save/restore ( reg quot -- ) [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline @@ -285,7 +280,7 @@ M: x86.64 small-reg-native small-reg-8 ; #! call the quot with that. Otherwise, we find a small #! register that is not in exclude, and call quot, saving #! and restoring the small register. - dst small-regs memq? [ dst quot call ] [ + dst small-reg? [ dst quot call ] [ exclude small-reg-that-isn't [ quot call ] with-save/restore ] if ; inline From 1e8d13c1f16addf141914213ce20e952f536ca9f Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 22:32:22 -0500 Subject: [PATCH 05/21] cpu.x86.assembler: fix extended 8-bit registers (DIL, SIL, SPL, BPL) --- .../cpu/x86/assembler/assembler-tests.factor | 4 ++- basis/cpu/x86/assembler/assembler.factor | 36 +++++++++---------- .../x86/assembler/operands/operands.factor | 5 ++- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/basis/cpu/x86/assembler/assembler-tests.factor b/basis/cpu/x86/assembler/assembler-tests.factor index 962309c67e..14d4a1dd7c 100644 --- a/basis/cpu/x86/assembler/assembler-tests.factor +++ b/basis/cpu/x86/assembler/assembler-tests.factor @@ -1,7 +1,9 @@ -USING: cpu.x86.assembler cpu.x86.operands +USING: cpu.x86.assembler cpu.x86.assembler.operands kernel tools.test namespaces make ; IN: cpu.x86.assembler.tests +[ { HEX: 40 HEX: 8a HEX: 2a } ] [ [ BPL RDX [] MOV ] { } make ] unit-test + [ { HEX: 49 HEX: 89 HEX: 04 HEX: 24 } ] [ [ R12 [] RAX MOV ] { } make ] unit-test [ { HEX: 49 HEX: 8b HEX: 06 } ] [ [ RAX R14 [] MOV ] { } make ] unit-test diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index f15704a015..cefc190105 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2005, 2009 Slava Pestov, Joe Groff. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays io.binary kernel combinators kernel.private math +USING: arrays io.binary kernel combinators kernel.private math locals namespaces make sequences words system layouts math.order accessors cpu.x86.assembler.operands cpu.x86.assembler.operands.private ; QUALIFIED: sequences @@ -10,8 +10,6 @@ IN: cpu.x86.assembler > EBP or reg-code ; @@ -86,9 +84,7 @@ M: indirect displacement, dup displacement>> dup [ swap base>> [ dup fits-in-byte? [ , ] [ 4, ] if ] [ 4, ] if - ] [ - 2drop - ] if ; + ] [ 2drop ] if ; M: register displacement, drop ; @@ -107,22 +103,25 @@ M: register displacement, drop ; : rex.b ( m op -- n ) [ extended? [ BIN: 00000001 bitor ] when ] keep - dup indirect? [ - index>> extended? [ BIN: 00000010 bitor ] when - ] [ - drop - ] if ; + dup indirect? [ index>> extended? [ BIN: 00000010 bitor ] when ] [ drop ] if ; -: rex-prefix ( reg r/m rex.w -- ) +: no-prefix? ( prefix reg r/m -- ? ) + [ BIN: 01000000 = ] + [ extended-8-bit-register? not ] + [ extended-8-bit-register? not ] tri* + and and ; + +:: rex-prefix ( reg r/m rex.w -- ) #! Compile an AMD64 REX prefix. - 2over rex.w? BIN: 01001000 BIN: 01000000 ? - swap rex.r swap rex.b - dup BIN: 01000000 = [ drop ] [ , ] if ; + rex.w reg r/m rex.w? BIN: 01001000 BIN: 01000000 ? + r/m rex.r + reg rex.b + dup reg r/m no-prefix? [ drop ] [ , ] if ; : 16-prefix ( reg r/m -- ) [ register-16? ] either? [ HEX: 66 , ] when ; -: prefix ( reg r/m rex.w -- ) 2over 16-prefix rex-prefix ; +: prefix ( reg r/m rex.w -- ) [ drop 16-prefix ] [ rex-prefix ] 3bi ; : prefix-1 ( reg rex.w -- ) f swap prefix ; @@ -184,10 +183,7 @@ M: register displacement, drop ; : 2-operand ( dst src op -- ) #! Sets the opcode's direction bit. It is set if the #! destination is a direct register operand. - 2over 16-prefix - direction-bit - operand-size-bit - (2-operand) ; + [ drop 16-prefix ] [ direction-bit operand-size-bit (2-operand) ] 3bi ; PRIVATE> diff --git a/basis/cpu/x86/assembler/operands/operands.factor b/basis/cpu/x86/assembler/operands/operands.factor index b931fcfd87..d3cb66ff12 100644 --- a/basis/cpu/x86/assembler/operands/operands.factor +++ b/basis/cpu/x86/assembler/operands/operands.factor @@ -102,10 +102,13 @@ TUPLE: byte value ; C: byte +: extended-8-bit-register? ( register -- ? ) + { SPL BPL SIL DIL } memq? ; + : n-bit-version-of ( register n -- register' ) ! Certain 8-bit registers don't exist in 32-bit mode... [ "register" word-prop ] dip registers get at nth - dup { SPL BPL SIL DIL } memq? cell 4 = and + dup extended-8-bit-register? cell 4 = and [ drop f ] when ; : 8-bit-version-of ( register -- register' ) 8 n-bit-version-of ; From b133649eddd8cad391cef32bbfa168b7487661b1 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 23:35:51 -0500 Subject: [PATCH 06/21] compiler.cfg.ssa.destruction: tweak in preparation for landing Dan's new SSA liveness analysis --- basis/compiler/cfg/liveness/ssa/ssa.factor | 4 ++++ .../cfg/ssa/destruction/live-ranges/live-ranges.factor | 6 +++--- .../ssa/destruction/process-blocks/process-blocks.factor | 8 ++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/basis/compiler/cfg/liveness/ssa/ssa.factor b/basis/compiler/cfg/liveness/ssa/ssa.factor index 9fa22d22b1..dbfe2d70b4 100644 --- a/basis/compiler/cfg/liveness/ssa/ssa.factor +++ b/basis/compiler/cfg/liveness/ssa/ssa.factor @@ -55,3 +55,7 @@ SYMBOL: work-list H{ } clone live-outs set dup post-order add-to-work-list work-list get [ liveness-step ] slurp-deque ; + +: live-in? ( vreg bb -- ? ) live-in key? ; + +: live-out? ( vreg bb -- ? ) live-out key? ; \ No newline at end of file diff --git a/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor b/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor index 536f5e1e68..01aebd7e1c 100644 --- a/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor +++ b/basis/compiler/cfg/ssa/destruction/live-ranges/live-ranges.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs fry kernel namespaces sequences math arrays compiler.cfg.def-use compiler.cfg.instructions -compiler.cfg.liveness compiler.cfg.rpo ; +compiler.cfg.liveness.ssa compiler.cfg.rpo ; IN: compiler.cfg.ssa.destruction.live-ranges ! Live ranges for interference testing @@ -52,9 +52,9 @@ PRIVATE> ERROR: bad-kill-index vreg bb ; : kill-index ( vreg bb -- n ) - 2dup live-out key? [ 2drop 1/0. ] [ + 2dup live-out? [ 2drop 1/0. ] [ 2dup kill-indices get at at* [ 2nip ] [ - drop 2dup live-in key? + drop 2dup live-in? [ bad-kill-index ] [ 2drop -1/0. ] if ] if ] if ; diff --git a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor index f8c8a4d8b2..18af6e9904 100644 --- a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor +++ b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor @@ -4,7 +4,7 @@ USING: accessors assocs fry kernel locals math math.order arrays namespaces sequences sorting sets combinators combinators.short-circuit make compiler.cfg.def-use compiler.cfg.instructions -compiler.cfg.liveness +compiler.cfg.liveness.ssa compiler.cfg.dominance compiler.cfg.ssa.destruction.state compiler.cfg.ssa.destruction.forest @@ -19,13 +19,13 @@ IN: compiler.cfg.ssa.destruction.process-blocks SYMBOLS: phi-union unioned-blocks ; :: operand-live-into-phi-node's-block? ( bb src dst -- ? ) - src bb live-in key? ; + src bb live-in? ; :: phi-node-is-live-out-of-operand's-block? ( bb src dst -- ? ) - dst src def-of live-out key? ; + dst src def-of live-out? ; :: operand-is-phi-node-and-live-into-operand's-block? ( bb src dst -- ? ) - { [ src insn-of ##phi? ] [ src src def-of live-in key? ] } 0&& ; + { [ src insn-of ##phi? ] [ src src def-of live-in? ] } 0&& ; :: operand-being-renamed? ( bb src dst -- ? ) src processed-names get key? ; From 6274c0337afed6190fecc638cb7d4fe9933b5216 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 29 Jul 2009 23:43:00 -0500 Subject: [PATCH 07/21] compiler.cfg.ssa.destruction: fix --- .../cfg/ssa/destruction/process-blocks/process-blocks.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor index 18af6e9904..ce2aa1c5d7 100644 --- a/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor +++ b/basis/compiler/cfg/ssa/destruction/process-blocks/process-blocks.factor @@ -61,10 +61,10 @@ SYMBOLS: phi-union unioned-blocks ; } cond ; : node-is-live-in-of-child? ( node child -- ? ) - [ vreg>> ] [ bb>> live-in ] bi* key? ; + [ vreg>> ] [ bb>> ] bi* live-in? ; : node-is-live-out-of-child? ( node child -- ? ) - [ vreg>> ] [ bb>> live-out ] bi* key? ; + [ vreg>> ] [ bb>> ] bi* live-out? ; :: insert-copy ( bb src dst -- ) bb src dst trivial-interference From 791fbe4003d9824ce58a5a4422369492f0d8401b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 02:13:30 -0500 Subject: [PATCH 08/21] compiler.cfg.linear-scan: fix case where a register can be made available for only a part of a live interval's lifetime, but there are no more usages after the split location. This case never came up until global stack analysis, at which point it started to be exercised on x86-32 --- .../linear-scan/allocation/allocation.factor | 18 --- .../allocation/spilling/spilling.factor | 90 ++++++------- .../allocation/splitting/splitting.factor | 4 - .../cfg/linear-scan/debugger/debugger.factor | 17 +-- .../cfg/linear-scan/linear-scan-tests.factor | 124 +++++++++++++----- .../live-intervals/live-intervals.factor | 1 - 6 files changed, 129 insertions(+), 125 deletions(-) diff --git a/basis/compiler/cfg/linear-scan/allocation/allocation.factor b/basis/compiler/cfg/linear-scan/allocation/allocation.factor index c197da9814..d55266e6e4 100644 --- a/basis/compiler/cfg/linear-scan/allocation/allocation.factor +++ b/basis/compiler/cfg/linear-scan/allocation/allocation.factor @@ -28,29 +28,11 @@ IN: compiler.cfg.linear-scan.allocation : no-free-registers? ( result -- ? ) second 0 = ; inline -: split-to-fit ( new n -- before after ) - split-interval - [ [ compute-start/end ] bi@ ] - [ >>split-next drop ] - [ ] - 2tri ; - -: register-partially-available ( new result -- ) - { - { [ 2dup second 1 - spill-live-out? ] [ drop spill-live-out ] } - { [ 2dup second 1 - spill-live-in? ] [ drop spill-live-in ] } - [ - [ second 1 - split-to-fit ] keep - '[ _ register-available ] [ add-unhandled ] bi* - ] - } cond ; - : assign-register ( new -- ) dup coalesce? [ coalesce ] [ dup register-status { { [ dup no-free-registers? ] [ drop assign-blocked-register ] } { [ 2dup register-available? ] [ register-available ] } - ! [ register-partially-available ] [ drop assign-blocked-register ] } cond ] if ; diff --git a/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor b/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor index 14046a91f1..4dd3c8176c 100644 --- a/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor +++ b/basis/compiler/cfg/linear-scan/allocation/spilling/spilling.factor @@ -28,23 +28,42 @@ ERROR: bad-live-ranges interval ; [ swap first (>>from) ] 2bi ; -: split-for-spill ( live-interval n -- before after ) - split-interval - { - [ [ trim-before-ranges ] [ trim-after-ranges ] bi* ] - [ [ compute-start/end ] bi@ ] - [ [ check-ranges ] bi@ ] - [ ] - } 2cleave ; - : assign-spill ( live-interval -- ) - dup vreg>> assign-spill-slot >>spill-to f >>split-next drop ; + dup vreg>> assign-spill-slot >>spill-to drop ; + +: spill-before ( before -- before/f ) + ! If the interval does not have any usages before the spill location, + ! then it is the second child of an interval that was split. We reload + ! the value and let the resolve pass insert a split later. + dup uses>> empty? [ drop f ] [ + { + [ ] + [ assign-spill ] + [ trim-before-ranges ] + [ compute-start/end ] + [ check-ranges ] + } cleave + ] if ; : assign-reload ( live-interval -- ) dup vreg>> assign-spill-slot >>reload-from drop ; -: split-and-spill ( live-interval n -- before after ) - split-for-spill 2dup [ assign-spill ] [ assign-reload ] bi* ; +: spill-after ( after -- after/f ) + ! If the interval has no more usages after the spill location, + ! then it is the first child of an interval that was split. We + ! spill the value and let the resolve pass insert a reload later. + dup uses>> empty? [ drop f ] [ + { + [ ] + [ assign-reload ] + [ trim-after-ranges ] + [ compute-start/end ] + [ check-ranges ] + } cleave + ] if ; + +: split-for-spill ( live-interval n -- before after ) + split-interval [ spill-before ] [ spill-after ] bi* ; : find-use-position ( live-interval new -- n ) [ uses>> ] [ start>> '[ _ >= ] ] bi* find nip 1/0. or ; @@ -72,47 +91,12 @@ ERROR: bad-live-ranges interval ; [ uses>> first ] [ second ] bi* > ; : spill-new ( new pair -- ) - drop - { - [ trim-after-ranges ] - [ compute-start/end ] - [ assign-reload ] - [ add-unhandled ] - } cleave ; - -: spill-live-out? ( live-interval n -- ? ) [ uses>> last ] dip < ; - -: spill-live-out ( live-interval -- ) - ! The interval has no more usages after the spill location. This - ! means it is the first child of an interval that was split. We - ! spill the value and let the resolve pass insert a reload later. - { - [ trim-before-ranges ] - [ compute-start/end ] - [ assign-spill ] - [ add-handled ] - } cleave ; - -: spill-live-in? ( live-interval n -- ? ) [ uses>> first ] dip > ; - -: spill-live-in ( live-interval -- ) - ! The interval does not have any usages before the spill location. - ! This means it is the second child of an interval that was - ! split. We reload the value and let the resolve pass insert a - ! split later. - { - [ trim-after-ranges ] - [ compute-start/end ] - [ assign-reload ] - [ add-unhandled ] - } cleave ; + drop spill-after add-unhandled ; : spill ( live-interval n -- ) - { - { [ 2dup spill-live-out? ] [ drop spill-live-out ] } - { [ 2dup spill-live-in? ] [ drop spill-live-in ] } - [ split-and-spill [ add-handled ] [ add-unhandled ] bi* ] - } cond ; + split-for-spill + [ [ add-handled ] when* ] + [ [ add-unhandled ] when* ] bi* ; :: spill-intersecting-active ( new reg -- ) ! If there is an active interval using 'reg' (there should be at @@ -149,8 +133,8 @@ ERROR: bad-live-ranges interval ; ! A register would be available for part of the new ! interval's lifetime if all active and inactive intervals ! using that register were split and spilled. - [ second 1 - split-and-spill add-unhandled ] keep - spill-available ; + [ second 1 - split-for-spill [ add-unhandled ] when* ] keep + '[ _ spill-available ] when* ; : assign-blocked-register ( new -- ) dup spill-status { diff --git a/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor b/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor index 0a67710bc8..874523d70a 100644 --- a/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor +++ b/basis/compiler/cfg/linear-scan/allocation/splitting/splitting.factor @@ -27,9 +27,6 @@ IN: compiler.cfg.linear-scan.allocation.splitting : split-uses ( uses n -- before after ) '[ _ <= ] partition ; -: record-split ( live-interval before after -- ) - [ >>split-before ] [ >>split-after ] bi* drop ; inline - ERROR: splitting-too-early ; ERROR: splitting-too-late ; @@ -56,7 +53,6 @@ ERROR: splitting-atomic-interval ; live-interval clone :> after live-interval uses>> n split-uses before after [ (>>uses) ] bi-curry@ bi* live-interval ranges>> n split-ranges before after [ (>>ranges) ] bi-curry@ bi* - live-interval before after record-split before split-before after split-after ; diff --git a/basis/compiler/cfg/linear-scan/debugger/debugger.factor b/basis/compiler/cfg/linear-scan/debugger/debugger.factor index a350ee5f43..c9c1b77a0d 100644 --- a/basis/compiler/cfg/linear-scan/debugger/debugger.factor +++ b/basis/compiler/cfg/linear-scan/debugger/debugger.factor @@ -5,25 +5,12 @@ namespaces prettyprint compiler.cfg.linear-scan.live-intervals compiler.cfg.linear-scan.allocation compiler.cfg assocs ; IN: compiler.cfg.linear-scan.debugger -: check-assigned ( live-intervals -- ) - [ - reg>> - [ "Not all intervals have registers" throw ] unless - ] each ; - -: split-children ( live-interval -- seq ) - dup split-before>> [ - [ split-before>> ] [ split-after>> ] bi - [ split-children ] bi@ - append - ] [ 1array ] if ; - : check-linear-scan ( live-intervals machine-registers -- ) [ [ clone ] map dup [ [ vreg>> ] keep ] H{ } map>assoc live-intervals set - ] dip allocate-registers - [ split-children ] map concat check-assigned ; + ] dip + allocate-registers drop ; : picture ( uses -- str ) dup last 1 + CHAR: space diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor index f38946f8e2..df91109e78 100644 --- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor +++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor @@ -75,6 +75,9 @@ check-numbering? on { T{ live-range f 0 5 } } 0 split-ranges ] unit-test +H{ { int-regs 10 } { float-regs 20 } } clone spill-counts set +H{ } spill-slots set + [ T{ live-interval { vreg T{ vreg { reg-class int-regs } { n 1 } } } @@ -82,6 +85,7 @@ check-numbering? on { end 2 } { uses V{ 0 1 } } { ranges V{ T{ live-range f 0 2 } } } + { spill-to 10 } } T{ live-interval { vreg T{ vreg { reg-class int-regs } { n 1 } } } @@ -89,6 +93,7 @@ check-numbering? on { end 5 } { uses V{ 5 } } { ranges V{ T{ live-range f 5 5 } } } + { reload-from 10 } } ] [ T{ live-interval @@ -97,82 +102,61 @@ check-numbering? on { end 5 } { uses V{ 0 1 5 } } { ranges V{ T{ live-range f 0 5 } } } - } 2 split-for-spill [ f >>split-next ] bi@ + } 2 split-for-spill ] unit-test [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 0 } { end 1 } { uses V{ 0 } } { ranges V{ T{ live-range f 0 1 } } } + { spill-to 11 } } T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 1 } { end 5 } { uses V{ 1 5 } } { ranges V{ T{ live-range f 1 5 } } } + { reload-from 11 } } ] [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 2 } } } { start 0 } { end 5 } { uses V{ 0 1 5 } } { ranges V{ T{ live-range f 0 5 } } } - } 0 split-for-spill [ f >>split-next ] bi@ + } 0 split-for-spill ] unit-test [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 0 } { end 1 } { uses V{ 0 } } { ranges V{ T{ live-range f 0 1 } } } + { spill-to 12 } } T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 20 } { end 30 } { uses V{ 20 30 } } { ranges V{ T{ live-range f 20 30 } } } + { reload-from 12 } } ] [ T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } + { vreg T{ vreg { reg-class int-regs } { n 3 } } } { start 0 } { end 30 } { uses V{ 0 20 30 } } { ranges V{ T{ live-range f 0 8 } T{ live-range f 10 18 } T{ live-range f 20 30 } } } - } 10 split-for-spill [ f >>split-next ] bi@ -] unit-test - -[ - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 0 } - { end 4 } - { uses V{ 0 1 4 } } - { ranges V{ T{ live-range f 0 4 } } } - } - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 5 } - { end 10 } - { uses V{ 5 10 } } - { ranges V{ T{ live-range f 5 10 } } } - } -] [ - T{ live-interval - { vreg T{ vreg { reg-class int-regs } { n 1 } } } - { start 0 } - { end 10 } - { uses V{ 0 1 4 5 10 } } - { ranges V{ T{ live-range f 0 10 } } } - } 4 split-to-fit [ f >>split-next ] bi@ + } 10 split-for-spill ] unit-test [ @@ -352,6 +336,78 @@ check-numbering? on check-linear-scan ] must-fail +! Problem with spilling intervals with no more usages after the spill location + +[ ] [ + { + T{ live-interval + { vreg T{ vreg { n 1 } { reg-class int-regs } } } + { start 0 } + { end 20 } + { uses V{ 0 10 20 } } + { ranges V{ T{ live-range f 0 2 } T{ live-range f 10 20 } } } + } + T{ live-interval + { vreg T{ vreg { n 2 } { reg-class int-regs } } } + { start 0 } + { end 20 } + { uses V{ 0 10 20 } } + { ranges V{ T{ live-range f 0 2 } T{ live-range f 10 20 } } } + } + T{ live-interval + { vreg T{ vreg { n 3 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 6 } } + { ranges V{ T{ live-range f 4 8 } } } + } + T{ live-interval + { vreg T{ vreg { n 4 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 4 8 } } } + } + + ! This guy will invoke the 'spill partially available' code path + T{ live-interval + { vreg T{ vreg { n 5 } { reg-class int-regs } } } + { start 4 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 4 8 } } } + } + } + H{ { int-regs { "A" "B" } } } + check-linear-scan +] unit-test + + +! Test spill-new code path + +[ ] [ + { + T{ live-interval + { vreg T{ vreg { n 1 } { reg-class int-regs } } } + { start 0 } + { end 10 } + { uses V{ 0 6 10 } } + { ranges V{ T{ live-range f 0 10 } } } + } + + ! This guy will invoke the 'spill new' code path + T{ live-interval + { vreg T{ vreg { n 5 } { reg-class int-regs } } } + { start 2 } + { end 8 } + { uses V{ 8 } } + { ranges V{ T{ live-range f 2 8 } } } + } + } + H{ { int-regs { "A" } } } + check-linear-scan +] unit-test + SYMBOL: available SYMBOL: taken diff --git a/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor b/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor index 77aae14503..48bef197e6 100644 --- a/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor +++ b/basis/compiler/cfg/linear-scan/live-intervals/live-intervals.factor @@ -13,7 +13,6 @@ C: live-range TUPLE: live-interval vreg reg spill-to reload-from -split-before split-after split-next start end ranges uses copy-from ; From a9977d7c79239859f51e64fe23ecc70f251c1f1c Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 02:22:37 -0500 Subject: [PATCH 09/21] cpu.x86: update non-optimizing compiler backends for assembler vocab split --- basis/cpu/x86/32/bootstrap.factor | 3 ++- basis/cpu/x86/64/bootstrap.factor | 3 ++- basis/cpu/x86/64/winnt/bootstrap.factor | 3 ++- basis/cpu/x86/bootstrap.factor | 8 ++++---- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor index 490d37ccbc..674cc817d7 100644 --- a/basis/cpu/x86/32/bootstrap.factor +++ b/basis/cpu/x86/32/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser compiler.constants ; +cpu.x86.assembler cpu.x86.assembler.operands layouts +vocabs parser compiler.constants ; IN: bootstrap.x86 4 \ cell set diff --git a/basis/cpu/x86/64/bootstrap.factor b/basis/cpu/x86/64/bootstrap.factor index c5c7e63dbc..8b0d53cda5 100644 --- a/basis/cpu/x86/64/bootstrap.factor +++ b/basis/cpu/x86/64/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser compiler.constants math ; +layouts vocabs parser compiler.constants math +cpu.x86.assembler cpu.x86.assembler.operands ; IN: bootstrap.x86 8 \ cell set diff --git a/basis/cpu/x86/64/winnt/bootstrap.factor b/basis/cpu/x86/64/winnt/bootstrap.factor index ff15ef27af..0228082956 100644 --- a/basis/cpu/x86/64/winnt/bootstrap.factor +++ b/basis/cpu/x86/64/winnt/bootstrap.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser ; +layouts vocabs parser cpu.x86.assembler +cpu.x86.assembler.operands ; IN: bootstrap.x86 : stack-frame-size ( -- n ) 8 bootstrap-cells ; diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index 474ce2ea46..6363f17e48 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: bootstrap.image.private kernel kernel.private namespaces -system cpu.x86.assembler layouts compiler.units math -math.private compiler.constants vocabs slots.private words -locals.backend make sequences combinators arrays ; +USING: bootstrap.image.private kernel kernel.private namespaces system +layouts compiler.units math math.private compiler.constants vocabs +slots.private words locals.backend make sequences combinators arrays + cpu.x86.assembler cpu.x86.assembler.operands ; IN: bootstrap.x86 big-endian off From e1caaca6dfde03061639b6cfac927f98541a3562 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 02:45:29 -0500 Subject: [PATCH 10/21] bootstrap.compiler: compile a few more words early, for a big bootstrap speed boost --- basis/bootstrap/compiler/compiler.factor | 35 +++++++++--------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/basis/bootstrap/compiler/compiler.factor b/basis/bootstrap/compiler/compiler.factor index 0505dcb184..0a3ff10a8e 100755 --- a/basis/bootstrap/compiler/compiler.factor +++ b/basis/bootstrap/compiler/compiler.factor @@ -6,9 +6,8 @@ classes.private arrays hashtables vectors classes.tuple sbufs hashtables.private sequences.private math classes.tuple.private growable namespaces.private assocs words command-line vocabs io io.encodings.string libc splitting math.parser memory compiler.units -math.order compiler.tree.builder compiler.tree.optimizer -compiler.cfg.optimizer ; -FROM: compiler => enable-optimizer compile-word ; +math.order quotations quotations.private assocs.private ; +FROM: compiler => enable-optimizer ; IN: bootstrap.compiler ! Don't bring this in when deploying, since it will store a @@ -42,16 +41,24 @@ nl ! which are also quick to compile are replaced by ! compiled definitions as soon as possible. { - not + not ? + + 2over roll -roll array? hashtable? vector? tuple? sbuf? tombstone? + curry? compose? callable? + quotation? - array-nth set-array-nth + curry compose uncurry + + array-nth set-array-nth length>> wrap probe namestack* + + layout-of } compile-unoptimized "." write flush @@ -75,7 +82,7 @@ nl "." write flush { - hashcode* = get set + hashcode* = equal? assoc-stack (assoc-stack) get set } compile-unoptimized "." write flush @@ -100,22 +107,6 @@ nl "." write flush -{ build-tree } compile-unoptimized - -"." write flush - -{ optimize-tree } compile-unoptimized - -"." write flush - -{ optimize-cfg } compile-unoptimized - -"." write flush - -{ compile-word } compile-unoptimized - -"." write flush - vocabs [ words compile-unoptimized "." write flush ] each " done" print flush From d81dec5d459110483f193d9c7f5c8e98c2063fe2 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 05:04:46 -0500 Subject: [PATCH 11/21] cpu.x86: fix a bug in small-register logic on 32-bit. Also, on 32-bit, we don't need to do any special register shuffling to work with 16-bit operands since all registers have 16-bit variants. So now only 8-bit operands on x86-32 require special treatment --- basis/cpu/x86/x86.factor | 113 ++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 5dc3ef2e0a..6e21b46fd5 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -264,52 +264,48 @@ M:: x86 %box-alien ( dst src temp -- ) "end" resolve-label ] with-scope ; -HOOK: small-reg? cpu ( reg -- regs ) +! The 'small-reg' mess is pretty crappy, but its only used on x86-32. +! On x86-64, all registers have 8-bit versions. However, a similar +! problem arises for shifts, where the shift count must be in CL, and +! so one day I will fix this properly by adding precoloring to the +! register allocator. -M: x86.32 small-reg? { EAX ECX EDX EBX } memq? ; -M: x86.64 small-reg? drop t ; +HOOK: has-small-reg? cpu ( reg size -- ? ) + +CONSTANT: have-byte-regs { EAX ECX EDX EBX } + +M: x86.32 has-small-reg? + { + { 8 [ have-byte-regs memq? ] } + { 16 [ drop t ] } + { 32 [ drop t ] } + } case ; + +M: x86.64 has-small-reg? drop t ; : small-reg-that-isn't ( exclude -- reg' ) - [ native-version-of ] map [ small-reg? not ] find nip ; + [ have-byte-regs ] dip + [ native-version-of ] map + '[ _ memq? not ] find nip ; : with-save/restore ( reg quot -- ) [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline -:: with-small-register ( dst exclude quot: ( new-dst -- ) -- ) - #! If the destination register overlaps a small register, we - #! call the quot with that. Otherwise, we find a small - #! register that is not in exclude, and call quot, saving - #! and restoring the small register. - dst small-reg? [ dst quot call ] [ +:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- ) + ! If the destination register overlaps a small register with + ! 'size' bits, we call the quot with that. Otherwise, we find a + ! small register that is not in exclude, and call quot, saving and + ! restoring the small register. + dst size has-small-reg? [ dst quot call ] [ exclude small-reg-that-isn't [ quot call ] with-save/restore ] if ; inline -: shift-count? ( reg -- ? ) { ECX RCX } memq? ; - -:: emit-shift ( dst src1 src2 quot -- ) - src2 shift-count? [ - dst CL quot call - ] [ - dst shift-count? [ - dst src2 XCHG - src2 CL quot call - dst src2 XCHG - ] [ - ECX native-version-of [ - CL src2 MOV - drop dst CL quot call - ] with-save/restore - ] if - ] if ; inline - -M: x86 %shl [ SHL ] emit-shift ; -M: x86 %shr [ SHR ] emit-shift ; -M: x86 %sar [ SAR ] emit-shift ; - M:: x86 %string-nth ( dst src index temp -- ) + ! We request a small-reg of size 8 since those of size 16 are + ! a superset. "end" define-label - dst { src index temp } [| new-dst | + dst { src index temp } 8 [| new-dst | ! Load the least significant 7 bits into new-dst. ! 8th bit indicates whether we have to load from ! the aux vector or not. @@ -336,15 +332,15 @@ M:: x86 %string-nth ( dst src index temp -- ) ] with-small-register ; M:: x86 %set-string-nth-fast ( ch str index temp -- ) - ch { index str temp } [| new-ch | + ch { index str temp } 8 [| new-ch | new-ch ch ?MOV temp str index [+] LEA temp string-offset [+] new-ch 8-bit-version-of MOV ] with-small-register ; :: %alien-integer-getter ( dst src size quot -- ) - dst { src } [| new-dst | - new-dst dup size 8 * n-bit-version-of dup src [] MOV + dst { src } size [| new-dst | + new-dst dup size n-bit-version-of dup src [] MOV quot call dst new-dst ?MOV ] with-small-register ; inline @@ -352,35 +348,56 @@ M:: x86 %set-string-nth-fast ( ch str index temp -- ) : %alien-unsigned-getter ( dst src size -- ) [ MOVZX ] %alien-integer-getter ; inline -M: x86 %alien-unsigned-1 1 %alien-unsigned-getter ; -M: x86 %alien-unsigned-2 2 %alien-unsigned-getter ; +M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ; +M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ; +M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ; : %alien-signed-getter ( dst src size -- ) [ MOVSX ] %alien-integer-getter ; inline -M: x86 %alien-signed-1 1 %alien-signed-getter ; -M: x86 %alien-signed-2 2 %alien-signed-getter ; -M: x86 %alien-signed-4 4 %alien-signed-getter ; - -M: x86 %alien-unsigned-4 4 [ 2drop ] %alien-integer-getter ; +M: x86 %alien-signed-1 8 %alien-signed-getter ; +M: x86 %alien-signed-2 16 %alien-signed-getter ; +M: x86 %alien-signed-4 32 %alien-signed-getter ; M: x86 %alien-cell [] MOV ; M: x86 %alien-float dupd [] MOVSS dup CVTSS2SD ; M: x86 %alien-double [] MOVSD ; :: %alien-integer-setter ( ptr value size -- ) - value { ptr } [| new-value | + value { ptr } size [| new-value | new-value value ?MOV - ptr [] new-value size 8 * n-bit-version-of MOV + ptr [] new-value size n-bit-version-of MOV ] with-small-register ; inline -M: x86 %set-alien-integer-1 1 %alien-integer-setter ; -M: x86 %set-alien-integer-2 2 %alien-integer-setter ; -M: x86 %set-alien-integer-4 4 %alien-integer-setter ; +M: x86 %set-alien-integer-1 8 %alien-integer-setter ; +M: x86 %set-alien-integer-2 16 %alien-integer-setter ; +M: x86 %set-alien-integer-4 32 %alien-integer-setter ; M: x86 %set-alien-cell [ [] ] dip MOV ; M: x86 %set-alien-float dup dup CVTSD2SS [ [] ] dip MOVSS ; M: x86 %set-alien-double [ [] ] dip MOVSD ; +: shift-count? ( reg -- ? ) { ECX RCX } memq? ; + +:: emit-shift ( dst src1 src2 quot -- ) + src2 shift-count? [ + dst CL quot call + ] [ + dst shift-count? [ + dst src2 XCHG + src2 CL quot call + dst src2 XCHG + ] [ + ECX native-version-of [ + CL src2 MOV + drop dst CL quot call + ] with-save/restore + ] if + ] if ; inline + +M: x86 %shl [ SHL ] emit-shift ; +M: x86 %shr [ SHR ] emit-shift ; +M: x86 %sar [ SAR ] emit-shift ; + : load-zone-ptr ( reg -- ) #! Load pointer to start of zone array 0 MOV "nursery" f rc-absolute-cell rel-dlsym ; From d71e2f9577d347962c81462562167e6ab703f87b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 05:12:40 -0500 Subject: [PATCH 12/21] cpu.x86: Fix shuffle bug. Shuffling bugs occurring in code that runs before optimizer/stack checker is online are only caught at runtime during bootstrap, what a pain --- basis/cpu/x86/x86.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 6e21b46fd5..5bad8e067c 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -281,7 +281,7 @@ M: x86.32 has-small-reg? { 32 [ drop t ] } } case ; -M: x86.64 has-small-reg? drop t ; +M: x86.64 has-small-reg? 2drop t ; : small-reg-that-isn't ( exclude -- reg' ) [ have-byte-regs ] dip From cd7a1d6c5837215a704a7179a69db7726e603b81 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 08:27:52 -0500 Subject: [PATCH 13/21] Oopsie --- basis/cpu/x86/64/unix/bootstrap.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/cpu/x86/64/unix/bootstrap.factor b/basis/cpu/x86/64/unix/bootstrap.factor index e48a20a9de..b6d56840e2 100644 --- a/basis/cpu/x86/64/unix/bootstrap.factor +++ b/basis/cpu/x86/64/unix/bootstrap.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel namespaces system -cpu.x86.assembler layouts vocabs parser ; +cpu.x86.assembler cpu.x86.assembler.operands layouts vocabs parser ; IN: bootstrap.x86 : stack-frame-size ( -- n ) 4 bootstrap-cells ; From be363d1a5b8090c5b01faf30bf488a3650226d1b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 09:19:44 -0500 Subject: [PATCH 14/21] compiler.cfg: Get inline GC checks working again, using a dataflow analysis to compute uninitialized stack locations in compiler.cfg.stacks.uninitialized. Re-enable intrinsics which use inline allocation --- .../cfg/gc-checks/gc-checks-tests.factor | 26 +++++++ basis/compiler/cfg/gc-checks/gc-checks.factor | 28 ++++--- .../cfg/instructions/instructions.factor | 4 +- .../compiler/cfg/intrinsics/intrinsics.factor | 14 ++-- .../cfg/linearization/linearization.factor | 21 ++--- .../uninitialized/uninitialized-tests.factor | 61 +++++++++++++++ .../stacks/uninitialized/uninitialized.factor | 76 +++++++++++++++++++ basis/compiler/codegen/codegen.factor | 1 + basis/cpu/architecture/architecture.factor | 2 +- basis/cpu/x86/x86.factor | 7 +- 10 files changed, 211 insertions(+), 29 deletions(-) create mode 100644 basis/compiler/cfg/gc-checks/gc-checks-tests.factor create mode 100644 basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor create mode 100644 basis/compiler/cfg/stacks/uninitialized/uninitialized.factor diff --git a/basis/compiler/cfg/gc-checks/gc-checks-tests.factor b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor new file mode 100644 index 0000000000..7b3e07faf8 --- /dev/null +++ b/basis/compiler/cfg/gc-checks/gc-checks-tests.factor @@ -0,0 +1,26 @@ +IN: compiler.cfg.gc-checks.tests +USING: compiler.cfg.gc-checks compiler.cfg.debugger +compiler.cfg.registers compiler.cfg.instructions compiler.cfg +compiler.cfg.predecessors cpu.architecture tools.test kernel vectors +namespaces accessors sequences ; + +: test-gc-checks ( -- ) + cfg new 0 get >>entry + compute-predecessors + insert-gc-checks + drop ; + +V{ + T{ ##inc-d f 3 } + T{ ##replace f V int-regs 0 D 1 } +} 0 test-bb + +V{ + T{ ##box-float f V int-regs 0 V int-regs 1 } +} 1 test-bb + +0 get 1 get 1vector >>successors drop + +[ ] [ test-gc-checks ] unit-test + +[ V{ D 0 D 2 } ] [ 1 get instructions>> first uninitialized-locs>> ] unit-test \ No newline at end of file diff --git a/basis/compiler/cfg/gc-checks/gc-checks.factor b/basis/compiler/cfg/gc-checks/gc-checks.factor index 8435a231e6..c34f2c42a3 100644 --- a/basis/compiler/cfg/gc-checks/gc-checks.factor +++ b/basis/compiler/cfg/gc-checks/gc-checks.factor @@ -1,17 +1,27 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel sequences assocs -compiler.cfg.rpo compiler.cfg.instructions -compiler.cfg.hats ; +USING: accessors kernel sequences assocs fry +compiler.cfg.rpo +compiler.cfg.hats +compiler.cfg.registers +compiler.cfg.instructions +compiler.cfg.stacks.uninitialized ; IN: compiler.cfg.gc-checks -: gc? ( bb -- ? ) +: insert-gc-check? ( bb -- ? ) instructions>> [ ##allocation? ] any? ; -: insert-gc-check ( basic-block -- ) - dup gc? [ - [ i i f \ ##gc new-insn prefix ] change-instructions drop - ] [ drop ] if ; +: blocks-with-gc ( cfg -- bbs ) + post-order [ insert-gc-check? ] filter ; + +: insert-gc-check ( bb -- ) + dup '[ + i i f _ uninitialized-locs \ ##gc new-insn + prefix + ] change-instructions drop ; : insert-gc-checks ( cfg -- cfg' ) - dup [ insert-gc-check ] each-basic-block ; \ No newline at end of file + dup blocks-with-gc [ + over compute-uninitialized-sets + [ insert-gc-check ] each + ] unless-empty ; \ No newline at end of file diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index e08b3b25bb..0a52f1aa94 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -190,7 +190,7 @@ INSN: ##fixnum-add < ##fixnum-overflow ; INSN: ##fixnum-sub < ##fixnum-overflow ; INSN: ##fixnum-mul < ##fixnum-overflow ; -INSN: ##gc temp1 temp2 live-values ; +INSN: ##gc temp1 temp2 live-values uninitialized-locs ; ! Instructions used by machine IR only. INSN: _prologue stack-frame ; @@ -219,7 +219,7 @@ INSN: _fixnum-mul < _fixnum-overflow ; TUPLE: spill-slot n ; C: spill-slot -INSN: _gc temp1 temp2 gc-roots gc-root-count gc-root-size ; +INSN: _gc temp1 temp2 gc-roots gc-root-count gc-root-size uninitialized-locs ; ! These instructions operate on machine registers and not ! virtual registers diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index c6642d8ad9..2618db0904 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -48,11 +48,11 @@ IN: compiler.cfg.intrinsics slots.private:set-slot strings.private:string-nth strings.private:set-string-nth-fast - ! classes.tuple.private: - ! arrays: - ! byte-arrays: - ! byte-arrays:(byte-array) - ! kernel: + classes.tuple.private: + arrays: + byte-arrays: + byte-arrays:(byte-array) + kernel: alien.accessors:alien-unsigned-1 alien.accessors:set-alien-unsigned-1 alien.accessors:alien-signed-1 @@ -61,7 +61,7 @@ IN: compiler.cfg.intrinsics alien.accessors:set-alien-unsigned-2 alien.accessors:alien-signed-2 alien.accessors:set-alien-signed-2 - ! alien.accessors:alien-cell + alien.accessors:alien-cell alien.accessors:set-alien-cell } [ t "intrinsic" set-word-prop ] each @@ -90,7 +90,7 @@ IN: compiler.cfg.intrinsics alien.accessors:set-alien-float alien.accessors:alien-double alien.accessors:set-alien-double - } drop f [ t "intrinsic" set-word-prop ] each ; + } [ t "intrinsic" set-word-prop ] each ; : enable-fixnum-log2 ( -- ) \ math.integers.private:fixnum-log2 t "intrinsic" set-word-prop ; diff --git a/basis/compiler/cfg/linearization/linearization.factor b/basis/compiler/cfg/linearization/linearization.factor index 97fb3205c2..cbeb301901 100755 --- a/basis/compiler/cfg/linearization/linearization.factor +++ b/basis/compiler/cfg/linearization/linearization.factor @@ -98,15 +98,18 @@ M: ##dispatch linearize-insn M: ##gc linearize-insn nip - [ temp1>> ] - [ temp2>> ] - [ - live-values>> - [ compute-gc-roots ] - [ count-gc-roots ] - [ gc-roots-size ] - tri - ] tri + { + [ temp1>> ] + [ temp2>> ] + [ + live-values>> + [ compute-gc-roots ] + [ count-gc-roots ] + [ gc-roots-size ] + tri + ] + [ uninitialized-locs>> ] + } cleave _gc ; : linearize-basic-blocks ( cfg -- insns ) diff --git a/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor b/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor new file mode 100644 index 0000000000..6f3e35994a --- /dev/null +++ b/basis/compiler/cfg/stacks/uninitialized/uninitialized-tests.factor @@ -0,0 +1,61 @@ +IN: compiler.cfg.stacks.uninitialized.tests +USING: compiler.cfg.stacks.uninitialized compiler.cfg.debugger +compiler.cfg.registers compiler.cfg.instructions compiler.cfg +compiler.cfg.predecessors cpu.architecture tools.test kernel vectors +namespaces accessors sequences ; + +: test-uninitialized ( -- ) + cfg new 0 get >>entry + compute-predecessors + compute-uninitialized-sets ; + +V{ + T{ ##inc-d f 3 } +} 0 test-bb + +V{ + T{ ##replace f V int-regs 0 D 0 } + T{ ##replace f V int-regs 0 D 1 } + T{ ##replace f V int-regs 0 D 2 } + T{ ##inc-r f 1 } +} 1 test-bb + +V{ + T{ ##peek f V int-regs 0 D 0 } + T{ ##inc-d f 1 } +} 2 test-bb + +0 get 1 get 1vector >>successors drop +1 get 2 get 1vector >>successors drop + +[ ] [ test-uninitialized ] unit-test + +[ V{ D 0 D 1 D 2 } ] [ 1 get uninitialized-locs ] unit-test +[ V{ R 0 } ] [ 2 get uninitialized-locs ] unit-test + +! When merging, if a location is uninitialized in one branch and +! initialized in another, we have to consider it uninitialized, +! since it cannot be safely read from by a ##peek, or traced by GC. + +V{ } 0 test-bb + +V{ + T{ ##inc-d f 1 } +} 1 test-bb + +V{ + T{ ##call f namestack } + T{ ##branch } +} 2 test-bb + +V{ + T{ ##return } +} 3 test-bb + +0 get 1 get 2 get V{ } 2sequence >>successors drop +1 get 3 get 1vector >>successors drop +2 get 3 get 1vector >>successors drop + +[ ] [ test-uninitialized ] unit-test + +[ V{ D 0 } ] [ 3 get uninitialized-locs ] unit-test diff --git a/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor new file mode 100644 index 0000000000..ee60c4bd7a --- /dev/null +++ b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor @@ -0,0 +1,76 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel sequences byte-arrays namespaces accessors classes math +math.order fry arrays combinators compiler.cfg.registers +compiler.cfg.instructions compiler.cfg.dataflow-analysis ; +IN: compiler.cfg.stacks.uninitialized + +! Uninitialized stack location analysis. + +! Consider the following sequence of instructions: +! ##inc-d 2 +! _gc +! ##replace ... D 0 +! ##replace ... D 1 +! The GC check runs before stack locations 0 and 1 have been initialized, +! and it needs to zero them out so that GC doesn't try to trace them. + + ] [ prepend ] } + } cond + ] change ; + +M: ##inc-d visit-insn n>> ds-loc handle-inc ; + +M: ##inc-r visit-insn n>> rs-loc handle-inc ; + +ERROR: uninitialized-peek insn ; + +M: ##peek visit-insn + dup loc>> [ n>> ] [ class get ] bi ?nth 0 = + [ uninitialized-peek ] [ drop ] if ; + +M: ##replace visit-insn + loc>> [ n>> ] [ class get ] bi + 2dup length < [ [ 1 ] 2dip set-nth ] [ 2drop ] if ; + +M: insn visit-insn drop ; + +: prepare ( pair -- ) + [ first2 [ [ clone ] [ B{ } ] if* ] bi@ ] [ B{ } B{ } ] if* + [ ds-loc set ] [ rs-loc set ] bi* ; + +: visit-block ( bb -- ) instructions>> [ visit-insn ] each ; + +: finish ( -- pair ) ds-loc get rs-loc get 2array ; + +: (join-sets) ( seq1 seq2 -- seq ) + 2dup [ length ] bi@ max '[ _ 1 pad-tail ] bi@ [ min ] 2map ; + +: (uninitialized-locs) ( seq quot -- seq' ) + [ dup length [ drop 0 = ] pusher [ 2each ] dip ] dip map ; inline + +PRIVATE> + +FORWARD-ANALYSIS: uninitialized + +M: uninitialized-analysis transfer-set ( pair bb analysis -- pair' ) + drop [ prepare ] dip visit-block finish ; + +M: uninitialized-analysis join-sets ( sets analysis -- pair ) + drop sift [ f ] [ [ ] [ [ (join-sets) ] 2map ] map-reduce ] if-empty ; + +: uninitialized-locs ( bb -- locs ) + uninitialized-in dup [ + first2 + [ [ ] (uninitialized-locs) ] + [ [ ] (uninitialized-locs) ] + bi* append + ] when ; \ No newline at end of file diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index f9a4786eb5..c387c4ed8d 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -221,6 +221,7 @@ M: _gc generate-insn [ temp2>> ] [ gc-roots>> ] [ gc-root-count>> ] + [ uninitialized-locs>> ] } cleave %gc ; M: _loop-entry generate-insn drop %loop-entry ; diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index deb44db41a..b22e91056f 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -128,7 +128,7 @@ HOOK: %alien-global cpu ( dst symbol library -- ) HOOK: %allot cpu ( dst size class temp -- ) HOOK: %write-barrier cpu ( src card# table -- ) -HOOK: %gc cpu ( temp1 temp2 live-registers live-spill-slots -- ) +HOOK: %gc cpu ( temp1 temp2 live-registers live-spill-slots uninitialized-locs -- ) HOOK: %prologue cpu ( n -- ) HOOK: %epilogue cpu ( n -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 5bad8e067c..4fad6d4efc 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -466,6 +466,10 @@ M:: word load-gc-root ( gc-root register temp -- ) : load-gc-roots ( gc-roots temp -- ) '[ _ load-gc-root ] assoc-each ; +: wipe-locs ( locs -- ) + ! See explanation in compiler.cfg.stacks.uninitialized + [ 0 ] dip [ %replace ] with each ; + :: call-gc ( gc-root-count -- ) ! Pass pointer to start of GC roots as first parameter param-reg-1 gc-root-base param@ LEA @@ -475,11 +479,12 @@ M:: word load-gc-root ( gc-root register temp -- ) %prepare-alien-invoke "inline_gc" f %alien-invoke ; -M:: x86 %gc ( temp1 temp2 gc-roots gc-root-count -- ) +M:: x86 %gc ( temp1 temp2 gc-roots gc-root-count uninitialized-locs -- ) "end" define-label temp1 temp2 check-nursery "end" get JLE gc-roots temp1 save-gc-roots + uninitialized-locs wipe-locs gc-root-count call-gc gc-roots temp1 load-gc-roots "end" resolve-label ; From cc11727627d127718179f3d52ad388b40fb55bbf Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 09:24:41 -0500 Subject: [PATCH 15/21] benchmark.pidigits: reduce parameter to speed up CI runs --- extra/benchmark/pidigits/pidigits.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/benchmark/pidigits/pidigits.factor b/extra/benchmark/pidigits/pidigits.factor index 0f8a98e6f9..d001d81a8c 100644 --- a/extra/benchmark/pidigits/pidigits.factor +++ b/extra/benchmark/pidigits/pidigits.factor @@ -54,6 +54,6 @@ IN: benchmark.pidigits [ 1 { { 1 0 } { 0 1 } } ] dip 0 0 (pidigits) ; : pidigits-main ( -- ) - 10000 pidigits ; + 2000 pidigits ; MAIN: pidigits-main From c7dde45c2a597a93d6b62734351dfd6a562814fb Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 30 Jul 2009 10:58:34 -0500 Subject: [PATCH 16/21] hyphens>underscores in VERTEX-FORMAT for consistency with UNIFORM-TUPLE --- extra/gpu/render/render.factor | 4 +--- extra/gpu/shaders/shaders.factor | 16 +++++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/extra/gpu/render/render.factor b/extra/gpu/render/render.factor index ce6e0e25ff..8f1679bfa8 100644 --- a/extra/gpu/render/render.factor +++ b/extra/gpu/render/render.factor @@ -8,7 +8,7 @@ gpu.textures gpu.textures.private half-floats images kernel lexer locals math math.order math.parser namespaces opengl opengl.gl parser quotations sequences slots sorting specialized-arrays.alien specialized-arrays.float specialized-arrays.int -specialized-arrays.uint strings tr ui.gadgets.worlds variants +specialized-arrays.uint strings ui.gadgets.worlds variants vocabs.parser words ; IN: gpu.render @@ -338,8 +338,6 @@ DEFER: [bind-uniform-tuple] texture-unit' value>>-quot { value-cleave 2cleave } append ; -TR: hyphens>underscores "-" "_" ; - :: [bind-uniform] ( texture-unit uniform prefix -- texture-unit' quot ) prefix uniform name>> append hyphens>underscores :> name uniform uniform-type>> :> type diff --git a/extra/gpu/shaders/shaders.factor b/extra/gpu/shaders/shaders.factor index d2dd29595a..58633d4a71 100755 --- a/extra/gpu/shaders/shaders.factor +++ b/extra/gpu/shaders/shaders.factor @@ -8,7 +8,7 @@ io.encodings.ascii io.files io.pathnames kernel lexer literals locals math math.parser memoize multiline namespaces opengl opengl.gl opengl.shaders parser quotations sequences specialized-arrays.alien specialized-arrays.int splitting -strings ui.gadgets.worlds variants vectors vocabs vocabs.loader +strings tr ui.gadgets.worlds variants vectors vocabs vocabs.loader vocabs.parser words words.constant ; IN: gpu.shaders @@ -65,6 +65,8 @@ MEMO: output-index ( program-instance output-name -- index ) underscores "-" "_" ; + : gl-vertex-type ( component-type -- gl-type ) { { ubyte-components [ GL_UNSIGNED_BYTE ] } @@ -125,12 +127,12 @@ MEMO: output-index ( program-instance output-name -- index ) } 0&& [ vertex-attribute inaccurate-feedback-attribute-error ] unless ; :: [bind-vertex-attribute] ( stride offset vertex-attribute -- stride offset' quot ) - vertex-attribute name>> :> name - vertex-attribute component-type>> :> type - type gl-vertex-type :> gl-type - vertex-attribute dim>> :> dim - vertex-attribute normalize?>> >c-bool :> normalize? - vertex-attribute vertex-attribute-size :> size + vertex-attribute name>> hyphens>underscores :> name + vertex-attribute component-type>> :> type + type gl-vertex-type :> gl-type + vertex-attribute dim>> :> dim + vertex-attribute normalize?>> >c-bool :> normalize? + vertex-attribute vertex-attribute-size :> size stride offset size + { From 455956b16c917b6e6f9809ea1288296b04714466 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Thu, 30 Jul 2009 11:05:12 -0500 Subject: [PATCH 17/21] add additional SSE2 packed integer operations --- .../cpu/x86/assembler/assembler-tests.factor | 1 + basis/cpu/x86/assembler/assembler.factor | 76 ++++++++++++++++--- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/basis/cpu/x86/assembler/assembler-tests.factor b/basis/cpu/x86/assembler/assembler-tests.factor index 66adee6bf6..1fe65b719c 100644 --- a/basis/cpu/x86/assembler/assembler-tests.factor +++ b/basis/cpu/x86/assembler/assembler-tests.factor @@ -68,6 +68,7 @@ IN: cpu.x86.assembler.tests ! sse shift instructions [ { HEX: 66 HEX: 0f HEX: 71 HEX: d0 HEX: 05 } ] [ [ XMM0 5 PSRLW ] { } make ] unit-test +[ { HEX: 66 HEX: 0f HEX: d1 HEX: c1 } ] [ [ XMM0 XMM1 PSRLW ] { } make ] unit-test ! sse comparison instructions [ { HEX: 66 HEX: 0f HEX: c2 HEX: c1 HEX: 02 } ] [ [ XMM0 XMM1 CMPLEPD ] { } make ] unit-test diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index e91ebdcb1a..1bcf672ce7 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -681,24 +681,57 @@ ALIAS: PINSRQ PINSRD : MAXPD ( dest src -- ) HEX: 5f HEX: 66 2-operand-rm-sse ; : MAXSD ( dest src -- ) HEX: 5f HEX: f2 2-operand-rm-sse ; : MAXSS ( dest src -- ) HEX: 5f HEX: f3 2-operand-rm-sse ; +: PUNPCKLBW ( dest src -- ) HEX: 60 HEX: 66 2-operand-rm-sse ; +: PUNPCKLWD ( dest src -- ) HEX: 61 HEX: 66 2-operand-rm-sse ; +: PUNPCKLDQ ( dest src -- ) HEX: 62 HEX: 66 2-operand-rm-sse ; +: PACKSSWB ( dest src -- ) HEX: 63 HEX: 66 2-operand-rm-sse ; +: PCMPGTB ( dest src -- ) HEX: 64 HEX: 66 2-operand-rm-sse ; +: PCMPGTW ( dest src -- ) HEX: 65 HEX: 66 2-operand-rm-sse ; +: PCMPGTD ( dest src -- ) HEX: 66 HEX: 66 2-operand-rm-sse ; +: PACKUSWB ( dest src -- ) HEX: 67 HEX: 66 2-operand-rm-sse ; +: PUNPCKHBW ( dest src -- ) HEX: 68 HEX: 66 2-operand-rm-sse ; +: PUNPCKHWD ( dest src -- ) HEX: 69 HEX: 66 2-operand-rm-sse ; +: PUNPCKHDQ ( dest src -- ) HEX: 6a HEX: 66 2-operand-rm-sse ; +: PACKSSDW ( dest src -- ) HEX: 6b HEX: 66 2-operand-rm-sse ; : PUNPCKLQDQ ( dest src -- ) HEX: 6c HEX: 66 2-operand-rm-sse ; : PUNPCKHQDQ ( dest src -- ) HEX: 6d HEX: 66 2-operand-rm-sse ; +: MOVD ( dest src -- ) { HEX: 6e HEX: 7e } HEX: 66 2-operand-rm-mr-sse ; : MOVDQA ( dest src -- ) { HEX: 6f HEX: 7f } HEX: 66 2-operand-rm-mr-sse ; : MOVDQU ( dest src -- ) { HEX: 6f HEX: 7f } HEX: f3 2-operand-rm-mr-sse ; : PSHUFD ( dest src imm -- ) HEX: 70 HEX: 66 3-operand-rm-sse ; : PSHUFLW ( dest src imm -- ) HEX: 70 HEX: f2 3-operand-rm-sse ; : PSHUFHW ( dest src imm -- ) HEX: 70 HEX: f3 3-operand-rm-sse ; -: PSRLW ( dest imm -- ) BIN: 010 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSRAW ( dest imm -- ) BIN: 100 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSLLW ( dest imm -- ) BIN: 110 HEX: 71 HEX: 66 2-operand-sse-shift ; -: PSRLD ( dest imm -- ) BIN: 010 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSRAD ( dest imm -- ) BIN: 100 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSLLD ( dest imm -- ) BIN: 110 HEX: 72 HEX: 66 2-operand-sse-shift ; -: PSRLQ ( dest imm -- ) BIN: 010 HEX: 73 HEX: 66 2-operand-sse-shift ; + +: (PSRLW-imm) ( dest imm -- ) BIN: 010 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSRAW-imm) ( dest imm -- ) BIN: 100 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSLLW-imm) ( dest imm -- ) BIN: 110 HEX: 71 HEX: 66 2-operand-sse-shift ; +: (PSRLD-imm) ( dest imm -- ) BIN: 010 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSRAD-imm) ( dest imm -- ) BIN: 100 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSLLD-imm) ( dest imm -- ) BIN: 110 HEX: 72 HEX: 66 2-operand-sse-shift ; +: (PSRLQ-imm) ( dest imm -- ) BIN: 010 HEX: 73 HEX: 66 2-operand-sse-shift ; +: (PSLLQ-imm) ( dest imm -- ) BIN: 110 HEX: 73 HEX: 66 2-operand-sse-shift ; + +: (PSRLW-reg) ( dest src -- ) HEX: d1 HEX: 66 2-operand-rm-sse ; +: (PSRLD-reg) ( dest src -- ) HEX: d2 HEX: 66 2-operand-rm-sse ; +: (PSRLQ-reg) ( dest src -- ) HEX: d3 HEX: 66 2-operand-rm-sse ; +: (PSRAW-reg) ( dest src -- ) HEX: e1 HEX: 66 2-operand-rm-sse ; +: (PSRAD-reg) ( dest src -- ) HEX: e2 HEX: 66 2-operand-rm-sse ; +: (PSLLW-reg) ( dest src -- ) HEX: f1 HEX: 66 2-operand-rm-sse ; +: (PSLLD-reg) ( dest src -- ) HEX: f2 HEX: 66 2-operand-rm-sse ; +: (PSLLQ-reg) ( dest src -- ) HEX: f3 HEX: 66 2-operand-rm-sse ; + +: PSRLW ( dest src -- ) dup integer? [ (PSRLW-imm) ] [ (PSRLW-reg) ] if ; +: PSRAW ( dest src -- ) dup integer? [ (PSRAW-imm) ] [ (PSRAW-reg) ] if ; +: PSLLW ( dest src -- ) dup integer? [ (PSLLW-imm) ] [ (PSLLW-reg) ] if ; +: PSRLD ( dest src -- ) dup integer? [ (PSRLD-imm) ] [ (PSRLD-reg) ] if ; +: PSRAD ( dest src -- ) dup integer? [ (PSRAD-imm) ] [ (PSRAD-reg) ] if ; +: PSLLD ( dest src -- ) dup integer? [ (PSLLD-imm) ] [ (PSLLD-reg) ] if ; +: PSRLQ ( dest src -- ) dup integer? [ (PSRLQ-imm) ] [ (PSRLQ-reg) ] if ; +: PSLLQ ( dest src -- ) dup integer? [ (PSLLQ-imm) ] [ (PSLLQ-reg) ] if ; + : PSRLDQ ( dest imm -- ) BIN: 011 HEX: 73 HEX: 66 2-operand-sse-shift ; -: PSLLQ ( dest imm -- ) BIN: 110 HEX: 73 HEX: 66 2-operand-sse-shift ; : PSLLDQ ( dest imm -- ) BIN: 111 HEX: 73 HEX: 66 2-operand-sse-shift ; : PCMPEQB ( dest src -- ) HEX: 74 HEX: 66 2-operand-rm-sse ; @@ -709,11 +742,14 @@ ALIAS: PINSRQ PINSRD : HSUBPD ( dest src -- ) HEX: 7d HEX: 66 2-operand-rm-sse ; : HSUBPS ( dest src -- ) HEX: 7d HEX: f2 2-operand-rm-sse ; +: FXSAVE ( dest -- ) { BIN: 000 f { HEX: 0f HEX: ae } } 1-operand ; +: FXRSTOR ( src -- ) { BIN: 001 f { HEX: 0f HEX: ae } } 1-operand ; : LDMXCSR ( src -- ) { BIN: 010 f { HEX: 0f HEX: ae } } 1-operand ; : STMXCSR ( dest -- ) { BIN: 011 f { HEX: 0f HEX: ae } } 1-operand ; : LFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 350 , ; : MFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 360 , ; : SFENCE ( -- ) HEX: 0f , HEX: ae , OCT: 370 , ; +: CLFLUSH ( dest -- ) { BIN: 111 f { HEX: 0f HEX: ae } } 1-operand ; : POPCNT ( dest src -- ) HEX: b8 HEX: f3 2-operand-rm-sse ; @@ -762,26 +798,46 @@ ALIAS: PINSRQ PINSRD : ADDSUBPD ( dest src -- ) HEX: d0 HEX: 66 2-operand-rm-sse ; : ADDSUBPS ( dest src -- ) HEX: d0 HEX: f2 2-operand-rm-sse ; : PADDQ ( dest src -- ) HEX: d4 HEX: 66 2-operand-rm-sse ; +: PMULLW ( dest src -- ) HEX: d5 HEX: 66 2-operand-rm-sse ; +: PMOVMSKB ( dest src -- ) HEX: d7 HEX: 66 2-operand-rm-sse ; +: PSUBUSB ( dest src -- ) HEX: d8 HEX: 66 2-operand-rm-sse ; +: PSUBUSW ( dest src -- ) HEX: d9 HEX: 66 2-operand-rm-sse ; : PMINUB ( dest src -- ) HEX: da HEX: 66 2-operand-rm-sse ; +: PAND ( dest src -- ) HEX: db HEX: 66 2-operand-rm-sse ; +: PADDUSB ( dest src -- ) HEX: dc HEX: 66 2-operand-rm-sse ; +: PADDUSW ( dest src -- ) HEX: dd HEX: 66 2-operand-rm-sse ; : PMAXUB ( dest src -- ) HEX: de HEX: 66 2-operand-rm-sse ; +: PANDN ( dest src -- ) HEX: df HEX: 66 2-operand-rm-sse ; : PAVGB ( dest src -- ) HEX: e0 HEX: 66 2-operand-rm-sse ; : PAVGW ( dest src -- ) HEX: e3 HEX: 66 2-operand-rm-sse ; : PMULHUW ( dest src -- ) HEX: e4 HEX: 66 2-operand-rm-sse ; +: PMULHW ( dest src -- ) HEX: e5 HEX: 66 2-operand-rm-sse ; : CVTTPD2DQ ( dest src -- ) HEX: e6 HEX: 66 2-operand-rm-sse ; : CVTPD2DQ ( dest src -- ) HEX: e6 HEX: f2 2-operand-rm-sse ; : CVTDQ2PD ( dest src -- ) HEX: e6 HEX: f3 2-operand-rm-sse ; : MOVNTDQ ( dest src -- ) HEX: e7 HEX: 66 2-operand-mr-sse ; +: PSUBSB ( dest src -- ) HEX: e8 HEX: 66 2-operand-rm-sse ; +: PSUBSW ( dest src -- ) HEX: e9 HEX: 66 2-operand-rm-sse ; : PMINSW ( dest src -- ) HEX: ea HEX: 66 2-operand-rm-sse ; +: POR ( dest src -- ) HEX: eb HEX: 66 2-operand-rm-sse ; +: PADDSB ( dest src -- ) HEX: ec HEX: 66 2-operand-rm-sse ; +: PADDSW ( dest src -- ) HEX: ed HEX: 66 2-operand-rm-sse ; : PMAXSW ( dest src -- ) HEX: ee HEX: 66 2-operand-rm-sse ; +: PXOR ( dest src -- ) HEX: ef HEX: 66 2-operand-rm-sse ; : LDDQU ( dest src -- ) HEX: f0 HEX: f2 2-operand-rm-sse ; : PMULUDQ ( dest src -- ) HEX: f4 HEX: 66 2-operand-rm-sse ; +: PMADDWD ( dest src -- ) HEX: f5 HEX: 66 2-operand-rm-sse ; : PSADBW ( dest src -- ) HEX: f6 HEX: 66 2-operand-rm-sse ; - : MASKMOVDQU ( dest src -- ) HEX: f7 HEX: 66 2-operand-rm-sse ; - +: PSUBB ( dest src -- ) HEX: f8 HEX: 66 2-operand-rm-sse ; +: PSUBW ( dest src -- ) HEX: f9 HEX: 66 2-operand-rm-sse ; +: PSUBD ( dest src -- ) HEX: fa HEX: 66 2-operand-rm-sse ; : PSUBQ ( dest src -- ) HEX: fb HEX: 66 2-operand-rm-sse ; +: PADDB ( dest src -- ) HEX: fc HEX: 66 2-operand-rm-sse ; +: PADDW ( dest src -- ) HEX: fd HEX: 66 2-operand-rm-sse ; +: PADDD ( dest src -- ) HEX: fe HEX: 66 2-operand-rm-sse ; ! x86-64 branch prediction hints From 47920a7a0c937021978c287bf3c22db2d6510c64 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 21:27:52 -0500 Subject: [PATCH 18/21] Passing -profile-compiler switch to bootstrap collects timing information from optimizer passes --- basis/bootstrap/compiler/compiler.factor | 4 ++ basis/bootstrap/compiler/timing/timing.factor | 38 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 basis/bootstrap/compiler/timing/timing.factor diff --git a/basis/bootstrap/compiler/compiler.factor b/basis/bootstrap/compiler/compiler.factor index 0a3ff10a8e..4394535b8d 100755 --- a/basis/bootstrap/compiler/compiler.factor +++ b/basis/bootstrap/compiler/compiler.factor @@ -10,6 +10,10 @@ math.order quotations quotations.private assocs.private ; FROM: compiler => enable-optimizer ; IN: bootstrap.compiler +"profile-compiler" get [ + "bootstrap.compiler.timing" require +] when + ! Don't bring this in when deploying, since it will store a ! reference to 'eval' in a global variable "deploy-vocab" get "staging" get or [ diff --git a/basis/bootstrap/compiler/timing/timing.factor b/basis/bootstrap/compiler/timing/timing.factor new file mode 100644 index 0000000000..e1466e3409 --- /dev/null +++ b/basis/bootstrap/compiler/timing/timing.factor @@ -0,0 +1,38 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors compiler.cfg.builder compiler.cfg.linear-scan +compiler.cfg.liveness compiler.cfg.mr compiler.cfg.optimizer +compiler.cfg.stacks.finalize compiler.cfg.stacks.global +compiler.codegen compiler.tree.builder compiler.tree.optimizer +kernel make sequences tools.annotations tools.crossref ; +IN: bootstrap.compiler.timing + +: passes ( word -- seq ) + def>> uses [ vocabulary>> "compiler." head? ] filter ; + +: high-level-passes ( -- seq ) \ optimize-tree passes ; + +: low-level-passes ( -- seq ) \ optimize-cfg passes ; + +: machine-passes ( -- seq ) \ build-mr passes ; + +: linear-scan-passes ( -- seq ) \ (linear-scan) passes ; + +: all-passes ( -- seq ) + [ + \ build-tree , + \ optimize-tree , + high-level-passes % + \ build-cfg , + \ compute-global-sets , + \ finalize-stack-shuffling , + \ optimize-cfg , + low-level-passes % + \ compute-live-sets , + \ build-mr , + machine-passes % + linear-scan-passes % + \ generate , + ] { } make ; + +all-passes [ [ reset ] [ add-timing ] bi ] each \ No newline at end of file From 45770c62504cf68f11c1fc61e89eea0bf51c4336 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 21:28:27 -0500 Subject: [PATCH 19/21] Move a bunch of GC check generation logic to platform-independent side --- basis/compiler/codegen/codegen.factor | 44 +++++++++++++++++---- basis/cpu/architecture/architecture.factor | 7 +++- basis/cpu/x86/x86.factor | 45 +++------------------- 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index c387c4ed8d..672ed9ce02 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -4,7 +4,7 @@ USING: namespaces make math math.order math.parser sequences accessors kernel kernel.private layouts assocs words summary arrays combinators classes.algebra alien alien.c-types alien.structs alien.strings alien.arrays alien.complex alien.libraries sets libc -continuations.private fry cpu.architecture classes +continuations.private fry cpu.architecture classes locals source-files.errors compiler.errors compiler.alien @@ -215,14 +215,44 @@ M: ##write-barrier generate-insn [ table>> ] tri %write-barrier ; +! GC checks +: wipe-locs ( locs temp -- ) + '[ + _ + [ 0 %load-immediate ] + [ swap [ %replace ] with each ] bi + ] unless-empty ; + +GENERIC# save-gc-root 1 ( gc-root operand temp -- ) + +M:: spill-slot save-gc-root ( gc-root operand temp -- ) + temp operand n>> %reload-integer + gc-root temp %save-gc-root ; + +M: object save-gc-root drop %save-gc-root ; + +: save-gc-roots ( gc-roots temp -- ) '[ _ save-gc-root ] assoc-each ; + +GENERIC# load-gc-root 1 ( gc-root operand temp -- ) + +M:: spill-slot load-gc-root ( gc-root operand temp -- ) + gc-root temp %load-gc-root + temp operand n>> %spill-integer ; + +M: object load-gc-root drop %load-gc-root ; + +: load-gc-roots ( gc-roots temp -- ) '[ _ load-gc-root ] assoc-each ; + M: _gc generate-insn + "no-gc" define-label { - [ temp1>> ] - [ temp2>> ] - [ gc-roots>> ] - [ gc-root-count>> ] - [ uninitialized-locs>> ] - } cleave %gc ; + [ [ "no-gc" get ] dip [ temp1>> ] [ temp2>> ] bi %check-nursery ] + [ [ uninitialized-locs>> ] [ temp1>> ] bi wipe-locs ] + [ [ gc-roots>> ] [ temp1>> ] bi save-gc-roots ] + [ gc-root-count>> %call-gc ] + [ [ gc-roots>> ] [ temp1>> ] bi load-gc-roots ] + } cleave + "no-gc" resolve-label ; M: _loop-entry generate-insn drop %loop-entry ; diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index b22e91056f..e4c8f3246d 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -128,7 +128,12 @@ HOOK: %alien-global cpu ( dst symbol library -- ) HOOK: %allot cpu ( dst size class temp -- ) HOOK: %write-barrier cpu ( src card# table -- ) -HOOK: %gc cpu ( temp1 temp2 live-registers live-spill-slots uninitialized-locs -- ) + +! GC checks +HOOK: %check-nursery cpu ( label temp1 temp2 -- ) +HOOK: %save-gc-root cpu ( gc-root register -- ) +HOOK: %load-gc-root cpu ( gc-root register -- ) +HOOK: %call-gc cpu ( gc-root-count -- ) HOOK: %prologue cpu ( n -- ) HOOK: %epilogue cpu ( n -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 4fad6d4efc..34b1b63581 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -435,42 +435,19 @@ M:: x86 %write-barrier ( src card# table -- ) table table [] MOV table card# [+] card-mark MOV ; -:: check-nursery ( temp1 temp2 -- ) +M:: x86 %check-nursery ( label temp1 temp2 -- ) temp1 load-zone-ptr temp2 temp1 cell [+] MOV temp2 1024 ADD temp1 temp1 3 cells [+] MOV - temp2 temp1 CMP ; + temp2 temp1 CMP + label JLE ; -GENERIC# save-gc-root 1 ( gc-root operand temp -- ) +M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ; -M:: spill-slot save-gc-root ( gc-root spill-slot temp -- ) - temp spill-slot n>> spill-integer@ MOV - gc-root gc-root@ temp MOV ; +M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ; -M:: word save-gc-root ( gc-root register temp -- ) - gc-root gc-root@ register MOV ; - -: save-gc-roots ( gc-roots temp -- ) - '[ _ save-gc-root ] assoc-each ; - -GENERIC# load-gc-root 1 ( gc-root operand temp -- ) - -M:: spill-slot load-gc-root ( gc-root spill-slot temp -- ) - temp gc-root gc-root@ MOV - spill-slot n>> spill-integer@ temp MOV ; - -M:: word load-gc-root ( gc-root register temp -- ) - register gc-root gc-root@ MOV ; - -: load-gc-roots ( gc-roots temp -- ) - '[ _ load-gc-root ] assoc-each ; - -: wipe-locs ( locs -- ) - ! See explanation in compiler.cfg.stacks.uninitialized - [ 0 ] dip [ %replace ] with each ; - -:: call-gc ( gc-root-count -- ) +M:: x86 %call-gc ( gc-root-count -- ) ! Pass pointer to start of GC roots as first parameter param-reg-1 gc-root-base param@ LEA ! Pass number of roots as second parameter @@ -479,16 +456,6 @@ M:: word load-gc-root ( gc-root register temp -- ) %prepare-alien-invoke "inline_gc" f %alien-invoke ; -M:: x86 %gc ( temp1 temp2 gc-roots gc-root-count uninitialized-locs -- ) - "end" define-label - temp1 temp2 check-nursery - "end" get JLE - gc-roots temp1 save-gc-roots - uninitialized-locs wipe-locs - gc-root-count call-gc - gc-roots temp1 load-gc-roots - "end" resolve-label ; - M: x86 %alien-global [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ; From dd2dc2bb24ef116d7a2a27f1e7d2903e64a2f07c Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 21:44:22 -0500 Subject: [PATCH 20/21] cpu.ppc: Updating PowerPC backend for codegen changes over the last two months: new shift intrinsics added, fixnum overflow intrinsics are now treated like conditionals, GC checks are more complex and have a different API --- basis/cpu/ppc/ppc.factor | 144 ++++++++++++--------------------------- 1 file changed, 44 insertions(+), 100 deletions(-) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 003eccfa18..7ce73d2c4b 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -76,8 +76,12 @@ HOOK: reserved-area-size os ( -- n ) : xt-save ( n -- i ) 2 cells - ; ! Next, we have the spill area as well as the FFI parameter area. -! They overlap, since basic blocks with FFI calls will never -! spill. +! It is safe for them to overlap, since basic blocks with FFI calls +! will never spill -- indeed, basic blocks with FFI calls do not +! use vregs at all, and the FFI call is a stack analysis sync point. +! In the future this will change and the stack frame logic will +! need to be untangled somewhat. + : param@ ( n -- x ) reserved-area-size + ; inline : param-save-size ( -- n ) 8 cells ; foldable @@ -85,32 +89,30 @@ HOOK: reserved-area-size os ( -- n ) : local@ ( n -- x ) reserved-area-size param-save-size + + ; inline -: spill-integer-base ( -- n ) - stack-frame get spill-counts>> double-float-regs swap at - double-float-regs reg-size * ; - : spill-integer@ ( n -- offset ) - cells spill-integer-base + param@ ; + spill-integer-offset param@ ; : spill-float@ ( n -- offset ) - double-float-regs reg-size * param@ ; + spill-float-offset param@ ; ! Some FP intrinsics need a temporary scratch area in the stack -! frame, 8 bytes in size +! frame, 8 bytes in size. This is in the param-save area so it +! should not overlap with spill slots. : scratch@ ( n -- offset ) stack-frame get total-size>> factor-area-size - param-save-size - + ; +! GC root area +: gc-root@ ( n -- offset ) + gc-root-offset param@ ; + ! Finally we have the linkage area HOOK: lr-save os ( -- n ) M: ppc stack-frame-size ( stack-frame -- i ) - [ spill-counts>> [ swap reg-size * ] { } assoc>map sum ] - [ params>> ] - [ return>> ] - tri + + + (stack-frame-size) param-save-size + reserved-area-size + factor-area-size + @@ -176,95 +178,28 @@ M: ppc %or OR ; M: ppc %or-imm ORI ; M: ppc %xor XOR ; M: ppc %xor-imm XORI ; +M: ppc %shl SLW ; M: ppc %shl-imm swapd SLWI ; +M: ppc %shr-imm SRW ; M: ppc %shr-imm swapd SRWI ; +M: ppc %sar SRAW ; M: ppc %sar-imm SRAWI ; M: ppc %not NOT ; -: %alien-invoke-tail ( func dll -- ) - [ scratch-reg ] 2dip %alien-global scratch-reg MTCTR BCTR ; - -:: exchange-regs ( r1 r2 -- ) - scratch-reg r1 MR - r1 r2 MR - r2 scratch-reg MR ; - -: ?MR ( r1 r2 -- ) 2dup = [ 2drop ] [ MR ] if ; - -:: move>args ( src1 src2 -- ) - { - { [ src1 4 = ] [ 3 src2 ?MR 3 4 exchange-regs ] } - { [ src1 3 = ] [ 4 src2 ?MR ] } - { [ src2 3 = ] [ 4 src1 ?MR 3 4 exchange-regs ] } - { [ src2 4 = ] [ 3 src1 ?MR ] } - [ 3 src1 MR 4 src2 MR ] - } cond ; - -: clear-xer ( -- ) +:: overflow-template ( label dst src1 src2 insn -- ) 0 0 LI - 0 MTXER ; inline + 0 MTXER + dst src2 src1 insn call + label BNO ; inline -:: overflow-template ( src1 src2 insn func -- ) - "no-overflow" define-label - clear-xer - scratch-reg src2 src1 insn call - scratch-reg ds-reg 0 STW - "no-overflow" get BNO - src1 src2 move>args - %prepare-alien-invoke - func f %alien-invoke - "no-overflow" resolve-label ; inline +M: ppc %fixnum-add ( label dst src1 src2 -- ) + [ ADDO. ] overflow-template ; -:: overflow-template-tail ( src1 src2 insn func -- ) - "overflow" define-label - clear-xer - scratch-reg src2 src1 insn call - "overflow" get BO - scratch-reg ds-reg 0 STW - BLR - "overflow" resolve-label - src1 src2 move>args - %prepare-alien-invoke - func f %alien-invoke-tail ; inline +M: ppc %fixnum-sub ( label dst src1 src2 -- ) + [ SUBFO. ] overflow-template ; -M: ppc %fixnum-add ( src1 src2 -- ) - [ ADDO. ] "overflow_fixnum_add" overflow-template ; - -M: ppc %fixnum-add-tail ( src1 src2 -- ) - [ ADDO. ] "overflow_fixnum_add" overflow-template-tail ; - -M: ppc %fixnum-sub ( src1 src2 -- ) - [ SUBFO. ] "overflow_fixnum_subtract" overflow-template ; - -M: ppc %fixnum-sub-tail ( src1 src2 -- ) - [ SUBFO. ] "overflow_fixnum_subtract" overflow-template-tail ; - -M:: ppc %fixnum-mul ( src1 src2 temp1 temp2 -- ) - "no-overflow" define-label - clear-xer - temp1 src1 tag-bits get SRAWI - temp2 temp1 src2 MULLWO. - temp2 ds-reg 0 STW - "no-overflow" get BNO - src2 src2 tag-bits get SRAWI - temp1 src2 move>args - %prepare-alien-invoke - "overflow_fixnum_multiply" f %alien-invoke - "no-overflow" resolve-label ; - -M:: ppc %fixnum-mul-tail ( src1 src2 temp1 temp2 -- ) - "overflow" define-label - clear-xer - temp1 src1 tag-bits get SRAWI - temp2 temp1 src2 MULLWO. - "overflow" get BO - temp2 ds-reg 0 STW - BLR - "overflow" resolve-label - src2 src2 tag-bits get SRAWI - temp1 src2 move>args - %prepare-alien-invoke - "overflow_fixnum_multiply" f %alien-invoke-tail ; +M:: ppc %fixnum-mul ( label dst src1 src2 -- ) + [ MULLWO. ] overflow-template ; : bignum@ ( n -- offset ) cells bignum tag-number - ; inline @@ -462,17 +397,26 @@ M:: ppc %write-barrier ( src card# table -- ) src card# deck-bits SRWI table scratch-reg card# STBX ; -M:: ppc %gc ( temp1 temp2 gc-roots gc-root-count -- ) - "end" define-label +M:: ppc %check-nursery ( label temp1 temp2 -- ) temp2 load-zone-ptr temp1 temp2 cell LWZ temp2 temp2 3 cells LWZ - temp1 temp1 1024 ADDI ! add ALLOT_BUFFER_ZONE to here - temp1 0 temp2 CMP ! is here >= end? - "end" get BLE + ! add ALLOT_BUFFER_ZONE to here + temp1 temp1 1024 ADDI + ! is here >= end? + temp1 0 temp2 CMP + label BLE ; + +M:: ppc %save-gc-root ( gc-root register -- ) + register 1 gc-root gc-root@ STW ; + +M:: ppc %load-gc-root ( gc-root register -- ) + register 1 gc-root gc-root@ LWZ ; + +M:: ppc %call-gc ( gc-root-count -- ) %prepare-alien-invoke - 0 3 LI - 0 4 LI + 3 1 gc-root-base param@ ADDI + gc-root-count 4 LI "inline_gc" f %alien-invoke "end" resolve-label ; From 5e6936ec6980330cc776a8089b0fdbfa715b3121 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 30 Jul 2009 23:20:38 -0500 Subject: [PATCH 21/21] README.txt: minor updates suggested by mnestic --- README.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.txt b/README.txt index a33a85b218..016d60e68c 100755 --- a/README.txt +++ b/README.txt @@ -55,10 +55,13 @@ For X11 support, you need recent development libraries for libc, Pango, X11, and OpenGL. On a Debian-derived Linux distribution (like Ubuntu), you can use the following line to grab everything: - sudo apt-get install libc6-dev libpango-1.0-dev libx11-dev + sudo apt-get install libc6-dev libpango1.0-dev libx11-dev libgl1-mesa-dev + +Note that if you are using a proprietary OpenGL driver, you should +probably leave out the last package in the list. If your DISPLAY environment variable is set, the UI will start -automatically: +automatically when you run Factor: ./factor