From 08a2eb74f45566d38ed023765be8694074f9aa15 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 05:39:53 -0500 Subject: [PATCH 1/3] cpu.x86: shifts didn't work if dst != src1; re-organize file a bit --- .../cfg/ssa/destruction/destruction.factor | 2 +- basis/compiler/tests/codegen.factor | 6 + basis/cpu/x86/x86.factor | 767 +++++++++--------- 3 files changed, 390 insertions(+), 385 deletions(-) diff --git a/basis/compiler/cfg/ssa/destruction/destruction.factor b/basis/compiler/cfg/ssa/destruction/destruction.factor index 73f036b1b1..67570302d7 100644 --- a/basis/compiler/cfg/ssa/destruction/destruction.factor +++ b/basis/compiler/cfg/ssa/destruction/destruction.factor @@ -79,7 +79,7 @@ M: ##phi prepare-insn [ dst>> ] [ inputs>> values ] bi [ eliminate-copy ] with each ; - : prepare-block ( bb -- ) +: prepare-block ( bb -- ) instructions>> [ prepare-insn ] each ; : prepare-coalescing ( cfg -- ) diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor index 47061070bd..141fc24309 100644 --- a/basis/compiler/tests/codegen.factor +++ b/basis/compiler/tests/codegen.factor @@ -470,3 +470,9 @@ TUPLE: myseq { underlying1 byte-array read-only } { underlying2 byte-array read- 1 swap ] compile-call ] unit-test + +! Forgot to two-operand shifts +[ 2 0 ] [ + 1 1 + [ [ HEX: f bitand ] bi@ [ shift ] [ drop -3 shift ] 2bi ] compile-call +] unit-test \ No newline at end of file diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 8972860125..fd8dc70f89 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -156,6 +156,309 @@ M: x86 %fixnum-sub ( label dst src1 src2 -- ) M: x86 %fixnum-mul ( label dst src1 src2 -- ) int-rep two-operand swap IMUL2 JO ; +M: x86 %unbox-alien ( dst src -- ) + alien-offset [+] MOV ; + +M:: x86 %unbox-any-c-ptr ( dst src temp -- ) + [ + { "is-byte-array" "end" "start" } [ define-label ] each + dst 0 MOV + temp src MOV + ! We come back here with displaced aliens + "start" resolve-label + ! Is the object f? + temp \ f tag-number CMP + "end" get JE + ! Is the object an alien? + temp header-offset [+] alien type-number tag-fixnum CMP + "is-byte-array" get JNE + ! If so, load the offset and add it to the address + dst temp alien-offset [+] ADD + ! Now recurse on the underlying alien + temp temp underlying-alien-offset [+] MOV + "start" get JMP + "is-byte-array" resolve-label + ! Add byte array address to address being computed + dst temp ADD + ! Add an offset to start of byte array's data + dst byte-array-offset ADD + "end" resolve-label + ] with-scope ; + +: alien@ ( reg n -- op ) cells alien tag-number - [+] ; + +:: %allot-alien ( dst displacement base temp -- ) + dst 4 cells alien temp %allot + dst 1 alien@ base MOV ! alien + dst 2 alien@ \ f tag-number MOV ! expired + dst 3 alien@ displacement MOV ! displacement + ; + +M:: x86 %box-alien ( dst src temp -- ) + [ + "end" define-label + dst \ f tag-number MOV + src 0 CMP + "end" get JE + dst src \ f tag-number temp %allot-alien + "end" resolve-label + ] with-scope ; + +M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- ) + [ + "end" define-label + "ok" define-label + ! If displacement is zero, return the base + dst base MOV + displacement 0 CMP + "end" get JE + ! Quickly use displacement' before its needed for real, as allot temporary + dst 4 cells alien displacement' %allot + ! If base is already a displaced alien, unpack it + base' base MOV + displacement' displacement MOV + base \ f tag-number CMP + "ok" get JE + base header-offset [+] alien type-number tag-fixnum CMP + "ok" get JNE + ! displacement += base.displacement + displacement' base 3 alien@ ADD + ! base = base.base + base' base 1 alien@ MOV + "ok" resolve-label + dst 1 alien@ base' MOV ! alien + dst 2 alien@ \ f tag-number MOV ! expired + dst 3 alien@ displacement' MOV ! displacement + "end" resolve-label + ] with-scope ; + +! The 'small-reg' mess is pretty crappy, but its only used on x86-32. +! On x86-64, all registers have 8-bit versions. However, a similar +! problem arises for shifts, where the shift count must be in CL, and +! so one day I will fix this properly by adding precoloring to the +! register allocator. + +HOOK: has-small-reg? cpu ( reg size -- ? ) + +CONSTANT: have-byte-regs { EAX ECX EDX EBX } + +M: x86.32 has-small-reg? + { + { 8 [ have-byte-regs memq? ] } + { 16 [ drop t ] } + { 32 [ drop t ] } + } case ; + +M: x86.64 has-small-reg? 2drop t ; + +: small-reg-that-isn't ( exclude -- reg' ) + [ have-byte-regs ] dip + [ native-version-of ] map + '[ _ memq? not ] find nip ; + +: with-save/restore ( reg quot -- ) + [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline + +:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- ) + ! If the destination register overlaps a small register with + ! 'size' bits, we call the quot with that. Otherwise, we find a + ! small register that is not in exclude, and call quot, saving and + ! restoring the small register. + dst size has-small-reg? [ dst quot call ] [ + exclude small-reg-that-isn't + [ quot call ] with-save/restore + ] if ; inline + +M:: x86 %string-nth ( dst src index temp -- ) + ! We request a small-reg of size 8 since those of size 16 are + ! a superset. + "end" define-label + dst { src index temp } 8 [| new-dst | + ! Load the least significant 7 bits into new-dst. + ! 8th bit indicates whether we have to load from + ! the aux vector or not. + temp src index [+] LEA + new-dst 8-bit-version-of temp string-offset [+] MOV + new-dst new-dst 8-bit-version-of MOVZX + ! Do we have to look at the aux vector? + new-dst HEX: 80 CMP + "end" get JL + ! Yes, this is a non-ASCII character. Load aux vector + temp src string-aux-offset [+] MOV + new-dst temp XCHG + ! Compute index + new-dst index ADD + new-dst index ADD + ! Load high 16 bits + new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV + new-dst new-dst 16-bit-version-of MOVZX + new-dst 7 SHL + ! Compute code point + new-dst temp XOR + "end" resolve-label + dst new-dst int-rep %copy + ] with-small-register ; + +M:: x86 %set-string-nth-fast ( ch str index temp -- ) + ch { index str temp } 8 [| new-ch | + new-ch ch int-rep %copy + temp str index [+] LEA + temp string-offset [+] new-ch 8-bit-version-of MOV + ] with-small-register ; + +:: %alien-integer-getter ( dst src size quot -- ) + dst { src } size [| new-dst | + new-dst dup size n-bit-version-of dup src [] MOV + quot call + dst new-dst int-rep %copy + ] with-small-register ; inline + +: %alien-unsigned-getter ( dst src size -- ) + [ MOVZX ] %alien-integer-getter ; inline + +M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ; +M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ; +M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ; + +: %alien-signed-getter ( dst src size -- ) + [ MOVSX ] %alien-integer-getter ; inline + +M: x86 %alien-signed-1 8 %alien-signed-getter ; +M: x86 %alien-signed-2 16 %alien-signed-getter ; +M: x86 %alien-signed-4 32 %alien-signed-getter ; + +M: x86 %alien-cell [] MOV ; +M: x86 %alien-float [] MOVSS ; +M: x86 %alien-double [] MOVSD ; +M: x86 %alien-vector [ [] ] dip %copy ; + +:: %alien-integer-setter ( ptr value size -- ) + value { ptr } size [| new-value | + new-value value int-rep %copy + ptr [] new-value size n-bit-version-of MOV + ] with-small-register ; inline + +M: x86 %set-alien-integer-1 8 %alien-integer-setter ; +M: x86 %set-alien-integer-2 16 %alien-integer-setter ; +M: x86 %set-alien-integer-4 32 %alien-integer-setter ; +M: x86 %set-alien-cell [ [] ] dip MOV ; +M: x86 %set-alien-float [ [] ] dip MOVSS ; +M: x86 %set-alien-double [ [] ] dip MOVSD ; +M: x86 %set-alien-vector [ [] ] 2dip %copy ; + +: shift-count? ( reg -- ? ) { ECX RCX } memq? ; + +:: emit-shift ( dst src quot -- ) + src shift-count? [ + dst CL quot call + ] [ + dst shift-count? [ + dst src XCHG + src CL quot call + dst src XCHG + ] [ + ECX native-version-of [ + CL src MOV + drop dst CL quot call + ] with-save/restore + ] if + ] if ; inline + +M: x86 %shl int-rep two-operand [ SHL ] emit-shift ; +M: x86 %shr int-rep two-operand [ SHR ] emit-shift ; +M: x86 %sar int-rep two-operand [ SAR ] emit-shift ; + +M: x86 %vm-field-ptr ( dst field -- ) + [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ] + [ vm-field-offset ADD ] 2bi ; + +: load-zone-ptr ( reg -- ) + #! Load pointer to start of zone array + "nursery" %vm-field-ptr ; + +: load-allot-ptr ( nursery-ptr allot-ptr -- ) + [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ; + +: inc-allot-ptr ( nursery-ptr n -- ) + [ cell [+] ] dip 8 align ADD ; + +: store-header ( temp class -- ) + [ [] ] [ type-number tag-fixnum ] bi* MOV ; + +: store-tagged ( dst tag -- ) + tag-number OR ; + +M:: x86 %allot ( dst size class nursery-ptr -- ) + nursery-ptr dst load-allot-ptr + dst class store-header + dst class store-tagged + nursery-ptr size inc-allot-ptr ; + +M:: x86 %write-barrier ( src card# table -- ) + #! Mark the card pointed to by vreg. + ! Mark the card + card# src MOV + card# card-bits SHR + table "cards_offset" %vm-field-ptr + table table [] MOV + table card# [+] card-mark MOV + + ! Mark the card deck + card# deck-bits card-bits - SHR + table "decks_offset" %vm-field-ptr + table table [] MOV + table card# [+] card-mark MOV ; + +M:: x86 %check-nursery ( label temp1 temp2 -- ) + temp1 load-zone-ptr + temp2 temp1 cell [+] MOV + temp2 1024 ADD + temp1 temp1 3 cells [+] MOV + temp2 temp1 CMP + label JLE ; + +M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ; + +M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ; + +M: x86 %alien-global ( dst symbol library -- ) + [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ; + +M: x86 %epilogue ( n -- ) cell - incr-stack-reg ; + +:: %boolean ( dst temp word -- ) + dst \ f tag-number MOV + temp 0 MOV \ t rc-absolute-cell rel-immediate + dst temp word execute ; inline + +M:: x86 %compare ( dst src1 src2 cc temp -- ) + src1 src2 CMP + cc order-cc { + { cc< [ dst temp \ CMOVL %boolean ] } + { cc<= [ dst temp \ CMOVLE %boolean ] } + { cc> [ dst temp \ CMOVG %boolean ] } + { cc>= [ dst temp \ CMOVGE %boolean ] } + { cc= [ dst temp \ CMOVE %boolean ] } + { cc/= [ dst temp \ CMOVNE %boolean ] } + } case ; + +M: x86 %compare-imm ( dst src1 src2 cc temp -- ) + %compare ; + +M:: x86 %compare-branch ( label src1 src2 cc -- ) + src1 src2 CMP + cc order-cc { + { cc< [ label JL ] } + { cc<= [ label JLE ] } + { cc> [ label JG ] } + { cc>= [ label JGE ] } + { cc= [ label JE ] } + { cc/= [ label JNE ] } + } case ; + +M: x86 %compare-imm-branch ( label src1 src2 cc -- ) + %compare-branch ; + M: x86 %add-float double-rep two-operand ADDSD ; M: x86 %sub-float double-rep two-operand SUBSD ; M: x86 %mul-float double-rep two-operand MULSD ; @@ -177,6 +480,86 @@ M:: x86 %box-float ( dst src temp -- ) dst 16 float temp %allot dst float-offset [+] src MOVSD ; +: %cmov-float= ( dst src -- ) + [ + "no-move" define-label + + "no-move" get [ JNE ] [ JP ] bi + MOV + "no-move" resolve-label + ] with-scope ; + +: %cmov-float/= ( dst src -- ) + [ + "no-move" define-label + "move" define-label + + "move" get JP + "no-move" get JE + "move" resolve-label + MOV + "no-move" resolve-label + ] with-scope ; + +:: (%compare-float) ( dst src1 src2 cc temp compare -- ) + cc { + { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] } + { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] } + { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] } + { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] } + { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] } + { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] } + { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] } + { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] } + { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] } + { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] } + { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] } + { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] } + { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] } + { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] } + } case ; inline + +M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- ) + \ COMISD (%compare-float) ; + +M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- ) + \ UCOMISD (%compare-float) ; + +: %jump-float= ( label -- ) + [ + "no-jump" define-label + "no-jump" get JP + JE + "no-jump" resolve-label + ] with-scope ; + +: %jump-float/= ( label -- ) + [ JNE ] [ JP ] bi ; + +:: (%compare-float-branch) ( label src1 src2 cc compare -- ) + cc { + { cc< [ src2 src1 \ compare execute( a b -- ) label JA ] } + { cc<= [ src2 src1 \ compare execute( a b -- ) label JAE ] } + { cc> [ src1 src2 \ compare execute( a b -- ) label JA ] } + { cc>= [ src1 src2 \ compare execute( a b -- ) label JAE ] } + { cc= [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] } + { cc<> [ src1 src2 \ compare execute( a b -- ) label JNE ] } + { cc<>= [ src1 src2 \ compare execute( a b -- ) label JNP ] } + { cc/< [ src2 src1 \ compare execute( a b -- ) label JBE ] } + { cc/<= [ src2 src1 \ compare execute( a b -- ) label JB ] } + { cc/> [ src1 src2 \ compare execute( a b -- ) label JBE ] } + { cc/>= [ src1 src2 \ compare execute( a b -- ) label JB ] } + { cc/= [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] } + { cc/<> [ src1 src2 \ compare execute( a b -- ) label JE ] } + { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP ] } + } case ; + +M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- ) + \ COMISD (%compare-float-branch) ; + +M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- ) + \ UCOMISD (%compare-float-branch) ; + M:: x86 %box-vector ( dst src rep temp -- ) dst rep rep-size 2 cells + byte-array temp %allot 16 tag-fixnum dst 1 byte-array tag-number %set-slot-imm @@ -543,390 +926,6 @@ M: x86 %integer>scalar drop MOVD ; M: x86 %scalar>integer drop MOVD ; -M: x86 %unbox-alien ( dst src -- ) - alien-offset [+] MOV ; - -M:: x86 %unbox-any-c-ptr ( dst src temp -- ) - [ - { "is-byte-array" "end" "start" } [ define-label ] each - dst 0 MOV - temp src MOV - ! We come back here with displaced aliens - "start" resolve-label - ! Is the object f? - temp \ f tag-number CMP - "end" get JE - ! Is the object an alien? - temp header-offset [+] alien type-number tag-fixnum CMP - "is-byte-array" get JNE - ! If so, load the offset and add it to the address - dst temp alien-offset [+] ADD - ! Now recurse on the underlying alien - temp temp underlying-alien-offset [+] MOV - "start" get JMP - "is-byte-array" resolve-label - ! Add byte array address to address being computed - dst temp ADD - ! Add an offset to start of byte array's data - dst byte-array-offset ADD - "end" resolve-label - ] with-scope ; - -: alien@ ( reg n -- op ) cells alien tag-number - [+] ; - -:: %allot-alien ( dst displacement base temp -- ) - dst 4 cells alien temp %allot - dst 1 alien@ base MOV ! alien - dst 2 alien@ \ f tag-number MOV ! expired - dst 3 alien@ displacement MOV ! displacement - ; - -M:: x86 %box-alien ( dst src temp -- ) - [ - "end" define-label - dst \ f tag-number MOV - src 0 CMP - "end" get JE - dst src \ f tag-number temp %allot-alien - "end" resolve-label - ] with-scope ; - -M:: x86 %box-displaced-alien ( dst displacement base displacement' base' base-class -- ) - [ - "end" define-label - "ok" define-label - ! If displacement is zero, return the base - dst base MOV - displacement 0 CMP - "end" get JE - ! Quickly use displacement' before its needed for real, as allot temporary - dst 4 cells alien displacement' %allot - ! If base is already a displaced alien, unpack it - base' base MOV - displacement' displacement MOV - base \ f tag-number CMP - "ok" get JE - base header-offset [+] alien type-number tag-fixnum CMP - "ok" get JNE - ! displacement += base.displacement - displacement' base 3 alien@ ADD - ! base = base.base - base' base 1 alien@ MOV - "ok" resolve-label - dst 1 alien@ base' MOV ! alien - dst 2 alien@ \ f tag-number MOV ! expired - dst 3 alien@ displacement' MOV ! displacement - "end" resolve-label - ] with-scope ; - -! The 'small-reg' mess is pretty crappy, but its only used on x86-32. -! On x86-64, all registers have 8-bit versions. However, a similar -! problem arises for shifts, where the shift count must be in CL, and -! so one day I will fix this properly by adding precoloring to the -! register allocator. - -HOOK: has-small-reg? cpu ( reg size -- ? ) - -CONSTANT: have-byte-regs { EAX ECX EDX EBX } - -M: x86.32 has-small-reg? - { - { 8 [ have-byte-regs memq? ] } - { 16 [ drop t ] } - { 32 [ drop t ] } - } case ; - -M: x86.64 has-small-reg? 2drop t ; - -: small-reg-that-isn't ( exclude -- reg' ) - [ have-byte-regs ] dip - [ native-version-of ] map - '[ _ memq? not ] find nip ; - -: with-save/restore ( reg quot -- ) - [ drop PUSH ] [ call ] [ drop POP ] 2tri ; inline - -:: with-small-register ( dst exclude size quot: ( new-dst -- ) -- ) - ! If the destination register overlaps a small register with - ! 'size' bits, we call the quot with that. Otherwise, we find a - ! small register that is not in exclude, and call quot, saving and - ! restoring the small register. - dst size has-small-reg? [ dst quot call ] [ - exclude small-reg-that-isn't - [ quot call ] with-save/restore - ] if ; inline - -M:: x86 %string-nth ( dst src index temp -- ) - ! We request a small-reg of size 8 since those of size 16 are - ! a superset. - "end" define-label - dst { src index temp } 8 [| new-dst | - ! Load the least significant 7 bits into new-dst. - ! 8th bit indicates whether we have to load from - ! the aux vector or not. - temp src index [+] LEA - new-dst 8-bit-version-of temp string-offset [+] MOV - new-dst new-dst 8-bit-version-of MOVZX - ! Do we have to look at the aux vector? - new-dst HEX: 80 CMP - "end" get JL - ! Yes, this is a non-ASCII character. Load aux vector - temp src string-aux-offset [+] MOV - new-dst temp XCHG - ! Compute index - new-dst index ADD - new-dst index ADD - ! Load high 16 bits - new-dst 16-bit-version-of new-dst byte-array-offset [+] MOV - new-dst new-dst 16-bit-version-of MOVZX - new-dst 7 SHL - ! Compute code point - new-dst temp XOR - "end" resolve-label - dst new-dst int-rep %copy - ] with-small-register ; - -M:: x86 %set-string-nth-fast ( ch str index temp -- ) - ch { index str temp } 8 [| new-ch | - new-ch ch int-rep %copy - temp str index [+] LEA - temp string-offset [+] new-ch 8-bit-version-of MOV - ] with-small-register ; - -:: %alien-integer-getter ( dst src size quot -- ) - dst { src } size [| new-dst | - new-dst dup size n-bit-version-of dup src [] MOV - quot call - dst new-dst int-rep %copy - ] with-small-register ; inline - -: %alien-unsigned-getter ( dst src size -- ) - [ MOVZX ] %alien-integer-getter ; inline - -M: x86 %alien-unsigned-1 8 %alien-unsigned-getter ; -M: x86 %alien-unsigned-2 16 %alien-unsigned-getter ; -M: x86 %alien-unsigned-4 32 [ 2drop ] %alien-integer-getter ; - -: %alien-signed-getter ( dst src size -- ) - [ MOVSX ] %alien-integer-getter ; inline - -M: x86 %alien-signed-1 8 %alien-signed-getter ; -M: x86 %alien-signed-2 16 %alien-signed-getter ; -M: x86 %alien-signed-4 32 %alien-signed-getter ; - -M: x86 %alien-cell [] MOV ; -M: x86 %alien-float [] MOVSS ; -M: x86 %alien-double [] MOVSD ; -M: x86 %alien-vector [ [] ] dip %copy ; - -:: %alien-integer-setter ( ptr value size -- ) - value { ptr } size [| new-value | - new-value value int-rep %copy - ptr [] new-value size n-bit-version-of MOV - ] with-small-register ; inline - -M: x86 %set-alien-integer-1 8 %alien-integer-setter ; -M: x86 %set-alien-integer-2 16 %alien-integer-setter ; -M: x86 %set-alien-integer-4 32 %alien-integer-setter ; -M: x86 %set-alien-cell [ [] ] dip MOV ; -M: x86 %set-alien-float [ [] ] dip MOVSS ; -M: x86 %set-alien-double [ [] ] dip MOVSD ; -M: x86 %set-alien-vector [ [] ] 2dip %copy ; - -: shift-count? ( reg -- ? ) { ECX RCX } memq? ; - -:: emit-shift ( dst src1 src2 quot -- ) - src2 shift-count? [ - dst CL quot call - ] [ - dst shift-count? [ - dst src2 XCHG - src2 CL quot call - dst src2 XCHG - ] [ - ECX native-version-of [ - CL src2 MOV - drop dst CL quot call - ] with-save/restore - ] if - ] if ; inline - -M: x86 %shl [ SHL ] emit-shift ; -M: x86 %shr [ SHR ] emit-shift ; -M: x86 %sar [ SAR ] emit-shift ; - -M: x86 %vm-field-ptr ( dst field -- ) - [ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ] - [ vm-field-offset ADD ] 2bi ; - -: load-zone-ptr ( reg -- ) - #! Load pointer to start of zone array - "nursery" %vm-field-ptr ; - -: load-allot-ptr ( nursery-ptr allot-ptr -- ) - [ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ; - -: inc-allot-ptr ( nursery-ptr n -- ) - [ cell [+] ] dip 8 align ADD ; - -: store-header ( temp class -- ) - [ [] ] [ type-number tag-fixnum ] bi* MOV ; - -: store-tagged ( dst tag -- ) - tag-number OR ; - -M:: x86 %allot ( dst size class nursery-ptr -- ) - nursery-ptr dst load-allot-ptr - dst class store-header - dst class store-tagged - nursery-ptr size inc-allot-ptr ; - - -M:: x86 %write-barrier ( src card# table -- ) - #! Mark the card pointed to by vreg. - ! Mark the card - card# src MOV - card# card-bits SHR - table "cards_offset" %vm-field-ptr - table table [] MOV - table card# [+] card-mark MOV - - ! Mark the card deck - card# deck-bits card-bits - SHR - table "decks_offset" %vm-field-ptr - table table [] MOV - table card# [+] card-mark MOV ; - -M:: x86 %check-nursery ( label temp1 temp2 -- ) - temp1 load-zone-ptr - temp2 temp1 cell [+] MOV - temp2 1024 ADD - temp1 temp1 3 cells [+] MOV - temp2 temp1 CMP - label JLE ; - -M: x86 %save-gc-root ( gc-root register -- ) [ gc-root@ ] dip MOV ; - -M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ; - -M: x86 %alien-global ( dst symbol library -- ) - [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ; - -M: x86 %epilogue ( n -- ) cell - incr-stack-reg ; - -:: %boolean ( dst temp word -- ) - dst \ f tag-number MOV - temp 0 MOV \ t rc-absolute-cell rel-immediate - dst temp word execute ; inline - -M:: x86 %compare ( dst src1 src2 cc temp -- ) - src1 src2 CMP - cc order-cc { - { cc< [ dst temp \ CMOVL %boolean ] } - { cc<= [ dst temp \ CMOVLE %boolean ] } - { cc> [ dst temp \ CMOVG %boolean ] } - { cc>= [ dst temp \ CMOVGE %boolean ] } - { cc= [ dst temp \ CMOVE %boolean ] } - { cc/= [ dst temp \ CMOVNE %boolean ] } - } case ; - -M: x86 %compare-imm ( dst src1 src2 cc temp -- ) - %compare ; - -: %cmov-float= ( dst src -- ) - [ - "no-move" define-label - - "no-move" get [ JNE ] [ JP ] bi - MOV - "no-move" resolve-label - ] with-scope ; - -: %cmov-float/= ( dst src -- ) - [ - "no-move" define-label - "move" define-label - - "move" get JP - "no-move" get JE - "move" resolve-label - MOV - "no-move" resolve-label - ] with-scope ; - -:: (%compare-float) ( dst src1 src2 cc temp compare -- ) - cc { - { cc< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] } - { cc<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] } - { cc> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVA %boolean ] } - { cc>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVAE %boolean ] } - { cc= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float= %boolean ] } - { cc<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNE %boolean ] } - { cc<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVNP %boolean ] } - { cc/< [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] } - { cc/<= [ src2 src1 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] } - { cc/> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVBE %boolean ] } - { cc/>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVB %boolean ] } - { cc/= [ src1 src2 \ compare execute( a b -- ) dst temp \ %cmov-float/= %boolean ] } - { cc/<> [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVE %boolean ] } - { cc/<>= [ src1 src2 \ compare execute( a b -- ) dst temp \ CMOVP %boolean ] } - } case ; inline - -M: x86 %compare-float-ordered ( dst src1 src2 cc temp -- ) - \ COMISD (%compare-float) ; - -M: x86 %compare-float-unordered ( dst src1 src2 cc temp -- ) - \ UCOMISD (%compare-float) ; - -M:: x86 %compare-branch ( label src1 src2 cc -- ) - src1 src2 CMP - cc order-cc { - { cc< [ label JL ] } - { cc<= [ label JLE ] } - { cc> [ label JG ] } - { cc>= [ label JGE ] } - { cc= [ label JE ] } - { cc/= [ label JNE ] } - } case ; - -M: x86 %compare-imm-branch ( label src1 src2 cc -- ) - %compare-branch ; - -: %jump-float= ( label -- ) - [ - "no-jump" define-label - "no-jump" get JP - JE - "no-jump" resolve-label - ] with-scope ; - -: %jump-float/= ( label -- ) - [ JNE ] [ JP ] bi ; - -:: (%compare-float-branch) ( label src1 src2 cc compare -- ) - cc { - { cc< [ src2 src1 \ compare execute( a b -- ) label JA ] } - { cc<= [ src2 src1 \ compare execute( a b -- ) label JAE ] } - { cc> [ src1 src2 \ compare execute( a b -- ) label JA ] } - { cc>= [ src1 src2 \ compare execute( a b -- ) label JAE ] } - { cc= [ src1 src2 \ compare execute( a b -- ) label %jump-float= ] } - { cc<> [ src1 src2 \ compare execute( a b -- ) label JNE ] } - { cc<>= [ src1 src2 \ compare execute( a b -- ) label JNP ] } - { cc/< [ src2 src1 \ compare execute( a b -- ) label JBE ] } - { cc/<= [ src2 src1 \ compare execute( a b -- ) label JB ] } - { cc/> [ src1 src2 \ compare execute( a b -- ) label JBE ] } - { cc/>= [ src1 src2 \ compare execute( a b -- ) label JB ] } - { cc/= [ src1 src2 \ compare execute( a b -- ) label %jump-float/= ] } - { cc/<> [ src1 src2 \ compare execute( a b -- ) label JE ] } - { cc/<>= [ src1 src2 \ compare execute( a b -- ) label JP ] } - } case ; - -M: x86 %compare-float-ordered-branch ( label src1 src2 cc -- ) - \ COMISD (%compare-float-branch) ; - -M: x86 %compare-float-unordered-branch ( label src1 src2 cc -- ) - \ UCOMISD (%compare-float-branch) ; - M:: x86 %spill ( src rep dst -- ) dst src rep %copy ; M:: x86 %reload ( dst rep src -- ) dst src rep %copy ; From 7ee8144259922bd9f6f1a73faad8f1ffe55d87bf Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 05:42:41 -0500 Subject: [PATCH 2/3] alien.parser: fix unit tests --- basis/alien/parser/parser-tests.factor | 40 ++++++++++++++------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/basis/alien/parser/parser-tests.factor b/basis/alien/parser/parser-tests.factor index b9ef08e890..195cbb78a2 100644 --- a/basis/alien/parser/parser-tests.factor +++ b/basis/alien/parser/parser-tests.factor @@ -1,27 +1,31 @@ ! (c)2009 Joe Groff bsd license USING: accessors alien.c-types alien.parser alien.syntax -tools.test vocabs.parser ; +tools.test vocabs.parser parser ; IN: alien.parser.tests TYPEDEF: char char2 -[ int ] [ "int" parse-c-type ] unit-test -[ { int 5 } ] [ "int[5]" parse-c-type ] unit-test -[ { int 5 10 11 } ] [ "int[5][10][11]" parse-c-type ] unit-test -[ void* ] [ "int*" parse-c-type ] unit-test -[ void* ] [ "int**" parse-c-type ] unit-test -[ void* ] [ "int***" parse-c-type ] unit-test -[ void* ] [ "int****" parse-c-type ] unit-test -[ char* ] [ "char*" parse-c-type ] unit-test -[ void* ] [ "char**" parse-c-type ] unit-test -[ void* ] [ "char***" parse-c-type ] unit-test -[ void* ] [ "char****" parse-c-type ] unit-test -[ char2 ] [ "char2" parse-c-type ] unit-test -[ char* ] [ "char2*" parse-c-type ] unit-test - SYMBOL: not-c-type -[ "not-c-type" parse-c-type ] [ no-c-type? ] must-fail-with -! uncomment this when string C type parsing goes away -! [ "not-word" parse-c-type ] [ error>> no-word-error? ] must-fail-with +[ + "alien.parser.tests" use-vocab + "alien.c-types" use-vocab + [ int ] [ "int" parse-c-type ] unit-test + [ { int 5 } ] [ "int[5]" parse-c-type ] unit-test + [ { int 5 10 11 } ] [ "int[5][10][11]" parse-c-type ] unit-test + [ void* ] [ "int*" parse-c-type ] unit-test + [ void* ] [ "int**" parse-c-type ] unit-test + [ void* ] [ "int***" parse-c-type ] unit-test + [ void* ] [ "int****" parse-c-type ] unit-test + [ char* ] [ "char*" parse-c-type ] unit-test + [ void* ] [ "char**" parse-c-type ] unit-test + [ void* ] [ "char***" parse-c-type ] unit-test + [ void* ] [ "char****" parse-c-type ] unit-test + [ char2 ] [ "char2" parse-c-type ] unit-test + [ char* ] [ "char2*" parse-c-type ] unit-test + + [ "not-c-type" parse-c-type ] [ no-c-type? ] must-fail-with + [ "not-word" parse-c-type ] [ error>> no-word-error? ] must-fail-with + +] with-file-vocabs \ No newline at end of file From 9a06e6f42458202fd017f0a91b245bb96e81d548 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 28 Sep 2009 06:34:22 -0500 Subject: [PATCH 3/3] math.vectors.simd: add intrinsic for int-4-boa, uint-4-boa, fix tests for C type parser change, fix software fallback for horizontal shifts --- basis/cpu/x86/x86.factor | 17 ++++++++++--- .../math/vectors/simd/functor/functor.factor | 8 +++--- basis/math/vectors/simd/simd-tests.factor | 24 +++++++++--------- basis/math/vectors/simd/simd.factor | 25 ++++++------------- basis/math/vectors/vectors.factor | 8 +++--- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index fd8dc70f89..63356aa5bb 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -592,9 +592,9 @@ M: x86 %broadcast-vector-reps } available-reps ; M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) - rep { + { { - float-4-rep + [ rep float-4-rep eq? ] [ dst src1 float-4-rep %copy dst src2 UNPCKLPS @@ -602,13 +602,22 @@ M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- ) dst src3 MOVLHPS ] } - } case ; + { + [ rep { int-4-rep uint-4-rep } memq? ] + [ + dst src1 int-4-rep %copy + dst src2 PUNPCKLDQ + src3 src4 PUNPCKLDQ + dst src3 PUNPCKLQDQ + ] + } + } cond ; M: x86 %gather-vector-4-reps { ! Can't do this with sse1 since it will want to unbox ! double-precision floats and convert to single precision - { sse2? { float-4-rep } } + { sse2? { float-4-rep int-4-rep uint-4-rep } } } available-reps ; M:: x86 %gather-vector-2 ( dst src1 src2 rep -- ) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 6ed74caa1f..bc42bddf02 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien.c-types assocs byte-arrays classes -effects fry functors generalizations kernel literals locals -math math.functions math.vectors math.vectors.simd.intrinsics +USING: accessors alien.c-types assocs byte-arrays classes effects fry +functors generalizations kernel literals locals math math.functions +math.vectors math.vectors.private math.vectors.simd.intrinsics math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture namespaces arrays quotations ; @@ -141,6 +141,8 @@ M: A set-nth-unsafe underlying>> SET-NTH call ; inline M: A like drop dup \ A instance? [ >A ] unless ; inline +M: A new-underlying drop \ A boa ; inline + M: A new-sequence drop dup N = [ drop 16 \ A boa ] diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 001f1be814..c1428b9c33 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -6,18 +6,18 @@ tools.test vocabs assocs compiler.cfg.debugger words locals math.vectors.specialization combinators cpu.architecture math.vectors.simd.intrinsics namespaces byte-arrays alien specialized-arrays classes.struct eval ; -FROM: alien.c-types => c-type-boxed-class ; -SPECIALIZED-ARRAY: float -SIMD: char -SIMD: uchar -SIMD: short -SIMD: ushort -SIMD: int -SIMD: uint -SIMD: longlong -SIMD: ulonglong -SIMD: float -SIMD: double +QUALIFIED-WITH: alien.c-types c +SPECIALIZED-ARRAY: c:float +SIMD: c:char +SIMD: c:uchar +SIMD: c:short +SIMD: c:ushort +SIMD: c:int +SIMD: c:uint +SIMD: c:longlong +SIMD: c:ulonglong +SIMD: c:float +SIMD: c:double IN: math.vectors.simd.tests ! Make sure the functor doesn't generate bogus vocabularies diff --git a/basis/math/vectors/simd/simd.factor b/basis/math/vectors/simd/simd.factor index 71936b2657..e7d4f0e94b 100644 --- a/basis/math/vectors/simd/simd.factor +++ b/basis/math/vectors/simd/simd.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: alien.c-types combinators fry kernel lexer math math.parser +USING: alien.c-types combinators fry kernel parser math math.parser math.vectors.simd.functor sequences splitting vocabs.generated -vocabs.loader vocabs.parser words ; +vocabs.loader vocabs.parser words accessors ; QUALIFIED-WITH: alien.c-types c IN: math.vectors.simd @@ -11,22 +11,11 @@ ERROR: bad-base-type type ; > "math.vectors.simd.instances." prepend ; -: parse-base-type ( string -- c-type ) - { - { "char" [ c:char ] } - { "uchar" [ c:uchar ] } - { "short" [ c:short ] } - { "ushort" [ c:ushort ] } - { "int" [ c:int ] } - { "uint" [ c:uint ] } - { "longlong" [ c:longlong ] } - { "ulonglong" [ c:ulonglong ] } - { "float" [ c:float ] } - { "double" [ c:double ] } - [ bad-base-type ] - } case ; +: parse-base-type ( c-type -- c-type ) + dup { c:char c:uchar c:short c:ushort c:int c:uint c:longlong c:ulonglong c:float c:double } memq? + [ bad-base-type ] unless ; PRIVATE> @@ -38,4 +27,4 @@ PRIVATE> ] generate-vocab ; SYNTAX: SIMD: - scan define-simd-vocab use-vocab ; + scan-word define-simd-vocab use-vocab ; diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 1dcff8e8a9..de9ba51aec 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -64,6 +64,8 @@ PRIVATE> : bitandn ( x y -- z ) [ bitnot ] dip bitand ; inline +GENERIC: new-underlying ( underlying seq -- seq' ) + PRIVATE> : vbitand ( u v -- w ) over '[ _ [ bitand ] fp-bitwise-op ] 2map ; @@ -90,12 +92,10 @@ PRIVATE> : vrshift ( u n -- w ) neg '[ _ shift ] map ; : hlshift ( u n -- w ) - [ clone ] dip - '[ _ append 16 tail* ] change-underlying ; + [ [ underlying>> ] dip prepend 16 head ] [ drop ] 2bi new-underlying ; : hrshift ( u n -- w ) - [ clone ] dip - '[ _ prepend 16 head* ] change-underlying ; + [ [ underlying>> ] dip append 16 tail* ] [ drop ] 2bi new-underlying ; : vfloor ( u -- v ) [ floor ] map ; : vceiling ( u -- v ) [ ceiling ] map ;