From e256846acd7608532c0ca686b92e2842b18a0401 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 5 Dec 2008 06:38:51 -0600 Subject: [PATCH] Tweak string representation; high bit indicates if character has high bits in aux vector. Avoids memory access in common case. Split set-string-nth into two primitives; set-string-nth-fast is open-coded by optimizing compiler. 13% improvement on reverse-complement --- basis/bootstrap/image/image.factor | 5 ++ basis/compiler/cfg/def-use/def-use.factor | 2 + .../cfg/instructions/instructions.factor | 1 + .../compiler/cfg/intrinsics/intrinsics.factor | 2 + .../cfg/intrinsics/slots/slots.factor | 4 + basis/compiler/codegen/codegen.factor | 8 ++ basis/cpu/architecture/architecture.factor | 1 + basis/cpu/x86/x86.factor | 23 ++++- .../known-words/known-words.factor | 3 +- core/bootstrap/primitives.factor | 3 +- core/strings/strings.factor | 11 ++- vm/primitives.c | 3 +- vm/types.c | 84 ++++++++++++------- vm/types.h | 3 +- 14 files changed, 113 insertions(+), 40 deletions(-) diff --git a/basis/bootstrap/image/image.factor b/basis/bootstrap/image/image.factor index f352a4a254..380c9b2348 100644 --- a/basis/bootstrap/image/image.factor +++ b/basis/bootstrap/image/image.factor @@ -351,7 +351,12 @@ M: wrapper ' : pad-bytes ( seq -- newseq ) dup length bootstrap-cell align 0 pad-right ; +: check-string ( string -- ) + [ 127 > ] contains? + [ "Bootstrap cannot emit non-ASCII strings" throw ] when ; + : emit-string ( string -- ptr ) + dup check-string string type-number object tag-number [ dup length emit-fixnum f ' emit diff --git a/basis/compiler/cfg/def-use/def-use.factor b/basis/compiler/cfg/def-use/def-use.factor index 3825ae480e..068a6a6377 100644 --- a/basis/compiler/cfg/def-use/def-use.factor +++ b/basis/compiler/cfg/def-use/def-use.factor @@ -15,6 +15,7 @@ M: ##dispatch defs-vregs temp>> 1array ; M: ##slot defs-vregs dst/tmp-vregs ; M: ##set-slot defs-vregs temp>> 1array ; M: ##string-nth defs-vregs dst/tmp-vregs ; +M: ##set-string-nth-fast defs-vregs temp>> 1array ; M: ##compare defs-vregs dst/tmp-vregs ; M: ##compare-imm defs-vregs dst/tmp-vregs ; M: ##compare-float defs-vregs dst/tmp-vregs ; @@ -31,6 +32,7 @@ M: ##slot-imm uses-vregs obj>> 1array ; M: ##set-slot uses-vregs [ src>> ] [ obj>> ] [ slot>> ] tri 3array ; M: ##set-slot-imm uses-vregs [ src>> ] [ obj>> ] bi 2array ; M: ##string-nth uses-vregs [ obj>> ] [ index>> ] bi 2array ; +M: ##set-string-nth-fast uses-vregs [ src>> ] [ obj>> ] [ index>> ] tri 3array ; M: ##conditional-branch uses-vregs [ src1>> ] [ src2>> ] bi 2array ; M: ##compare-imm-branch uses-vregs src1>> 1array ; M: ##dispatch uses-vregs src>> 1array ; diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 62d4990c92..2e7e044739 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -73,6 +73,7 @@ INSN: ##set-slot-imm < ##write { obj vreg } { slot integer } { tag integer } ; ! String element access INSN: ##string-nth < ##flushable { obj vreg } { index vreg } { temp vreg } ; +INSN: ##set-string-nth-fast < ##effect { obj vreg } { index vreg } { temp vreg } ; ! Integer arithmetic INSN: ##add < ##commutative ; diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index aaa45c3937..cfc04fa036 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -45,6 +45,7 @@ IN: compiler.cfg.intrinsics slots.private:slot slots.private:set-slot strings.private:string-nth + strings.private:set-string-nth-fast classes.tuple.private: arrays: byte-arrays: @@ -126,6 +127,7 @@ IN: compiler.cfg.intrinsics { \ slots.private:slot [ emit-slot iterate-next ] } { \ slots.private:set-slot [ emit-set-slot iterate-next ] } { \ strings.private:string-nth [ drop emit-string-nth iterate-next ] } + { \ strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast iterate-next ] } { \ classes.tuple.private: [ emit- iterate-next ] } { \ arrays: [ emit- iterate-next ] } { \ byte-arrays: [ emit- iterate-next ] } diff --git a/basis/compiler/cfg/intrinsics/slots/slots.factor b/basis/compiler/cfg/intrinsics/slots/slots.factor index fec234a576..60ae1d2d0a 100644 --- a/basis/compiler/cfg/intrinsics/slots/slots.factor +++ b/basis/compiler/cfg/intrinsics/slots/slots.factor @@ -54,3 +54,7 @@ IN: compiler.cfg.intrinsics.slots : emit-string-nth ( -- ) 2inputs swap ^^untag-fixnum ^^string-nth ^^tag-fixnum ds-push ; + +: emit-set-string-nth-fast ( -- ) + 3inputs [ ^^untag-fixnum ] [ ^^untag-fixnum ] [ ] tri* + swap i ##set-string-nth-fast ; diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 2161c8b091..96db72c6ea 100644 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -131,6 +131,14 @@ M: ##string-nth generate-insn [ temp>> register ] } cleave %string-nth ; +M: ##set-string-nth-fast generate-insn + { + [ src>> register ] + [ obj>> register ] + [ index>> register ] + [ temp>> register ] + } cleave %set-string-nth-fast ; + : dst/src ( insn -- dst src ) [ dst>> register ] [ src>> register ] bi ; inline diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 12b6809df9..eb93a8dbb5 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -59,6 +59,7 @@ HOOK: %set-slot cpu ( src obj slot tag temp -- ) HOOK: %set-slot-imm cpu ( src obj slot tag -- ) HOOK: %string-nth cpu ( dst obj index temp -- ) +HOOK: %set-string-nth-fast cpu ( ch obj index temp -- ) HOOK: %add cpu ( dst src1 src2 -- ) HOOK: %add-imm cpu ( dst src1 src2 -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 3dbcd2eabf..d7234eb389 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -365,23 +365,38 @@ M:: x86 %box-alien ( dst src temp -- ) M:: x86 %string-nth ( dst src index temp -- ) "end" define-label dst { src index temp } [| new-dst | + ! Load the least significant 7 bits into new-dst. + ! 8th bit indicates whether we have to load from + ! the aux vector or not. temp src index [+] LEA new-dst 1 small-reg temp string-offset [+] MOV new-dst new-dst 1 small-reg MOVZX + ! Do we have to look at the aux vector? + new-dst HEX: 80 CMP + "end" get JL + ! Yes, this is a non-ASCII character. Load aux vector temp src string-aux-offset [+] MOV - temp \ f tag-number CMP - "end" get JE new-dst temp XCHG + ! Compute index new-dst index ADD new-dst index ADD + ! Load high 16 bits new-dst 2 small-reg new-dst byte-array-offset [+] MOV new-dst new-dst 2 small-reg MOVZX - new-dst 8 SHL - new-dst temp OR + new-dst 7 SHL + ! Compute code point + new-dst temp XOR "end" resolve-label dst new-dst ?MOV ] with-small-register ; +M:: x86 %set-string-nth-fast ( ch str index temp -- ) + ch { index str } [| new-ch | + new-ch ch ?MOV + temp str index [+] LEA + temp string-offset [+] new-ch 1 small-reg MOV + ] with-small-register ; + :: %alien-integer-getter ( dst src size quot -- ) dst { src } [| new-dst | new-dst dup size small-reg dup src [] MOV diff --git a/basis/stack-checker/known-words/known-words.factor b/basis/stack-checker/known-words/known-words.factor index 26e1b81c93..2cb3d1f006 100644 --- a/basis/stack-checker/known-words/known-words.factor +++ b/basis/stack-checker/known-words/known-words.factor @@ -562,7 +562,8 @@ M: object infer-call* \ string-nth { fixnum string } { fixnum } define-primitive \ string-nth make-flushable -\ set-string-nth { fixnum fixnum string } { } define-primitive +\ set-string-nth-slow { fixnum fixnum string } { } define-primitive +\ set-string-nth-fast { fixnum fixnum string } { } define-primitive \ resize-array { integer array } { array } define-primitive \ resize-array make-flushable diff --git a/core/bootstrap/primitives.factor b/core/bootstrap/primitives.factor index a4cee5c7b9..0a7e5fe233 100644 --- a/core/bootstrap/primitives.factor +++ b/core/bootstrap/primitives.factor @@ -499,7 +499,8 @@ tuple { "alien-address" "alien" } { "set-slot" "slots.private" } { "string-nth" "strings.private" } - { "set-string-nth" "strings.private" } + { "set-string-nth-fast" "strings.private" } + { "set-string-nth-slow" "strings.private" } { "resize-array" "arrays" } { "resize-string" "strings" } { "" "arrays" } diff --git a/core/strings/strings.factor b/core/strings/strings.factor index 39628ede98..0c3f918fdc 100644 --- a/core/strings/strings.factor +++ b/core/strings/strings.factor @@ -16,6 +16,10 @@ IN: strings : rehash-string ( str -- ) 1 over sequence-hashcode swap set-string-hashcode ; inline +: set-string-nth ( ch n str -- ) + pick HEX: 7f fixnum<= + [ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline + PRIVATE> M: string equal? @@ -27,8 +31,9 @@ M: string equal? ] if ; M: string hashcode* - nip dup string-hashcode [ ] - [ dup rehash-string string-hashcode ] ?if ; + nip + dup string-hashcode + [ ] [ dup rehash-string string-hashcode ] ?if ; M: string length length>> ; @@ -38,7 +43,7 @@ M: string nth-unsafe M: string set-nth-unsafe dup reset-string-hashcode - [ [ >fixnum ] dip >fixnum ] dip set-string-nth ; + [ >fixnum ] [ >fixnum ] [ ] tri* set-string-nth ; M: string clone (clone) [ clone ] change-aux ; diff --git a/vm/primitives.c b/vm/primitives.c index 135d5478ea..a01a8653b7 100755 --- a/vm/primitives.c +++ b/vm/primitives.c @@ -105,7 +105,8 @@ void *primitives[] = { primitive_alien_address, primitive_set_slot, primitive_string_nth, - primitive_set_string_nth, + primitive_set_string_nth_fast, + primitive_set_string_nth_slow, primitive_resize_array, primitive_resize_string, primitive_array, diff --git a/vm/types.c b/vm/types.c index d6e78013cb..a614011e7e 100755 --- a/vm/types.c +++ b/vm/types.c @@ -328,43 +328,62 @@ void primitive_tuple_boa(void) /* Strings */ CELL string_nth(F_STRING* string, CELL index) { + /* If high bit is set, the most significant 16 bits of the char + come from the aux vector. The least significant bit of the + corresponding aux vector entry is negated, so that we can + XOR the two components together and get the original code point + back. */ CELL ch = bget(SREF(string,index)); - if(string->aux == F) + if((ch & 0x80) == 0) return ch; else { F_BYTE_ARRAY *aux = untag_object(string->aux); - return (cget(BREF(aux,index * sizeof(u16))) << 8) | ch; + return (cget(BREF(aux,index * sizeof(u16))) << 7) ^ ch; } } -/* allocates memory */ -void set_string_nth(F_STRING* string, CELL index, CELL value) +void set_string_nth_fast(F_STRING* string, CELL index, CELL ch) { - bput(SREF(string,index),value & 0xff); + bput(SREF(string,index),ch); +} +void set_string_nth_slow(F_STRING* string, CELL index, CELL ch) +{ F_BYTE_ARRAY *aux; + bput(SREF(string,index),(ch & 0x7f) | 0x80); + if(string->aux == F) { - if(value <= 0xff) - return; - else - { - REGISTER_UNTAGGED(string); - aux = allot_byte_array( - untag_fixnum_fast(string->length) - * sizeof(u16)); - UNREGISTER_UNTAGGED(string); + REGISTER_UNTAGGED(string); + /* We don't need to pre-initialize the + byte array with any data, since we + only ever read from the aux vector + if the most significant bit of a + character is set. Initially all of + the bits are clear. */ + aux = allot_byte_array_internal( + untag_fixnum_fast(string->length) + * sizeof(u16)); + UNREGISTER_UNTAGGED(string); - write_barrier((CELL)string); - string->aux = tag_object(aux); - } + write_barrier((CELL)string); + string->aux = tag_object(aux); } else aux = untag_object(string->aux); - cput(BREF(aux,index * sizeof(u16)),value >> 8); + cput(BREF(aux,index * sizeof(u16)),(ch >> 7) ^ 1); +} + +/* allocates memory */ +void set_string_nth(F_STRING* string, CELL index, CELL ch) +{ + if(ch <= 0x7f) + set_string_nth_fast(string,index,ch); + else + set_string_nth_slow(string,index,ch); } /* untagged */ @@ -382,17 +401,8 @@ F_STRING* allot_string_internal(CELL capacity) /* allocates memory */ void fill_string(F_STRING *string, CELL start, CELL capacity, CELL fill) { - if(fill == 0) - { - memset((void *)SREF(string,start),'\0',capacity - start); - - if(string->aux != F) - { - F_BYTE_ARRAY *aux = untag_object(string->aux); - memset((void *)BREF(aux,start * sizeof(u16)),'\0', - (capacity - start) * sizeof(u16)); - } - } + if(fill <= 0x7f) + memset((void *)SREF(string,start),fill,capacity - start); else { CELL i; @@ -572,3 +582,19 @@ void primitive_set_string_nth(void) CELL value = untag_fixnum_fast(dpop()); set_string_nth(string,index,value); } + +void primitive_set_string_nth_fast(void) +{ + F_STRING *string = untag_object(dpop()); + CELL index = untag_fixnum_fast(dpop()); + CELL value = untag_fixnum_fast(dpop()); + set_string_nth_fast(string,index,value); +} + +void primitive_set_string_nth_slow(void) +{ + F_STRING *string = untag_object(dpop()); + CELL index = untag_fixnum_fast(dpop()); + CELL value = untag_fixnum_fast(dpop()); + set_string_nth_slow(string,index,value); +} diff --git a/vm/types.h b/vm/types.h index 47747547db..242939c502 100755 --- a/vm/types.h +++ b/vm/types.h @@ -152,7 +152,8 @@ CELL string_nth(F_STRING* string, CELL index); void set_string_nth(F_STRING* string, CELL index, CELL value); void primitive_string_nth(void); -void primitive_set_string_nth(void); +void primitive_set_string_nth_slow(void); +void primitive_set_string_nth_fast(void); F_WORD *allot_word(CELL vocab, CELL name); void primitive_word(void);