diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 13c9f55b9f..8ee21154fa 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -89,12 +89,6 @@ INSN: ##set-slot-imm use: src/tagged-rep obj/tagged-rep literal: slot tag ; -! String element access -INSN: ##string-nth -def: dst/int-rep -use: obj/tagged-rep index/int-rep -temp: temp/int-rep ; - ! Register transfers INSN: ##copy def: dst @@ -806,7 +800,6 @@ UNION: kill-vreg-insn UNION: def-is-use-insn ##box-alien ##box-displaced-alien -##string-nth ##unbox-any-c-ptr ; SYMBOL: vreg-insn diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 231cd5cee9..4faa4809e5 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -56,7 +56,7 @@ IN: compiler.cfg.intrinsics { kernel:eq? [ emit-eq ] } { slots.private:slot [ emit-slot ] } { slots.private:set-slot [ emit-set-slot ] } - { strings.private:string-nth [ drop emit-string-nth ] } + { strings.private:string-nth-fast [ drop emit-string-nth-fast ] } { strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast ] } { classes.tuple.private: [ emit- ] } { arrays: [ emit- ] } diff --git a/basis/compiler/cfg/intrinsics/strings/strings.factor b/basis/compiler/cfg/intrinsics/strings/strings.factor index dea9510a99..70d8442a2b 100644 --- a/basis/compiler/cfg/intrinsics/strings/strings.factor +++ b/basis/compiler/cfg/intrinsics/strings/strings.factor @@ -5,9 +5,11 @@ compiler.cfg.instructions compiler.cfg.registers compiler.cfg.stacks cpu.architecture ; IN: compiler.cfg.intrinsics.strings -: emit-string-nth ( -- ) - 2inputs swap ^^string-nth ds-push ; +: (string-nth) ( n string -- base offset rep c-type ) + ^^tagged>integer swap ^^add string-offset int-rep uchar ; inline + +: emit-string-nth-fast ( -- ) + 2inputs (string-nth) ^^load-memory-imm ds-push ; : emit-set-string-nth-fast ( -- ) - 3inputs ^^tagged>integer ^^add string-offset - int-rep uchar ##store-memory-imm ; + 3inputs (string-nth) ##store-memory-imm ; diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index d0747d4a1e..63571e7874 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -93,7 +93,6 @@ CODEGEN: ##slot %slot CODEGEN: ##slot-imm %slot-imm CODEGEN: ##set-slot %set-slot CODEGEN: ##set-slot-imm %set-slot-imm -CODEGEN: ##string-nth %string-nth CODEGEN: ##add %add CODEGEN: ##add-imm %add-imm CODEGEN: ##sub %sub diff --git a/basis/compiler/tests/low-level-ir.factor b/basis/compiler/tests/low-level-ir.factor index 7ce43e9524..57612e730e 100644 --- a/basis/compiler/tests/low-level-ir.factor +++ b/basis/compiler/tests/low-level-ir.factor @@ -97,15 +97,6 @@ IN: compiler.tests.low-level-ir } compile-test-bb ] unit-test -[ CHAR: l ] [ - V{ - T{ ##load-reference f 0 "hello world" } - T{ ##load-tagged f 1 3 } - T{ ##string-nth f 0 0 1 2 } - T{ ##shl-imm f 0 0 4 } - } compile-test-bb -] unit-test - [ 1 ] [ V{ T{ ##load-tagged f 0 32 } diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index 55629507ab..ada01e213a 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -254,8 +254,8 @@ generic-comparison-ops [ ] "outputs" set-word-prop ] each -\ string-nth [ - 2drop fixnum 0 23 2^ [a,b] +\ string-nth-fast [ + 2drop fixnum 0 255 [a,b] ] "outputs" set-word-prop { diff --git a/basis/compiler/tree/propagation/propagation-tests.factor b/basis/compiler/tree/propagation/propagation-tests.factor index ad8a75ecdd..d1a1dd18a6 100644 --- a/basis/compiler/tree/propagation/propagation-tests.factor +++ b/basis/compiler/tree/propagation/propagation-tests.factor @@ -968,3 +968,10 @@ M: tuple-with-read-only-slot clone [ t ] [ [ { 1 } diff ] { diff } inlined? ] unit-test [ f ] [ [ { 1 } swap diff ] { diff } inlined? ] unit-test ! We could do this + +! Output range for string-nth now that string-nth is a library word and +! not a primitive +[ t ] [ + ! Should actually be 0 23 2^ 1 - [a,b] + [ string-nth ] final-info first interval>> 0 23 2^ [a,b] = +] unit-test diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index a77337d1a0..d7e77d6267 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -244,8 +244,6 @@ HOOK: %slot-imm cpu ( dst obj slot tag -- ) HOOK: %set-slot cpu ( src obj slot scale tag -- ) HOOK: %set-slot-imm cpu ( src obj slot tag -- ) -HOOK: %string-nth cpu ( dst obj index temp -- ) - HOOK: %add cpu ( dst src1 src2 -- ) HOOK: %add-imm cpu ( dst src1 src2 -- ) HOOK: %sub cpu ( dst src1 src2 -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 3c23ae1b5f..70e8ef11ea 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -144,22 +144,6 @@ M: ppc %slot-imm ( dst obj slot tag -- ) slot-offset LWZ ; M: ppc %set-slot ( src obj slot -- ) swapd STWX ; M: ppc %set-slot-imm ( src obj slot tag -- ) slot-offset STW ; -M:: ppc %string-nth ( dst src index temp -- ) - [ - "end" define-label - temp src index ADD - dst temp string-offset LBZ - 0 dst HEX: 80 CMPI - "end" get BLT - temp src string-aux-offset LWZ - temp temp index ADD - temp temp index ADD - temp temp byte-array-offset LHZ - temp temp 7 SLWI - dst dst temp XOR - "end" resolve-label - ] with-scope ; - M: ppc %add ADD ; M: ppc %add-imm ADDI ; M: ppc %sub swap SUBF ; diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index 969c02c910..5bb55bead0 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -3,7 +3,8 @@ USING: bootstrap.image.private compiler.constants compiler.units cpu.x86.assembler cpu.x86.assembler.operands kernel kernel.private layouts locals.backend make math -math.private namespaces sequences slots.private vocabs ; +math.private namespaces sequences slots.private strings.private +vocabs ; IN: bootstrap.x86 big-endian off @@ -294,6 +295,21 @@ big-endian off ds-reg [] temp0 MOV ] \ slot define-sub-primitive +[ + ! load string index from stack + temp0 ds-reg bootstrap-cell neg [+] MOV + temp0 tag-bits get SHR + ! load string from stack + temp1 ds-reg [] MOV + ! load character + temp0 8-bit-version-of temp0 temp1 string-offset [++] MOV + temp0 temp0 8-bit-version-of MOVZX + temp0 tag-bits get SHL + ! store character to stack + ds-reg bootstrap-cell SUB + ds-reg [] temp0 MOV +] \ string-nth-fast define-sub-primitive + ! Shufflers [ ds-reg bootstrap-cell SUB diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index a7fd859c20..d0afb7fa81 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -328,32 +328,6 @@ M: x86.64 has-small-reg? 2drop t ; [ quot call ] with-save/restore ] if ; inline -M:: x86 %string-nth ( dst src index temp -- ) - ! We request a small-reg of size 8 since those of size 16 are - ! a superset. - "end" define-label - dst { src index temp } 8 [| new-dst | - ! Load the least significant 7 bits into new-dst. - ! 8th bit indicates whether we have to load from - ! the aux vector or not. - new-dst 8-bit-version-of src index string-offset [++] MOV - new-dst new-dst 8-bit-version-of MOVZX - ! Do we have to look at the aux vector? - new-dst HEX: 80 CMP - "end" get JL - ! Yes, this is a non-ASCII character. Load aux vector - temp src string-aux-offset [+] MOV - new-dst temp XCHG - ! Load high 16 bits - new-dst 16-bit-version-of new-dst index byte-array-offset [+*2+] MOV - new-dst new-dst 16-bit-version-of MOVZX - new-dst 7 SHL - ! Compute code point - new-dst temp XOR - "end" resolve-label - dst new-dst int-rep %copy - ] with-small-register ; - :: %alien-integer-getter ( dst exclude address bits quot -- ) dst exclude bits [| new-dst | new-dst dup bits n-bit-version-of dup address MOV diff --git a/basis/stack-checker/known-words/known-words.factor b/basis/stack-checker/known-words/known-words.factor index c0d4b6c543..a652c500ba 100644 --- a/basis/stack-checker/known-words/known-words.factor +++ b/basis/stack-checker/known-words/known-words.factor @@ -454,11 +454,10 @@ M: bad-executable summary \ set-slot { object object fixnum } { } define-primitive \ set-special-object { object fixnum } { } define-primitive \ set-string-nth-fast { fixnum fixnum string } { } define-primitive -\ set-string-nth-slow { fixnum fixnum string } { } define-primitive \ size { object } { fixnum } define-primitive \ size make-flushable \ slot { object fixnum } { object } define-primitive \ slot make-flushable \ special-object { fixnum } { object } define-primitive \ special-object make-flushable -\ string-nth { fixnum string } { fixnum } define-primitive \ string-nth make-flushable +\ string-nth-fast { fixnum string } { fixnum } define-primitive \ string-nth-fast make-flushable \ strip-stack-traces { } { } define-primitive \ system-micros { } { integer } define-primitive \ system-micros make-flushable \ tag { object } { fixnum } define-primitive \ tag make-foldable diff --git a/core/bootstrap/primitives.factor b/core/bootstrap/primitives.factor index 27699725f1..c00199e9b3 100644 --- a/core/bootstrap/primitives.factor +++ b/core/bootstrap/primitives.factor @@ -370,6 +370,7 @@ tuple { "fixnum<=" "math.private" (( x y -- z )) } { "fixnum>" "math.private" (( x y -- ? )) } { "fixnum>=" "math.private" (( x y -- ? )) } + { "string-nth-fast" "strings.private" (( n string -- ch )) } { "(set-context)" "threads.private" (( obj context -- obj' )) } { "(set-context-and-delete)" "threads.private" (( obj context -- * )) } { "(start-context)" "threads.private" (( obj quot -- obj' )) } @@ -533,8 +534,6 @@ tuple { "" "strings" "primitive_string" (( n ch -- string )) } { "resize-string" "strings" "primitive_resize_string" (( n str -- newstr )) } { "set-string-nth-fast" "strings.private" "primitive_set_string_nth_fast" (( ch n string -- )) } - { "set-string-nth-slow" "strings.private" "primitive_set_string_nth_slow" (( ch n string -- )) } - { "string-nth" "strings.private" "primitive_string_nth" (( n string -- ch )) } { "(exit)" "system" "primitive_exit" (( n -- * )) } { "nano-count" "system" "primitive_nano_count" (( -- ns )) } { "system-micros" "system" "primitive_system_micros" (( -- us )) } diff --git a/core/strings/strings-tests.factor b/core/strings/strings-tests.factor index b90d96a356..247bd8d007 100644 --- a/core/strings/strings-tests.factor +++ b/core/strings/strings-tests.factor @@ -85,6 +85,9 @@ unit-test "s" get >array ] unit-test +! Make sure string initialization works +[ HEX: 123456 ] [ 100 HEX: 123456 first ] unit-test + ! Make sure we clear aux vector when storing octets [ "\u123456hi" ] [ "ih\u123456" clone reverse! ] unit-test diff --git a/core/strings/strings.factor b/core/strings/strings.factor index 50d79a4d8a..f356d2a877 100644 --- a/core/strings/strings.factor +++ b/core/strings/strings.factor @@ -1,7 +1,7 @@ -! Copyright (C) 2003, 2008 Slava Pestov. +! Copyright (C) 2003, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel math.private sequences kernel.private -math sequences.private slots.private alien.accessors ; +USING: accessors alien.accessors byte-arrays kernel math.private +sequences kernel.private math sequences.private slots.private ; IN: strings > { byte-array } declare swap 1 fixnum-shift-fast ; inline + +: small-char? ( ch -- ? ) HEX: 7f fixnum<= ; inline + +: string-nth ( n string -- ch ) + 2dup string-nth-fast dup small-char? + [ 2nip ] [ + [ (aux) alien-unsigned-2 7 fixnum-shift-fast ] dip + fixnum-bitxor + ] if ; inline + +: ensure-aux ( string -- string ) + dup aux>> [ dup length 2 * (byte-array) >>aux ] unless ; inline + +: set-string-nth-slow ( ch n string -- ) + [ [ HEX: 80 fixnum-bitor ] 2dip set-string-nth-fast ] + [ + ensure-aux + [ -7 fixnum-shift-fast 1 fixnum-bitxor ] 2dip + (aux) set-alien-unsigned-2 + ] 3bi ; + : set-string-nth ( ch n string -- ) - pick HEX: 7f fixnum<= + pick small-char? [ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline PRIVATE> diff --git a/vm/debug.cpp b/vm/debug.cpp index 85335d49ae..bb3a8b0ce5 100755 --- a/vm/debug.cpp +++ b/vm/debug.cpp @@ -6,7 +6,7 @@ namespace factor std::ostream &operator<<(std::ostream &out, const string *str) { for(cell i = 0; i < string_capacity(str); i++) - out << (char)str->nth(i); + out << (char)str->data()[i]; return out; } diff --git a/vm/layouts.hpp b/vm/layouts.hpp index 3e51d1fa4d..0cf8607a05 100644 --- a/vm/layouts.hpp +++ b/vm/layouts.hpp @@ -205,8 +205,6 @@ struct string : public object { cell hashcode; u8 *data() const { return (u8 *)(this + 1); } - - cell nth(cell i) const; }; struct code_block; diff --git a/vm/primitives.hpp b/vm/primitives.hpp index a2bf912749..cf52168231 100644 --- a/vm/primitives.hpp +++ b/vm/primitives.hpp @@ -120,12 +120,10 @@ namespace factor _(set_slot) \ _(set_special_object) \ _(set_string_nth_fast) \ - _(set_string_nth_slow) \ _(size) \ _(sleep) \ _(special_object) \ _(string) \ - _(string_nth) \ _(strip_stack_traces) \ _(system_micros) \ _(tuple) \ diff --git a/vm/strings.cpp b/vm/strings.cpp index 5aad936a9e..aea4641905 100644 --- a/vm/strings.cpp +++ b/vm/strings.cpp @@ -3,66 +3,6 @@ namespace factor { -cell string::nth(cell index) const -{ - /* If high bit is set, the most significant 16 bits of the char - come from the aux vector. The least significant bit of the - corresponding aux vector entry is negated, so that we can - XOR the two components together and get the original code point - back. */ - cell lo_bits = data()[index]; - - if((lo_bits & 0x80) == 0) - return lo_bits; - else - { - byte_array *aux = untag(this->aux); - cell hi_bits = aux->data()[index]; - return (hi_bits << 7) ^ lo_bits; - } -} - -void factor_vm::set_string_nth_fast(string *str, cell index, cell ch) -{ - str->data()[index] = (u8)ch; -} - -void factor_vm::set_string_nth_slow(string *str_, cell index, cell ch) -{ - data_root str(str_,this); - - byte_array *aux; - - str->data()[index] = ((ch & 0x7f) | 0x80); - - if(to_boolean(str->aux)) - aux = untag(str->aux); - else - { - /* We don't need to pre-initialize the - byte array with any data, since we - only ever read from the aux vector - if the most significant bit of a - character is set. Initially all of - the bits are clear. */ - aux = allot_uninitialized_array(untag_fixnum(str->length) * sizeof(u16)); - - str->aux = tag(aux); - write_barrier(&str->aux); - } - - aux->data()[index] = (u16)((ch >> 7) ^ 1); -} - -/* allocates memory */ -void factor_vm::set_string_nth(string *str, cell index, cell ch) -{ - if(ch <= 0x7f) - set_string_nth_fast(str,index,ch); - else - set_string_nth_slow(str,index,ch); -} - /* Allocates memory */ string *factor_vm::allot_string_internal(cell capacity) { @@ -81,13 +21,23 @@ void factor_vm::fill_string(string *str_, cell start, cell capacity, cell fill) data_root str(str_,this); if(fill <= 0x7f) - memset(&str->data()[start],(int)fill,capacity - start); + memset(&str->data()[start],(u8)fill,capacity - start); else { - cell i; + byte_array *aux; + if(to_boolean(str->aux)) + aux = untag(str->aux); + else + { + aux = allot_uninitialized_array(untag_fixnum(str->length) * 2); + str->aux = tag(aux); + write_barrier(&str->aux); + } - for(i = start; i < capacity; i++) - set_string_nth(str.untagged(),i,fill); + u8 lo_fill = (u8)((fill & 0x7f) | 0x80); + u16 hi_fill = (u16)((fill >> 7) ^ 0x1); + memset(&str->data()[start],lo_fill,capacity - start); + memset_2(&aux->data()[start],hi_fill,(capacity - start) * sizeof(u16)); } } @@ -141,8 +91,7 @@ string* factor_vm::reallot_string(string *str_, cell capacity) if(to_boolean(str->aux)) { - byte_array *new_aux = allot_byte_array(capacity * sizeof(u16)); - + byte_array *new_aux = allot_uninitialized_array(capacity * 2); new_str->aux = tag(new_aux); write_barrier(&new_str->aux); @@ -163,27 +112,12 @@ void factor_vm::primitive_resize_string() ctx->push(tag(reallot_string(str.untagged(),capacity))); } -void factor_vm::primitive_string_nth() -{ - string *str = untag(ctx->pop()); - cell index = untag_fixnum(ctx->pop()); - ctx->push(tag_fixnum(str->nth(index))); -} - void factor_vm::primitive_set_string_nth_fast() { string *str = untag(ctx->pop()); cell index = untag_fixnum(ctx->pop()); cell value = untag_fixnum(ctx->pop()); - set_string_nth_fast(str,index,value); -} - -void factor_vm::primitive_set_string_nth_slow() -{ - string *str = untag(ctx->pop()); - cell index = untag_fixnum(ctx->pop()); - cell value = untag_fixnum(ctx->pop()); - set_string_nth_slow(str,index,value); + str->data()[index] = (u8)value; } } diff --git a/vm/utilities.hpp b/vm/utilities.hpp index cea70c0c37..e75d3ece12 100755 --- a/vm/utilities.hpp +++ b/vm/utilities.hpp @@ -1,6 +1,27 @@ namespace factor { +inline static void memset_2(void *dst, u16 pattern, size_t size) +{ +#ifdef __APPLE__ + cell cell_pattern = (pattern | (pattern << 16)); + memset_pattern4(dst,&cell_pattern,size); +#else + if(pattern == 0) + memset(dst,0,size); + else + { + u16 *start = (u16 *)dst; + u16 *end = (u16 *)((cell)dst + size); + while(start < end) + { + *start = pattern; + start++; + } + } +#endif +} + inline static void memset_cell(void *dst, cell pattern, size_t size) { #ifdef __APPLE__ diff --git a/vm/vm.hpp b/vm/vm.hpp index d9bd17fa51..3b6fb2311f 100755 --- a/vm/vm.hpp +++ b/vm/vm.hpp @@ -381,10 +381,6 @@ struct factor_vm cell std_vector_to_array(std::vector &elements); // strings - cell string_nth(const string *str, cell index); - void set_string_nth_fast(string *str, cell index, cell ch); - void set_string_nth_slow(string *str_, cell index, cell ch); - void set_string_nth(string *str, cell index, cell ch); string *allot_string_internal(cell capacity); void fill_string(string *str_, cell start, cell capacity, cell fill); string *allot_string(cell capacity, cell fill); @@ -392,9 +388,7 @@ struct factor_vm bool reallot_string_in_place_p(string *str, cell capacity); string* reallot_string(string *str_, cell capacity); void primitive_resize_string(); - void primitive_string_nth(); void primitive_set_string_nth_fast(); - void primitive_set_string_nth_slow(); // booleans cell tag_boolean(cell untagged)