strings: move string-nth primitive out of the VM and into the library

db4
Slava Pestov 2010-04-25 20:19:50 -04:00
parent 458fd007be
commit a9ccfa8f05
21 changed files with 103 additions and 170 deletions

View File

@ -89,12 +89,6 @@ INSN: ##set-slot-imm
use: src/tagged-rep obj/tagged-rep
literal: slot tag ;
! String element access
INSN: ##string-nth
def: dst/int-rep
use: obj/tagged-rep index/int-rep
temp: temp/int-rep ;
! Register transfers
INSN: ##copy
def: dst
@ -806,7 +800,6 @@ UNION: kill-vreg-insn
UNION: def-is-use-insn
##box-alien
##box-displaced-alien
##string-nth
##unbox-any-c-ptr ;
SYMBOL: vreg-insn

View File

@ -56,7 +56,7 @@ IN: compiler.cfg.intrinsics
{ kernel:eq? [ emit-eq ] }
{ slots.private:slot [ emit-slot ] }
{ slots.private:set-slot [ emit-set-slot ] }
{ strings.private:string-nth [ drop emit-string-nth ] }
{ strings.private:string-nth-fast [ drop emit-string-nth-fast ] }
{ strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast ] }
{ classes.tuple.private:<tuple-boa> [ emit-<tuple-boa> ] }
{ arrays:<array> [ emit-<array> ] }

View File

@ -5,9 +5,11 @@ compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.stacks cpu.architecture ;
IN: compiler.cfg.intrinsics.strings
: emit-string-nth ( -- )
2inputs swap ^^string-nth ds-push ;
: (string-nth) ( n string -- base offset rep c-type )
^^tagged>integer swap ^^add string-offset int-rep uchar ; inline
: emit-string-nth-fast ( -- )
2inputs (string-nth) ^^load-memory-imm ds-push ;
: emit-set-string-nth-fast ( -- )
3inputs ^^tagged>integer ^^add string-offset
int-rep uchar ##store-memory-imm ;
3inputs (string-nth) ##store-memory-imm ;

View File

@ -93,7 +93,6 @@ CODEGEN: ##slot %slot
CODEGEN: ##slot-imm %slot-imm
CODEGEN: ##set-slot %set-slot
CODEGEN: ##set-slot-imm %set-slot-imm
CODEGEN: ##string-nth %string-nth
CODEGEN: ##add %add
CODEGEN: ##add-imm %add-imm
CODEGEN: ##sub %sub

View File

@ -97,15 +97,6 @@ IN: compiler.tests.low-level-ir
} compile-test-bb
] unit-test
[ CHAR: l ] [
V{
T{ ##load-reference f 0 "hello world" }
T{ ##load-tagged f 1 3 }
T{ ##string-nth f 0 0 1 2 }
T{ ##shl-imm f 0 0 4 }
} compile-test-bb
] unit-test
[ 1 ] [
V{
T{ ##load-tagged f 0 32 }

View File

@ -254,8 +254,8 @@ generic-comparison-ops [
] "outputs" set-word-prop
] each
\ string-nth [
2drop fixnum 0 23 2^ [a,b] <class/interval-info>
\ string-nth-fast [
2drop fixnum 0 255 [a,b] <class/interval-info>
] "outputs" set-word-prop
{

View File

@ -968,3 +968,10 @@ M: tuple-with-read-only-slot clone
[ t ] [ [ { 1 } diff ] { diff } inlined? ] unit-test
[ f ] [ [ { 1 } swap diff ] { diff } inlined? ] unit-test ! We could do this
! Output range for string-nth now that string-nth is a library word and
! not a primitive
[ t ] [
! Should actually be 0 23 2^ 1 - [a,b]
[ string-nth ] final-info first interval>> 0 23 2^ [a,b] =
] unit-test

View File

@ -244,8 +244,6 @@ HOOK: %slot-imm cpu ( dst obj slot tag -- )
HOOK: %set-slot cpu ( src obj slot scale tag -- )
HOOK: %set-slot-imm cpu ( src obj slot tag -- )
HOOK: %string-nth cpu ( dst obj index temp -- )
HOOK: %add cpu ( dst src1 src2 -- )
HOOK: %add-imm cpu ( dst src1 src2 -- )
HOOK: %sub cpu ( dst src1 src2 -- )

View File

@ -144,22 +144,6 @@ M: ppc %slot-imm ( dst obj slot tag -- ) slot-offset LWZ ;
M: ppc %set-slot ( src obj slot -- ) swapd STWX ;
M: ppc %set-slot-imm ( src obj slot tag -- ) slot-offset STW ;
M:: ppc %string-nth ( dst src index temp -- )
[
"end" define-label
temp src index ADD
dst temp string-offset LBZ
0 dst HEX: 80 CMPI
"end" get BLT
temp src string-aux-offset LWZ
temp temp index ADD
temp temp index ADD
temp temp byte-array-offset LHZ
temp temp 7 SLWI
dst dst temp XOR
"end" resolve-label
] with-scope ;
M: ppc %add ADD ;
M: ppc %add-imm ADDI ;
M: ppc %sub swap SUBF ;

View File

@ -3,7 +3,8 @@
USING: bootstrap.image.private compiler.constants
compiler.units cpu.x86.assembler cpu.x86.assembler.operands
kernel kernel.private layouts locals.backend make math
math.private namespaces sequences slots.private vocabs ;
math.private namespaces sequences slots.private strings.private
vocabs ;
IN: bootstrap.x86
big-endian off
@ -294,6 +295,21 @@ big-endian off
ds-reg [] temp0 MOV
] \ slot define-sub-primitive
[
! load string index from stack
temp0 ds-reg bootstrap-cell neg [+] MOV
temp0 tag-bits get SHR
! load string from stack
temp1 ds-reg [] MOV
! load character
temp0 8-bit-version-of temp0 temp1 string-offset [++] MOV
temp0 temp0 8-bit-version-of MOVZX
temp0 tag-bits get SHL
! store character to stack
ds-reg bootstrap-cell SUB
ds-reg [] temp0 MOV
] \ string-nth-fast define-sub-primitive
! Shufflers
[
ds-reg bootstrap-cell SUB

View File

@ -328,32 +328,6 @@ M: x86.64 has-small-reg? 2drop t ;
[ quot call ] with-save/restore
] if ; inline
M:: x86 %string-nth ( dst src index temp -- )
! We request a small-reg of size 8 since those of size 16 are
! a superset.
"end" define-label
dst { src index temp } 8 [| new-dst |
! Load the least significant 7 bits into new-dst.
! 8th bit indicates whether we have to load from
! the aux vector or not.
new-dst 8-bit-version-of src index string-offset [++] MOV
new-dst new-dst 8-bit-version-of MOVZX
! Do we have to look at the aux vector?
new-dst HEX: 80 CMP
"end" get JL
! Yes, this is a non-ASCII character. Load aux vector
temp src string-aux-offset [+] MOV
new-dst temp XCHG
! Load high 16 bits
new-dst 16-bit-version-of new-dst index byte-array-offset [+*2+] MOV
new-dst new-dst 16-bit-version-of MOVZX
new-dst 7 SHL
! Compute code point
new-dst temp XOR
"end" resolve-label
dst new-dst int-rep %copy
] with-small-register ;
:: %alien-integer-getter ( dst exclude address bits quot -- )
dst exclude bits [| new-dst |
new-dst dup bits n-bit-version-of dup address MOV

View File

@ -454,11 +454,10 @@ M: bad-executable summary
\ set-slot { object object fixnum } { } define-primitive
\ set-special-object { object fixnum } { } define-primitive
\ set-string-nth-fast { fixnum fixnum string } { } define-primitive
\ set-string-nth-slow { fixnum fixnum string } { } define-primitive
\ size { object } { fixnum } define-primitive \ size make-flushable
\ slot { object fixnum } { object } define-primitive \ slot make-flushable
\ special-object { fixnum } { object } define-primitive \ special-object make-flushable
\ string-nth { fixnum string } { fixnum } define-primitive \ string-nth make-flushable
\ string-nth-fast { fixnum string } { fixnum } define-primitive \ string-nth-fast make-flushable
\ strip-stack-traces { } { } define-primitive
\ system-micros { } { integer } define-primitive \ system-micros make-flushable
\ tag { object } { fixnum } define-primitive \ tag make-foldable

View File

@ -370,6 +370,7 @@ tuple
{ "fixnum<=" "math.private" (( x y -- z )) }
{ "fixnum>" "math.private" (( x y -- ? )) }
{ "fixnum>=" "math.private" (( x y -- ? )) }
{ "string-nth-fast" "strings.private" (( n string -- ch )) }
{ "(set-context)" "threads.private" (( obj context -- obj' )) }
{ "(set-context-and-delete)" "threads.private" (( obj context -- * )) }
{ "(start-context)" "threads.private" (( obj quot -- obj' )) }
@ -533,8 +534,6 @@ tuple
{ "<string>" "strings" "primitive_string" (( n ch -- string )) }
{ "resize-string" "strings" "primitive_resize_string" (( n str -- newstr )) }
{ "set-string-nth-fast" "strings.private" "primitive_set_string_nth_fast" (( ch n string -- )) }
{ "set-string-nth-slow" "strings.private" "primitive_set_string_nth_slow" (( ch n string -- )) }
{ "string-nth" "strings.private" "primitive_string_nth" (( n string -- ch )) }
{ "(exit)" "system" "primitive_exit" (( n -- * )) }
{ "nano-count" "system" "primitive_nano_count" (( -- ns )) }
{ "system-micros" "system" "primitive_system_micros" (( -- us )) }

View File

@ -85,6 +85,9 @@ unit-test
"s" get >array
] unit-test
! Make sure string initialization works
[ HEX: 123456 ] [ 100 HEX: 123456 <string> first ] unit-test
! Make sure we clear aux vector when storing octets
[ "\u123456hi" ] [ "ih\u123456" clone reverse! ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2003, 2008 Slava Pestov.
! Copyright (C) 2003, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math.private sequences kernel.private
math sequences.private slots.private alien.accessors ;
USING: accessors alien.accessors byte-arrays kernel math.private
sequences kernel.private math sequences.private slots.private ;
IN: strings
<PRIVATE
@ -16,8 +16,31 @@ IN: strings
: rehash-string ( str -- )
1 over sequence-hashcode swap set-string-hashcode ; inline
: (aux) ( n string -- byte-array m )
aux>> { byte-array } declare swap 1 fixnum-shift-fast ; inline
: small-char? ( ch -- ? ) HEX: 7f fixnum<= ; inline
: string-nth ( n string -- ch )
2dup string-nth-fast dup small-char?
[ 2nip ] [
[ (aux) alien-unsigned-2 7 fixnum-shift-fast ] dip
fixnum-bitxor
] if ; inline
: ensure-aux ( string -- string )
dup aux>> [ dup length 2 * (byte-array) >>aux ] unless ; inline
: set-string-nth-slow ( ch n string -- )
[ [ HEX: 80 fixnum-bitor ] 2dip set-string-nth-fast ]
[
ensure-aux
[ -7 fixnum-shift-fast 1 fixnum-bitxor ] 2dip
(aux) set-alien-unsigned-2
] 3bi ;
: set-string-nth ( ch n string -- )
pick HEX: 7f fixnum<=
pick small-char?
[ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline
PRIVATE>

View File

@ -6,7 +6,7 @@ namespace factor
std::ostream &operator<<(std::ostream &out, const string *str)
{
for(cell i = 0; i < string_capacity(str); i++)
out << (char)str->nth(i);
out << (char)str->data()[i];
return out;
}

View File

@ -205,8 +205,6 @@ struct string : public object {
cell hashcode;
u8 *data() const { return (u8 *)(this + 1); }
cell nth(cell i) const;
};
struct code_block;

View File

@ -120,12 +120,10 @@ namespace factor
_(set_slot) \
_(set_special_object) \
_(set_string_nth_fast) \
_(set_string_nth_slow) \
_(size) \
_(sleep) \
_(special_object) \
_(string) \
_(string_nth) \
_(strip_stack_traces) \
_(system_micros) \
_(tuple) \

View File

@ -3,66 +3,6 @@
namespace factor
{
cell string::nth(cell index) const
{
/* If high bit is set, the most significant 16 bits of the char
come from the aux vector. The least significant bit of the
corresponding aux vector entry is negated, so that we can
XOR the two components together and get the original code point
back. */
cell lo_bits = data()[index];
if((lo_bits & 0x80) == 0)
return lo_bits;
else
{
byte_array *aux = untag<byte_array>(this->aux);
cell hi_bits = aux->data<u16>()[index];
return (hi_bits << 7) ^ lo_bits;
}
}
void factor_vm::set_string_nth_fast(string *str, cell index, cell ch)
{
str->data()[index] = (u8)ch;
}
void factor_vm::set_string_nth_slow(string *str_, cell index, cell ch)
{
data_root<string> str(str_,this);
byte_array *aux;
str->data()[index] = ((ch & 0x7f) | 0x80);
if(to_boolean(str->aux))
aux = untag<byte_array>(str->aux);
else
{
/* We don't need to pre-initialize the
byte array with any data, since we
only ever read from the aux vector
if the most significant bit of a
character is set. Initially all of
the bits are clear. */
aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * sizeof(u16));
str->aux = tag<byte_array>(aux);
write_barrier(&str->aux);
}
aux->data<u16>()[index] = (u16)((ch >> 7) ^ 1);
}
/* allocates memory */
void factor_vm::set_string_nth(string *str, cell index, cell ch)
{
if(ch <= 0x7f)
set_string_nth_fast(str,index,ch);
else
set_string_nth_slow(str,index,ch);
}
/* Allocates memory */
string *factor_vm::allot_string_internal(cell capacity)
{
@ -81,13 +21,23 @@ void factor_vm::fill_string(string *str_, cell start, cell capacity, cell fill)
data_root<string> str(str_,this);
if(fill <= 0x7f)
memset(&str->data()[start],(int)fill,capacity - start);
memset(&str->data()[start],(u8)fill,capacity - start);
else
{
cell i;
byte_array *aux;
if(to_boolean(str->aux))
aux = untag<byte_array>(str->aux);
else
{
aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * 2);
str->aux = tag<byte_array>(aux);
write_barrier(&str->aux);
}
for(i = start; i < capacity; i++)
set_string_nth(str.untagged(),i,fill);
u8 lo_fill = (u8)((fill & 0x7f) | 0x80);
u16 hi_fill = (u16)((fill >> 7) ^ 0x1);
memset(&str->data()[start],lo_fill,capacity - start);
memset_2(&aux->data<u16>()[start],hi_fill,(capacity - start) * sizeof(u16));
}
}
@ -141,8 +91,7 @@ string* factor_vm::reallot_string(string *str_, cell capacity)
if(to_boolean(str->aux))
{
byte_array *new_aux = allot_byte_array(capacity * sizeof(u16));
byte_array *new_aux = allot_uninitialized_array<byte_array>(capacity * 2);
new_str->aux = tag<byte_array>(new_aux);
write_barrier(&new_str->aux);
@ -163,27 +112,12 @@ void factor_vm::primitive_resize_string()
ctx->push(tag<string>(reallot_string(str.untagged(),capacity)));
}
void factor_vm::primitive_string_nth()
{
string *str = untag<string>(ctx->pop());
cell index = untag_fixnum(ctx->pop());
ctx->push(tag_fixnum(str->nth(index)));
}
void factor_vm::primitive_set_string_nth_fast()
{
string *str = untag<string>(ctx->pop());
cell index = untag_fixnum(ctx->pop());
cell value = untag_fixnum(ctx->pop());
set_string_nth_fast(str,index,value);
}
void factor_vm::primitive_set_string_nth_slow()
{
string *str = untag<string>(ctx->pop());
cell index = untag_fixnum(ctx->pop());
cell value = untag_fixnum(ctx->pop());
set_string_nth_slow(str,index,value);
str->data()[index] = (u8)value;
}
}

View File

@ -1,6 +1,27 @@
namespace factor
{
inline static void memset_2(void *dst, u16 pattern, size_t size)
{
#ifdef __APPLE__
cell cell_pattern = (pattern | (pattern << 16));
memset_pattern4(dst,&cell_pattern,size);
#else
if(pattern == 0)
memset(dst,0,size);
else
{
u16 *start = (u16 *)dst;
u16 *end = (u16 *)((cell)dst + size);
while(start < end)
{
*start = pattern;
start++;
}
}
#endif
}
inline static void memset_cell(void *dst, cell pattern, size_t size)
{
#ifdef __APPLE__

View File

@ -381,10 +381,6 @@ struct factor_vm
cell std_vector_to_array(std::vector<cell> &elements);
// strings
cell string_nth(const string *str, cell index);
void set_string_nth_fast(string *str, cell index, cell ch);
void set_string_nth_slow(string *str_, cell index, cell ch);
void set_string_nth(string *str, cell index, cell ch);
string *allot_string_internal(cell capacity);
void fill_string(string *str_, cell start, cell capacity, cell fill);
string *allot_string(cell capacity, cell fill);
@ -392,9 +388,7 @@ struct factor_vm
bool reallot_string_in_place_p(string *str, cell capacity);
string* reallot_string(string *str_, cell capacity);
void primitive_resize_string();
void primitive_string_nth();
void primitive_set_string_nth_fast();
void primitive_set_string_nth_slow();
// booleans
cell tag_boolean(cell untagged)