strings: move string-nth primitive out of the VM and into the library
parent
458fd007be
commit
a9ccfa8f05
|
@ -89,12 +89,6 @@ INSN: ##set-slot-imm
|
|||
use: src/tagged-rep obj/tagged-rep
|
||||
literal: slot tag ;
|
||||
|
||||
! String element access
|
||||
INSN: ##string-nth
|
||||
def: dst/int-rep
|
||||
use: obj/tagged-rep index/int-rep
|
||||
temp: temp/int-rep ;
|
||||
|
||||
! Register transfers
|
||||
INSN: ##copy
|
||||
def: dst
|
||||
|
@ -806,7 +800,6 @@ UNION: kill-vreg-insn
|
|||
UNION: def-is-use-insn
|
||||
##box-alien
|
||||
##box-displaced-alien
|
||||
##string-nth
|
||||
##unbox-any-c-ptr ;
|
||||
|
||||
SYMBOL: vreg-insn
|
||||
|
|
|
@ -56,7 +56,7 @@ IN: compiler.cfg.intrinsics
|
|||
{ kernel:eq? [ emit-eq ] }
|
||||
{ slots.private:slot [ emit-slot ] }
|
||||
{ slots.private:set-slot [ emit-set-slot ] }
|
||||
{ strings.private:string-nth [ drop emit-string-nth ] }
|
||||
{ strings.private:string-nth-fast [ drop emit-string-nth-fast ] }
|
||||
{ strings.private:set-string-nth-fast [ drop emit-set-string-nth-fast ] }
|
||||
{ classes.tuple.private:<tuple-boa> [ emit-<tuple-boa> ] }
|
||||
{ arrays:<array> [ emit-<array> ] }
|
||||
|
|
|
@ -5,9 +5,11 @@ compiler.cfg.instructions compiler.cfg.registers
|
|||
compiler.cfg.stacks cpu.architecture ;
|
||||
IN: compiler.cfg.intrinsics.strings
|
||||
|
||||
: emit-string-nth ( -- )
|
||||
2inputs swap ^^string-nth ds-push ;
|
||||
: (string-nth) ( n string -- base offset rep c-type )
|
||||
^^tagged>integer swap ^^add string-offset int-rep uchar ; inline
|
||||
|
||||
: emit-string-nth-fast ( -- )
|
||||
2inputs (string-nth) ^^load-memory-imm ds-push ;
|
||||
|
||||
: emit-set-string-nth-fast ( -- )
|
||||
3inputs ^^tagged>integer ^^add string-offset
|
||||
int-rep uchar ##store-memory-imm ;
|
||||
3inputs (string-nth) ##store-memory-imm ;
|
||||
|
|
|
@ -93,7 +93,6 @@ CODEGEN: ##slot %slot
|
|||
CODEGEN: ##slot-imm %slot-imm
|
||||
CODEGEN: ##set-slot %set-slot
|
||||
CODEGEN: ##set-slot-imm %set-slot-imm
|
||||
CODEGEN: ##string-nth %string-nth
|
||||
CODEGEN: ##add %add
|
||||
CODEGEN: ##add-imm %add-imm
|
||||
CODEGEN: ##sub %sub
|
||||
|
|
|
@ -97,15 +97,6 @@ IN: compiler.tests.low-level-ir
|
|||
} compile-test-bb
|
||||
] unit-test
|
||||
|
||||
[ CHAR: l ] [
|
||||
V{
|
||||
T{ ##load-reference f 0 "hello world" }
|
||||
T{ ##load-tagged f 1 3 }
|
||||
T{ ##string-nth f 0 0 1 2 }
|
||||
T{ ##shl-imm f 0 0 4 }
|
||||
} compile-test-bb
|
||||
] unit-test
|
||||
|
||||
[ 1 ] [
|
||||
V{
|
||||
T{ ##load-tagged f 0 32 }
|
||||
|
|
|
@ -254,8 +254,8 @@ generic-comparison-ops [
|
|||
] "outputs" set-word-prop
|
||||
] each
|
||||
|
||||
\ string-nth [
|
||||
2drop fixnum 0 23 2^ [a,b] <class/interval-info>
|
||||
\ string-nth-fast [
|
||||
2drop fixnum 0 255 [a,b] <class/interval-info>
|
||||
] "outputs" set-word-prop
|
||||
|
||||
{
|
||||
|
|
|
@ -968,3 +968,10 @@ M: tuple-with-read-only-slot clone
|
|||
|
||||
[ t ] [ [ { 1 } diff ] { diff } inlined? ] unit-test
|
||||
[ f ] [ [ { 1 } swap diff ] { diff } inlined? ] unit-test ! We could do this
|
||||
|
||||
! Output range for string-nth now that string-nth is a library word and
|
||||
! not a primitive
|
||||
[ t ] [
|
||||
! Should actually be 0 23 2^ 1 - [a,b]
|
||||
[ string-nth ] final-info first interval>> 0 23 2^ [a,b] =
|
||||
] unit-test
|
||||
|
|
|
@ -244,8 +244,6 @@ HOOK: %slot-imm cpu ( dst obj slot tag -- )
|
|||
HOOK: %set-slot cpu ( src obj slot scale tag -- )
|
||||
HOOK: %set-slot-imm cpu ( src obj slot tag -- )
|
||||
|
||||
HOOK: %string-nth cpu ( dst obj index temp -- )
|
||||
|
||||
HOOK: %add cpu ( dst src1 src2 -- )
|
||||
HOOK: %add-imm cpu ( dst src1 src2 -- )
|
||||
HOOK: %sub cpu ( dst src1 src2 -- )
|
||||
|
|
|
@ -144,22 +144,6 @@ M: ppc %slot-imm ( dst obj slot tag -- ) slot-offset LWZ ;
|
|||
M: ppc %set-slot ( src obj slot -- ) swapd STWX ;
|
||||
M: ppc %set-slot-imm ( src obj slot tag -- ) slot-offset STW ;
|
||||
|
||||
M:: ppc %string-nth ( dst src index temp -- )
|
||||
[
|
||||
"end" define-label
|
||||
temp src index ADD
|
||||
dst temp string-offset LBZ
|
||||
0 dst HEX: 80 CMPI
|
||||
"end" get BLT
|
||||
temp src string-aux-offset LWZ
|
||||
temp temp index ADD
|
||||
temp temp index ADD
|
||||
temp temp byte-array-offset LHZ
|
||||
temp temp 7 SLWI
|
||||
dst dst temp XOR
|
||||
"end" resolve-label
|
||||
] with-scope ;
|
||||
|
||||
M: ppc %add ADD ;
|
||||
M: ppc %add-imm ADDI ;
|
||||
M: ppc %sub swap SUBF ;
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
USING: bootstrap.image.private compiler.constants
|
||||
compiler.units cpu.x86.assembler cpu.x86.assembler.operands
|
||||
kernel kernel.private layouts locals.backend make math
|
||||
math.private namespaces sequences slots.private vocabs ;
|
||||
math.private namespaces sequences slots.private strings.private
|
||||
vocabs ;
|
||||
IN: bootstrap.x86
|
||||
|
||||
big-endian off
|
||||
|
@ -294,6 +295,21 @@ big-endian off
|
|||
ds-reg [] temp0 MOV
|
||||
] \ slot define-sub-primitive
|
||||
|
||||
[
|
||||
! load string index from stack
|
||||
temp0 ds-reg bootstrap-cell neg [+] MOV
|
||||
temp0 tag-bits get SHR
|
||||
! load string from stack
|
||||
temp1 ds-reg [] MOV
|
||||
! load character
|
||||
temp0 8-bit-version-of temp0 temp1 string-offset [++] MOV
|
||||
temp0 temp0 8-bit-version-of MOVZX
|
||||
temp0 tag-bits get SHL
|
||||
! store character to stack
|
||||
ds-reg bootstrap-cell SUB
|
||||
ds-reg [] temp0 MOV
|
||||
] \ string-nth-fast define-sub-primitive
|
||||
|
||||
! Shufflers
|
||||
[
|
||||
ds-reg bootstrap-cell SUB
|
||||
|
|
|
@ -328,32 +328,6 @@ M: x86.64 has-small-reg? 2drop t ;
|
|||
[ quot call ] with-save/restore
|
||||
] if ; inline
|
||||
|
||||
M:: x86 %string-nth ( dst src index temp -- )
|
||||
! We request a small-reg of size 8 since those of size 16 are
|
||||
! a superset.
|
||||
"end" define-label
|
||||
dst { src index temp } 8 [| new-dst |
|
||||
! Load the least significant 7 bits into new-dst.
|
||||
! 8th bit indicates whether we have to load from
|
||||
! the aux vector or not.
|
||||
new-dst 8-bit-version-of src index string-offset [++] MOV
|
||||
new-dst new-dst 8-bit-version-of MOVZX
|
||||
! Do we have to look at the aux vector?
|
||||
new-dst HEX: 80 CMP
|
||||
"end" get JL
|
||||
! Yes, this is a non-ASCII character. Load aux vector
|
||||
temp src string-aux-offset [+] MOV
|
||||
new-dst temp XCHG
|
||||
! Load high 16 bits
|
||||
new-dst 16-bit-version-of new-dst index byte-array-offset [+*2+] MOV
|
||||
new-dst new-dst 16-bit-version-of MOVZX
|
||||
new-dst 7 SHL
|
||||
! Compute code point
|
||||
new-dst temp XOR
|
||||
"end" resolve-label
|
||||
dst new-dst int-rep %copy
|
||||
] with-small-register ;
|
||||
|
||||
:: %alien-integer-getter ( dst exclude address bits quot -- )
|
||||
dst exclude bits [| new-dst |
|
||||
new-dst dup bits n-bit-version-of dup address MOV
|
||||
|
|
|
@ -454,11 +454,10 @@ M: bad-executable summary
|
|||
\ set-slot { object object fixnum } { } define-primitive
|
||||
\ set-special-object { object fixnum } { } define-primitive
|
||||
\ set-string-nth-fast { fixnum fixnum string } { } define-primitive
|
||||
\ set-string-nth-slow { fixnum fixnum string } { } define-primitive
|
||||
\ size { object } { fixnum } define-primitive \ size make-flushable
|
||||
\ slot { object fixnum } { object } define-primitive \ slot make-flushable
|
||||
\ special-object { fixnum } { object } define-primitive \ special-object make-flushable
|
||||
\ string-nth { fixnum string } { fixnum } define-primitive \ string-nth make-flushable
|
||||
\ string-nth-fast { fixnum string } { fixnum } define-primitive \ string-nth-fast make-flushable
|
||||
\ strip-stack-traces { } { } define-primitive
|
||||
\ system-micros { } { integer } define-primitive \ system-micros make-flushable
|
||||
\ tag { object } { fixnum } define-primitive \ tag make-foldable
|
||||
|
|
|
@ -370,6 +370,7 @@ tuple
|
|||
{ "fixnum<=" "math.private" (( x y -- z )) }
|
||||
{ "fixnum>" "math.private" (( x y -- ? )) }
|
||||
{ "fixnum>=" "math.private" (( x y -- ? )) }
|
||||
{ "string-nth-fast" "strings.private" (( n string -- ch )) }
|
||||
{ "(set-context)" "threads.private" (( obj context -- obj' )) }
|
||||
{ "(set-context-and-delete)" "threads.private" (( obj context -- * )) }
|
||||
{ "(start-context)" "threads.private" (( obj quot -- obj' )) }
|
||||
|
@ -533,8 +534,6 @@ tuple
|
|||
{ "<string>" "strings" "primitive_string" (( n ch -- string )) }
|
||||
{ "resize-string" "strings" "primitive_resize_string" (( n str -- newstr )) }
|
||||
{ "set-string-nth-fast" "strings.private" "primitive_set_string_nth_fast" (( ch n string -- )) }
|
||||
{ "set-string-nth-slow" "strings.private" "primitive_set_string_nth_slow" (( ch n string -- )) }
|
||||
{ "string-nth" "strings.private" "primitive_string_nth" (( n string -- ch )) }
|
||||
{ "(exit)" "system" "primitive_exit" (( n -- * )) }
|
||||
{ "nano-count" "system" "primitive_nano_count" (( -- ns )) }
|
||||
{ "system-micros" "system" "primitive_system_micros" (( -- us )) }
|
||||
|
|
|
@ -85,6 +85,9 @@ unit-test
|
|||
"s" get >array
|
||||
] unit-test
|
||||
|
||||
! Make sure string initialization works
|
||||
[ HEX: 123456 ] [ 100 HEX: 123456 <string> first ] unit-test
|
||||
|
||||
! Make sure we clear aux vector when storing octets
|
||||
[ "\u123456hi" ] [ "ih\u123456" clone reverse! ] unit-test
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2003, 2008 Slava Pestov.
|
||||
! Copyright (C) 2003, 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math.private sequences kernel.private
|
||||
math sequences.private slots.private alien.accessors ;
|
||||
USING: accessors alien.accessors byte-arrays kernel math.private
|
||||
sequences kernel.private math sequences.private slots.private ;
|
||||
IN: strings
|
||||
|
||||
<PRIVATE
|
||||
|
@ -16,8 +16,31 @@ IN: strings
|
|||
: rehash-string ( str -- )
|
||||
1 over sequence-hashcode swap set-string-hashcode ; inline
|
||||
|
||||
: (aux) ( n string -- byte-array m )
|
||||
aux>> { byte-array } declare swap 1 fixnum-shift-fast ; inline
|
||||
|
||||
: small-char? ( ch -- ? ) HEX: 7f fixnum<= ; inline
|
||||
|
||||
: string-nth ( n string -- ch )
|
||||
2dup string-nth-fast dup small-char?
|
||||
[ 2nip ] [
|
||||
[ (aux) alien-unsigned-2 7 fixnum-shift-fast ] dip
|
||||
fixnum-bitxor
|
||||
] if ; inline
|
||||
|
||||
: ensure-aux ( string -- string )
|
||||
dup aux>> [ dup length 2 * (byte-array) >>aux ] unless ; inline
|
||||
|
||||
: set-string-nth-slow ( ch n string -- )
|
||||
[ [ HEX: 80 fixnum-bitor ] 2dip set-string-nth-fast ]
|
||||
[
|
||||
ensure-aux
|
||||
[ -7 fixnum-shift-fast 1 fixnum-bitxor ] 2dip
|
||||
(aux) set-alien-unsigned-2
|
||||
] 3bi ;
|
||||
|
||||
: set-string-nth ( ch n string -- )
|
||||
pick HEX: 7f fixnum<=
|
||||
pick small-char?
|
||||
[ set-string-nth-fast ] [ set-string-nth-slow ] if ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -6,7 +6,7 @@ namespace factor
|
|||
std::ostream &operator<<(std::ostream &out, const string *str)
|
||||
{
|
||||
for(cell i = 0; i < string_capacity(str); i++)
|
||||
out << (char)str->nth(i);
|
||||
out << (char)str->data()[i];
|
||||
return out;
|
||||
}
|
||||
|
||||
|
|
|
@ -205,8 +205,6 @@ struct string : public object {
|
|||
cell hashcode;
|
||||
|
||||
u8 *data() const { return (u8 *)(this + 1); }
|
||||
|
||||
cell nth(cell i) const;
|
||||
};
|
||||
|
||||
struct code_block;
|
||||
|
|
|
@ -120,12 +120,10 @@ namespace factor
|
|||
_(set_slot) \
|
||||
_(set_special_object) \
|
||||
_(set_string_nth_fast) \
|
||||
_(set_string_nth_slow) \
|
||||
_(size) \
|
||||
_(sleep) \
|
||||
_(special_object) \
|
||||
_(string) \
|
||||
_(string_nth) \
|
||||
_(strip_stack_traces) \
|
||||
_(system_micros) \
|
||||
_(tuple) \
|
||||
|
|
|
@ -3,66 +3,6 @@
|
|||
namespace factor
|
||||
{
|
||||
|
||||
cell string::nth(cell index) const
|
||||
{
|
||||
/* If high bit is set, the most significant 16 bits of the char
|
||||
come from the aux vector. The least significant bit of the
|
||||
corresponding aux vector entry is negated, so that we can
|
||||
XOR the two components together and get the original code point
|
||||
back. */
|
||||
cell lo_bits = data()[index];
|
||||
|
||||
if((lo_bits & 0x80) == 0)
|
||||
return lo_bits;
|
||||
else
|
||||
{
|
||||
byte_array *aux = untag<byte_array>(this->aux);
|
||||
cell hi_bits = aux->data<u16>()[index];
|
||||
return (hi_bits << 7) ^ lo_bits;
|
||||
}
|
||||
}
|
||||
|
||||
void factor_vm::set_string_nth_fast(string *str, cell index, cell ch)
|
||||
{
|
||||
str->data()[index] = (u8)ch;
|
||||
}
|
||||
|
||||
void factor_vm::set_string_nth_slow(string *str_, cell index, cell ch)
|
||||
{
|
||||
data_root<string> str(str_,this);
|
||||
|
||||
byte_array *aux;
|
||||
|
||||
str->data()[index] = ((ch & 0x7f) | 0x80);
|
||||
|
||||
if(to_boolean(str->aux))
|
||||
aux = untag<byte_array>(str->aux);
|
||||
else
|
||||
{
|
||||
/* We don't need to pre-initialize the
|
||||
byte array with any data, since we
|
||||
only ever read from the aux vector
|
||||
if the most significant bit of a
|
||||
character is set. Initially all of
|
||||
the bits are clear. */
|
||||
aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * sizeof(u16));
|
||||
|
||||
str->aux = tag<byte_array>(aux);
|
||||
write_barrier(&str->aux);
|
||||
}
|
||||
|
||||
aux->data<u16>()[index] = (u16)((ch >> 7) ^ 1);
|
||||
}
|
||||
|
||||
/* allocates memory */
|
||||
void factor_vm::set_string_nth(string *str, cell index, cell ch)
|
||||
{
|
||||
if(ch <= 0x7f)
|
||||
set_string_nth_fast(str,index,ch);
|
||||
else
|
||||
set_string_nth_slow(str,index,ch);
|
||||
}
|
||||
|
||||
/* Allocates memory */
|
||||
string *factor_vm::allot_string_internal(cell capacity)
|
||||
{
|
||||
|
@ -81,13 +21,23 @@ void factor_vm::fill_string(string *str_, cell start, cell capacity, cell fill)
|
|||
data_root<string> str(str_,this);
|
||||
|
||||
if(fill <= 0x7f)
|
||||
memset(&str->data()[start],(int)fill,capacity - start);
|
||||
memset(&str->data()[start],(u8)fill,capacity - start);
|
||||
else
|
||||
{
|
||||
cell i;
|
||||
byte_array *aux;
|
||||
if(to_boolean(str->aux))
|
||||
aux = untag<byte_array>(str->aux);
|
||||
else
|
||||
{
|
||||
aux = allot_uninitialized_array<byte_array>(untag_fixnum(str->length) * 2);
|
||||
str->aux = tag<byte_array>(aux);
|
||||
write_barrier(&str->aux);
|
||||
}
|
||||
|
||||
for(i = start; i < capacity; i++)
|
||||
set_string_nth(str.untagged(),i,fill);
|
||||
u8 lo_fill = (u8)((fill & 0x7f) | 0x80);
|
||||
u16 hi_fill = (u16)((fill >> 7) ^ 0x1);
|
||||
memset(&str->data()[start],lo_fill,capacity - start);
|
||||
memset_2(&aux->data<u16>()[start],hi_fill,(capacity - start) * sizeof(u16));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,8 +91,7 @@ string* factor_vm::reallot_string(string *str_, cell capacity)
|
|||
|
||||
if(to_boolean(str->aux))
|
||||
{
|
||||
byte_array *new_aux = allot_byte_array(capacity * sizeof(u16));
|
||||
|
||||
byte_array *new_aux = allot_uninitialized_array<byte_array>(capacity * 2);
|
||||
new_str->aux = tag<byte_array>(new_aux);
|
||||
write_barrier(&new_str->aux);
|
||||
|
||||
|
@ -163,27 +112,12 @@ void factor_vm::primitive_resize_string()
|
|||
ctx->push(tag<string>(reallot_string(str.untagged(),capacity)));
|
||||
}
|
||||
|
||||
void factor_vm::primitive_string_nth()
|
||||
{
|
||||
string *str = untag<string>(ctx->pop());
|
||||
cell index = untag_fixnum(ctx->pop());
|
||||
ctx->push(tag_fixnum(str->nth(index)));
|
||||
}
|
||||
|
||||
void factor_vm::primitive_set_string_nth_fast()
|
||||
{
|
||||
string *str = untag<string>(ctx->pop());
|
||||
cell index = untag_fixnum(ctx->pop());
|
||||
cell value = untag_fixnum(ctx->pop());
|
||||
set_string_nth_fast(str,index,value);
|
||||
}
|
||||
|
||||
void factor_vm::primitive_set_string_nth_slow()
|
||||
{
|
||||
string *str = untag<string>(ctx->pop());
|
||||
cell index = untag_fixnum(ctx->pop());
|
||||
cell value = untag_fixnum(ctx->pop());
|
||||
set_string_nth_slow(str,index,value);
|
||||
str->data()[index] = (u8)value;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,27 @@
|
|||
namespace factor
|
||||
{
|
||||
|
||||
inline static void memset_2(void *dst, u16 pattern, size_t size)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
cell cell_pattern = (pattern | (pattern << 16));
|
||||
memset_pattern4(dst,&cell_pattern,size);
|
||||
#else
|
||||
if(pattern == 0)
|
||||
memset(dst,0,size);
|
||||
else
|
||||
{
|
||||
u16 *start = (u16 *)dst;
|
||||
u16 *end = (u16 *)((cell)dst + size);
|
||||
while(start < end)
|
||||
{
|
||||
*start = pattern;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline static void memset_cell(void *dst, cell pattern, size_t size)
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
|
|
|
@ -381,10 +381,6 @@ struct factor_vm
|
|||
cell std_vector_to_array(std::vector<cell> &elements);
|
||||
|
||||
// strings
|
||||
cell string_nth(const string *str, cell index);
|
||||
void set_string_nth_fast(string *str, cell index, cell ch);
|
||||
void set_string_nth_slow(string *str_, cell index, cell ch);
|
||||
void set_string_nth(string *str, cell index, cell ch);
|
||||
string *allot_string_internal(cell capacity);
|
||||
void fill_string(string *str_, cell start, cell capacity, cell fill);
|
||||
string *allot_string(cell capacity, cell fill);
|
||||
|
@ -392,9 +388,7 @@ struct factor_vm
|
|||
bool reallot_string_in_place_p(string *str, cell capacity);
|
||||
string* reallot_string(string *str_, cell capacity);
|
||||
void primitive_resize_string();
|
||||
void primitive_string_nth();
|
||||
void primitive_set_string_nth_fast();
|
||||
void primitive_set_string_nth_slow();
|
||||
|
||||
// booleans
|
||||
cell tag_boolean(cell untagged)
|
||||
|
|
Loading…
Reference in New Issue