Unicode script uses interval maps
parent
e3808cc503
commit
d5f63983c3
|
@ -1,12 +1,12 @@
|
||||||
USING: unicode.syntax.backend kernel sequences assocs io.files
|
USING: unicode.syntax.backend kernel sequences assocs io.files
|
||||||
io.encodings ascii math.ranges io splitting math.parser
|
io.encodings ascii math.ranges io splitting math.parser
|
||||||
namespaces byte-arrays locals math sets io.encodings.ascii
|
namespaces byte-arrays locals math sets io.encodings.ascii
|
||||||
words compiler.units ;
|
words compiler.units arrays interval-maps ;
|
||||||
IN: unicode.script
|
IN: unicode.script
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
VALUE: char>num-table
|
VALUE: script-table
|
||||||
VALUE: num>name-table
|
SYMBOL: interned
|
||||||
|
|
||||||
: parse-script ( stream -- assoc )
|
: parse-script ( stream -- assoc )
|
||||||
! assoc is code point/range => name
|
! assoc is code point/range => name
|
||||||
|
@ -14,26 +14,18 @@ VALUE: num>name-table
|
||||||
";" split1 [ [ blank? ] trim ] bi@
|
";" split1 [ [ blank? ] trim ] bi@
|
||||||
] H{ } map>assoc ;
|
] H{ } map>assoc ;
|
||||||
|
|
||||||
: set-if ( value var -- )
|
: range, ( value key -- )
|
||||||
dup 500000 < [ set ] [ 2drop ] if ;
|
swap interned get
|
||||||
|
[ word-name = ] with find nip 2array , ;
|
||||||
|
|
||||||
: expand-ranges ( assoc -- char-assoc )
|
: expand-ranges ( assoc -- interval-map )
|
||||||
! char-assoc is code point => name
|
[
|
||||||
[ [
|
[
|
||||||
CHAR: . pick member? [
|
CHAR: . pick member? [
|
||||||
swap ".." split1 [ hex> ] bi@ [a,b]
|
swap ".." split1 [ hex> ] bi@ 2array
|
||||||
[ set-if ] with each
|
] [ swap hex> ] if range,
|
||||||
] [ swap hex> set-if ] if
|
] assoc-each
|
||||||
] assoc-each ] H{ } make-assoc ;
|
] { } make <interval-map> ;
|
||||||
|
|
||||||
: hash>byte-array ( hash -- byte-array )
|
|
||||||
[ keys supremum 1+ <byte-array> dup ] keep
|
|
||||||
[ -rot set-nth ] with assoc-each ;
|
|
||||||
|
|
||||||
: make-char>num ( assoc -- char>num-table )
|
|
||||||
expand-ranges
|
|
||||||
[ num>name-table index ] assoc-map
|
|
||||||
hash>byte-array ;
|
|
||||||
|
|
||||||
: >symbols ( strings -- symbols )
|
: >symbols ( strings -- symbols )
|
||||||
[
|
[
|
||||||
|
@ -41,9 +33,9 @@ VALUE: num>name-table
|
||||||
] with-compilation-unit ;
|
] with-compilation-unit ;
|
||||||
|
|
||||||
: process-script ( ranges -- )
|
: process-script ( ranges -- )
|
||||||
[ values prune \ num>name-table set-value ]
|
dup values prune >symbols interned [
|
||||||
[ make-char>num \ char>num-table set-value ] bi
|
expand-ranges \ script-table set-value
|
||||||
num>name-table >symbols \ num>name-table set-value ;
|
] with-variable ;
|
||||||
|
|
||||||
: load-script ( -- )
|
: load-script ( -- )
|
||||||
"resource:extra/unicode/script/Scripts.txt"
|
"resource:extra/unicode/script/Scripts.txt"
|
||||||
|
@ -52,5 +44,7 @@ VALUE: num>name-table
|
||||||
load-script
|
load-script
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
SYMBOL: Unknown
|
||||||
|
|
||||||
: script-of ( char -- script )
|
: script-of ( char -- script )
|
||||||
char>num-table nth num>name-table nth ;
|
script-table interval-at [ Unknown ] unless* ;
|
||||||
|
|
Loading…
Reference in New Issue