Unicode script uses interval maps

db4
Daniel Ehrenberg 2008-05-05 23:47:22 -05:00
parent e3808cc503
commit d5f63983c3
1 changed files with 20 additions and 26 deletions

View File

@ -1,12 +1,12 @@
USING: unicode.syntax.backend kernel sequences assocs io.files USING: unicode.syntax.backend kernel sequences assocs io.files
io.encodings ascii math.ranges io splitting math.parser io.encodings ascii math.ranges io splitting math.parser
namespaces byte-arrays locals math sets io.encodings.ascii namespaces byte-arrays locals math sets io.encodings.ascii
words compiler.units ; words compiler.units arrays interval-maps ;
IN: unicode.script IN: unicode.script
<PRIVATE <PRIVATE
VALUE: char>num-table VALUE: script-table
VALUE: num>name-table SYMBOL: interned
: parse-script ( stream -- assoc ) : parse-script ( stream -- assoc )
! assoc is code point/range => name ! assoc is code point/range => name
@ -14,26 +14,18 @@ VALUE: num>name-table
";" split1 [ [ blank? ] trim ] bi@ ";" split1 [ [ blank? ] trim ] bi@
] H{ } map>assoc ; ] H{ } map>assoc ;
: set-if ( value var -- ) : range, ( value key -- )
dup 500000 < [ set ] [ 2drop ] if ; swap interned get
[ word-name = ] with find nip 2array , ;
: expand-ranges ( assoc -- char-assoc ) : expand-ranges ( assoc -- interval-map )
! char-assoc is code point => name [
[ [ [
CHAR: . pick member? [ CHAR: . pick member? [
swap ".." split1 [ hex> ] bi@ [a,b] swap ".." split1 [ hex> ] bi@ 2array
[ set-if ] with each ] [ swap hex> ] if range,
] [ swap hex> set-if ] if ] assoc-each
] assoc-each ] H{ } make-assoc ; ] { } make <interval-map> ;
: hash>byte-array ( hash -- byte-array )
[ keys supremum 1+ <byte-array> dup ] keep
[ -rot set-nth ] with assoc-each ;
: make-char>num ( assoc -- char>num-table )
expand-ranges
[ num>name-table index ] assoc-map
hash>byte-array ;
: >symbols ( strings -- symbols ) : >symbols ( strings -- symbols )
[ [
@ -41,9 +33,9 @@ VALUE: num>name-table
] with-compilation-unit ; ] with-compilation-unit ;
: process-script ( ranges -- ) : process-script ( ranges -- )
[ values prune \ num>name-table set-value ] dup values prune >symbols interned [
[ make-char>num \ char>num-table set-value ] bi expand-ranges \ script-table set-value
num>name-table >symbols \ num>name-table set-value ; ] with-variable ;
: load-script ( -- ) : load-script ( -- )
"resource:extra/unicode/script/Scripts.txt" "resource:extra/unicode/script/Scripts.txt"
@ -52,5 +44,7 @@ VALUE: num>name-table
load-script load-script
PRIVATE> PRIVATE>
SYMBOL: Unknown
: script-of ( char -- script ) : script-of ( char -- script )
char>num-table nth num>name-table nth ; script-table interval-at [ Unknown ] unless* ;