unicode.script
parent
185eadf3be
commit
e1b5a081d2
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
Daniel Ehrenberg
|
|
@ -0,0 +1,8 @@
|
|||
USING: help.syntax help.markup ;
|
||||
IN: unicode.script
|
||||
|
||||
HELP: script-of
{ $values { "char" "a code point" } { "script" "a symbol" } }
{ $description "Gets a symbol representing the script of a given code point. The word name of the symbol is the same as the script name given in the Unicode Character Database file Scripts.txt." } ;
|
||||
|
||||
ABOUT: script-of
|
|
@ -0,0 +1,4 @@
|
|||
USING: unicode.script tools.test ;
|
||||
|
||||
! The lowercase ASCII letter "a" is expected to map to the Latin symbol.
[ Latin ] [ CHAR: a script-of ] unit-test
|
||||
! Code point 0 is expected to map to the Common symbol.
[ Common ] [ 0 script-of ] unit-test
|
|
@ -0,0 +1,56 @@
|
|||
USING: unicode.syntax.backend kernel sequences assocs io.files
|
||||
io.encodings ascii math.ranges io splitting math.parser
|
||||
namespaces byte-arrays locals math sets io.encodings.ascii
|
||||
words compiler.units ;
|
||||
IN: unicode.script
|
||||
|
||||
<PRIVATE
|
||||
! Byte array indexed by code point; each element is an index into
! num>name-table. Built by make-char>num and stored by process-script.
VALUE: char>num-table
|
||||
! Sequence of the distinct script names found in the parsed data.
! process-script first stores the name strings here, then replaces
! them with symbols of the same names.
VALUE: num>name-table
|
||||
|
||||
: strip-comment ( line -- data )
    ! Discard everything from the first "#" onward.
    "#" split1 drop ;

: parse-entry ( data -- range name )
    ! Split at the first ";" and trim blanks from both halves.
    ";" split1 [ [ blank? ] trim ] bi@ ;

: parse-script ( stream -- assoc )
    ! Reads all lines of the stream and builds a hashtable whose keys
    ! are code points or code point ranges (still as text) and whose
    ! values are script names. Lines that are empty once their
    ! comment has been removed are skipped.
    lines
    [ strip-comment ] map
    [ empty? not ] filter
    [ parse-entry ] H{ } map>assoc ;
|
||||
|
||||
: set-if ( value var -- )
    ! Stores value under var with set, unless var is 500000 or
    ! greater, in which case both inputs are discarded.
    dup 500000 >= [ 2drop ] [ set ] if ;
|
||||
|
||||
: set-range ( name range -- )
    ! range is "XXXX..YYYY" in hex; record name for every code
    ! point from XXXX to YYYY inclusive.
    ".." split1 [ hex> ] bi@ [a,b] [ set-if ] with each ;

: set-single ( name code -- )
    ! code is a single code point in hex.
    hex> set-if ;

: expand-entry ( range name -- )
    ! A key containing "." is treated as a range, anything else
    ! as a single code point.
    swap CHAR: . over member? [ set-range ] [ set-single ] if ;

: expand-ranges ( assoc -- char-assoc )
    ! Input maps code point / range text => name; the result is a
    ! hashtable mapping each individual code point => name.
    [ [ expand-entry ] assoc-each ] H{ } make-assoc ;
|
||||
|
||||
: hash>byte-array ( hash -- byte-array )
    ! Allocates a byte array one longer than the largest key, then
    ! stores each value at the index given by its key. Indices with
    ! no corresponding key keep the array's initial contents.
    dup keys supremum 1+ <byte-array>
    [ [ swapd set-nth ] curry assoc-each ] keep ;
|
||||
|
||||
: make-char>num ( assoc -- char>num-table )
    ! assoc maps code point / range text => script name.
    ! Each name is translated to its index in num>name-table while
    ! the data is still keyed by range, so the linear index search
    ! runs once per entry instead of once per expanded code point.
    ! expand-ranges passes values through untouched, so the result
    ! is the same as translating after expansion.
    [ num>name-table index ] assoc-map
    expand-ranges
    hash>byte-array ;
|
||||
|
||||
: name>symbol ( name -- word )
    ! Creates a word with this name in unicode.script and defines it
    ! as a symbol. Must be called inside a compilation unit.
    "unicode.script" create [ define-symbol ] keep ;

: >symbols ( strings -- symbols )
    ! Turns each name into a symbol word, all within a single
    ! compilation unit.
    [ [ name>symbol ] map ] with-compilation-unit ;
|
||||
|
||||
: process-script ( ranges -- )
    ! Order matters: the distinct names must be stored first because
    ! make-char>num looks names up in num>name-table, and only after
    ! the byte table is built are the names swapped for symbols.
    dup values prune \ num>name-table set-value
    make-char>num \ char>num-table set-value
    num>name-table >symbols \ num>name-table set-value ;
|
||||
|
||||
: scripts-path ( -- path )
    ! Location of the script data file that is parsed at load time.
    "resource:extra/unicode/script/Scripts.txt" ;

: load-script ( -- )
    ! Opens the data file with the ascii encoding, parses it and
    ! fills in both lookup tables.
    scripts-path ascii <file-reader>
    parse-script
    process-script ;
|
||||
|
||||
load-script
|
||||
PRIVATE>
|
||||
|
||||
: script-of ( char -- script )
    ! Looks up the script symbol for a code point: char>num-table
    ! yields an index, which num>name-table maps to the symbol.
    ! The table only extends to the largest code point that was
    ! loaded (set-if drops everything from 500000 up), so an index
    ! outside it yields f instead of raising a bounds error.
    char>num-table ?nth dup [ num>name-table nth ] when ;
|
|
@ -0,0 +1 @@
|
|||
Reads the UCD to get the script of a code point
|
Loading…
Reference in New Issue