Changes to Unicode
parent
58668874ad
commit
edf1f27247
|
@ -14,3 +14,11 @@ IN: const
|
|||
|
||||
: ENUM:
|
||||
";" parse-tokens [ create-in ] map define-enum ; parsing
|
||||
|
||||
: define-value ( word -- )
|
||||
{ f } clone [ first ] curry define ;
|
||||
|
||||
: VALUE: CREATE define-value ; parsing
|
||||
|
||||
: set-value ( value word -- )
|
||||
word-def first set-first ;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
USING: unicode.categories kernel math combinators splitting
|
||||
sequences math.parser io.files io assocs arrays namespaces
|
||||
combinators.lib assocs.lib math.ranges unicode.normalize
|
||||
unicode.syntax unicode.data compiler.units alien.syntax ;
|
||||
unicode.syntax unicode.data compiler.units alien.syntax const ;
|
||||
IN: unicode.breaks
|
||||
|
||||
C-ENUM: Any L V T Extend Control CR LF graphemes ;
|
||||
|
@ -32,7 +32,7 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
|||
: other-extend-lines ( -- lines )
|
||||
"extra/unicode/PropList.txt" resource-path file-lines ;
|
||||
|
||||
DEFER: other-extend
|
||||
VALUE: other-extend
|
||||
|
||||
CATEGORY: (extend) Me Mn ;
|
||||
: extend? ( ch -- ? )
|
||||
|
@ -77,7 +77,7 @@ SYMBOL: table
|
|||
T T connect
|
||||
graphemes Extend connect-after ;
|
||||
|
||||
DEFER: grapheme-table
|
||||
VALUE: grapheme-table
|
||||
|
||||
: grapheme-break? ( class1 class2 -- ? )
|
||||
grapheme-table nth nth not ;
|
||||
|
@ -113,10 +113,10 @@ DEFER: grapheme-table
|
|||
[ grapheme-class dup rot grapheme-break? ] find-last-index
|
||||
nip -1 or 1+ ;
|
||||
|
||||
<<
|
||||
other-extend-lines process-other-extend \ other-extend define-value
|
||||
[
|
||||
other-extend-lines process-other-extend \ other-extend set-value
|
||||
|
||||
init-grapheme-table table
|
||||
[ make-grapheme-table finish-table ] with-variable
|
||||
\ grapheme-table define-value
|
||||
>>
|
||||
\ grapheme-table set-value
|
||||
] with-compilation-unit
|
||||
|
|
|
@ -1,15 +1,12 @@
|
|||
USING: assocs math kernel sequences io.files hashtables
|
||||
quotations splitting arrays math.parser combinators.lib hash2
|
||||
byte-arrays words namespaces words compiler.units ;
|
||||
byte-arrays words namespaces words compiler.units const ;
|
||||
IN: unicode.data
|
||||
|
||||
! Convenience functions
|
||||
: 1+* ( n/f _ -- n+1 )
|
||||
drop [ 1+ ] [ 0 ] if* ;
|
||||
|
||||
: define-value ( value word -- )
|
||||
swap 1quotation define ;
|
||||
|
||||
: ?between? ( n/f from to -- ? )
|
||||
pick [ between? ] [ 3drop f ] if ;
|
||||
|
||||
|
@ -107,16 +104,16 @@ C: <code-point> code-point
|
|||
4 head [ multihex ] map first4
|
||||
<code-point> swap first set ;
|
||||
|
||||
DEFER: simple-lower
|
||||
DEFER: simple-upper
|
||||
DEFER: simple-title
|
||||
DEFER: canonical-map
|
||||
DEFER: combine-map
|
||||
DEFER: class-map
|
||||
DEFER: compat-map
|
||||
DEFER: category-map
|
||||
DEFER: name-map
|
||||
DEFER: special-casing
|
||||
VALUE: simple-lower
|
||||
VALUE: simple-upper
|
||||
VALUE: simple-title
|
||||
VALUE: canonical-map
|
||||
VALUE: combine-map
|
||||
VALUE: class-map
|
||||
VALUE: compat-map
|
||||
VALUE: category-map
|
||||
VALUE: name-map
|
||||
VALUE: special-casing
|
||||
|
||||
: canonical-entry ( char -- seq ) canonical-map at ;
|
||||
: combine-chars ( a b -- char/f ) combine-map hash2 ;
|
||||
|
@ -132,16 +129,14 @@ DEFER: special-casing
|
|||
[ length 5 = ] subset
|
||||
[ [ set-code-point ] each ] H{ } make-assoc ;
|
||||
|
||||
[
|
||||
load-data
|
||||
dup process-names \ name-map define-value
|
||||
13 over process-data \ simple-lower define-value
|
||||
12 over process-data tuck \ simple-upper define-value
|
||||
14 over process-data swapd union \ simple-title define-value
|
||||
dup process-combining \ class-map define-value
|
||||
dup process-canonical \ canonical-map define-value
|
||||
\ combine-map define-value
|
||||
dup process-compat \ compat-map define-value
|
||||
process-category \ category-map define-value
|
||||
load-special-casing \ special-casing define-value
|
||||
] with-compilation-unit
|
||||
load-data
|
||||
dup process-names \ name-map set-value
|
||||
13 over process-data \ simple-lower set-value
|
||||
12 over process-data tuck \ simple-upper set-value
|
||||
14 over process-data swapd union \ simple-title set-value
|
||||
dup process-combining \ class-map set-value
|
||||
dup process-canonical \ canonical-map set-value
|
||||
\ combine-map set-value
|
||||
dup process-compat \ compat-map set-value
|
||||
process-category \ category-map set-value
|
||||
load-special-casing \ special-casing set-value
|
||||
|
|
|
@ -2,7 +2,7 @@ USING: sequences namespaces unicode.data kernel combinators.lib
|
|||
math arrays ;
|
||||
IN: unicode.normalize
|
||||
|
||||
! Utility word
|
||||
! Utility word--probably unnecessary
|
||||
: make* ( seq quot exemplar -- newseq )
|
||||
! quot has access to original seq on stack
|
||||
! this just makes the new-resizable the same length as seq
|
||||
|
@ -89,7 +89,7 @@ IN: unicode.normalize
|
|||
swap [ [
|
||||
dup hangul? [ hangul>jamo % drop ]
|
||||
[ dup rot call [ % ] [ , ] ?if ] if
|
||||
] with each ] "" make*
|
||||
] with each ] "" make
|
||||
dup reorder
|
||||
] if ; inline
|
||||
|
||||
|
@ -167,7 +167,7 @@ SYMBOL: char
|
|||
0 ind set
|
||||
SBUF" " clone after set
|
||||
pass-combining (compose)
|
||||
] "" make* ;
|
||||
] "" make ;
|
||||
|
||||
: nfc ( string -- nfc )
|
||||
nfd compose ;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: unicode.data kernel math sequences parser bit-arrays namespaces
|
||||
sequences.private arrays quotations classes.predicate ;
|
||||
sequences.private arrays quotations classes.predicate assocs ;
|
||||
IN: unicode.syntax
|
||||
|
||||
! Character classes (categories)
|
||||
|
@ -48,5 +48,5 @@ IN: unicode.syntax
|
|||
categories swap seq-minus define-category ; parsing
|
||||
|
||||
: UNICHAR:
|
||||
! This should be part of CHAR:
|
||||
! This should be part of CHAR:. Also, name-map at ==> name>char
|
||||
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing
|
||||
|
|
Loading…
Reference in New Issue