Changes to Unicode
parent
58668874ad
commit
edf1f27247
|
@ -14,3 +14,11 @@ IN: const
|
||||||
|
|
||||||
: ENUM:
|
: ENUM:
|
||||||
";" parse-tokens [ create-in ] map define-enum ; parsing
|
";" parse-tokens [ create-in ] map define-enum ; parsing
|
||||||
|
|
||||||
|
: define-value ( word -- )
|
||||||
|
{ f } clone [ first ] curry define ;
|
||||||
|
|
||||||
|
: VALUE: CREATE define-value ; parsing
|
||||||
|
|
||||||
|
: set-value ( value word -- )
|
||||||
|
word-def first set-first ;
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
USING: unicode.categories kernel math combinators splitting
|
USING: unicode.categories kernel math combinators splitting
|
||||||
sequences math.parser io.files io assocs arrays namespaces
|
sequences math.parser io.files io assocs arrays namespaces
|
||||||
combinators.lib assocs.lib math.ranges unicode.normalize
|
combinators.lib assocs.lib math.ranges unicode.normalize
|
||||||
unicode.syntax unicode.data compiler.units alien.syntax ;
|
unicode.syntax unicode.data compiler.units alien.syntax const ;
|
||||||
IN: unicode.breaks
|
IN: unicode.breaks
|
||||||
|
|
||||||
C-ENUM: Any L V T Extend Control CR LF graphemes ;
|
C-ENUM: Any L V T Extend Control CR LF graphemes ;
|
||||||
|
@ -32,7 +32,7 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
||||||
: other-extend-lines ( -- lines )
|
: other-extend-lines ( -- lines )
|
||||||
"extra/unicode/PropList.txt" resource-path file-lines ;
|
"extra/unicode/PropList.txt" resource-path file-lines ;
|
||||||
|
|
||||||
DEFER: other-extend
|
VALUE: other-extend
|
||||||
|
|
||||||
CATEGORY: (extend) Me Mn ;
|
CATEGORY: (extend) Me Mn ;
|
||||||
: extend? ( ch -- ? )
|
: extend? ( ch -- ? )
|
||||||
|
@ -77,7 +77,7 @@ SYMBOL: table
|
||||||
T T connect
|
T T connect
|
||||||
graphemes Extend connect-after ;
|
graphemes Extend connect-after ;
|
||||||
|
|
||||||
DEFER: grapheme-table
|
VALUE: grapheme-table
|
||||||
|
|
||||||
: grapheme-break? ( class1 class2 -- ? )
|
: grapheme-break? ( class1 class2 -- ? )
|
||||||
grapheme-table nth nth not ;
|
grapheme-table nth nth not ;
|
||||||
|
@ -113,10 +113,10 @@ DEFER: grapheme-table
|
||||||
[ grapheme-class dup rot grapheme-break? ] find-last-index
|
[ grapheme-class dup rot grapheme-break? ] find-last-index
|
||||||
nip -1 or 1+ ;
|
nip -1 or 1+ ;
|
||||||
|
|
||||||
<<
|
[
|
||||||
other-extend-lines process-other-extend \ other-extend define-value
|
other-extend-lines process-other-extend \ other-extend set-value
|
||||||
|
|
||||||
init-grapheme-table table
|
init-grapheme-table table
|
||||||
[ make-grapheme-table finish-table ] with-variable
|
[ make-grapheme-table finish-table ] with-variable
|
||||||
\ grapheme-table define-value
|
\ grapheme-table set-value
|
||||||
>>
|
] with-compilation-unit
|
||||||
|
|
|
@ -1,15 +1,12 @@
|
||||||
USING: assocs math kernel sequences io.files hashtables
|
USING: assocs math kernel sequences io.files hashtables
|
||||||
quotations splitting arrays math.parser combinators.lib hash2
|
quotations splitting arrays math.parser combinators.lib hash2
|
||||||
byte-arrays words namespaces words compiler.units ;
|
byte-arrays words namespaces words compiler.units const ;
|
||||||
IN: unicode.data
|
IN: unicode.data
|
||||||
|
|
||||||
! Convenience functions
|
! Convenience functions
|
||||||
: 1+* ( n/f _ -- n+1 )
|
: 1+* ( n/f _ -- n+1 )
|
||||||
drop [ 1+ ] [ 0 ] if* ;
|
drop [ 1+ ] [ 0 ] if* ;
|
||||||
|
|
||||||
: define-value ( value word -- )
|
|
||||||
swap 1quotation define ;
|
|
||||||
|
|
||||||
: ?between? ( n/f from to -- ? )
|
: ?between? ( n/f from to -- ? )
|
||||||
pick [ between? ] [ 3drop f ] if ;
|
pick [ between? ] [ 3drop f ] if ;
|
||||||
|
|
||||||
|
@ -107,16 +104,16 @@ C: <code-point> code-point
|
||||||
4 head [ multihex ] map first4
|
4 head [ multihex ] map first4
|
||||||
<code-point> swap first set ;
|
<code-point> swap first set ;
|
||||||
|
|
||||||
DEFER: simple-lower
|
VALUE: simple-lower
|
||||||
DEFER: simple-upper
|
VALUE: simple-upper
|
||||||
DEFER: simple-title
|
VALUE: simple-title
|
||||||
DEFER: canonical-map
|
VALUE: canonical-map
|
||||||
DEFER: combine-map
|
VALUE: combine-map
|
||||||
DEFER: class-map
|
VALUE: class-map
|
||||||
DEFER: compat-map
|
VALUE: compat-map
|
||||||
DEFER: category-map
|
VALUE: category-map
|
||||||
DEFER: name-map
|
VALUE: name-map
|
||||||
DEFER: special-casing
|
VALUE: special-casing
|
||||||
|
|
||||||
: canonical-entry ( char -- seq ) canonical-map at ;
|
: canonical-entry ( char -- seq ) canonical-map at ;
|
||||||
: combine-chars ( a b -- char/f ) combine-map hash2 ;
|
: combine-chars ( a b -- char/f ) combine-map hash2 ;
|
||||||
|
@ -132,16 +129,14 @@ DEFER: special-casing
|
||||||
[ length 5 = ] subset
|
[ length 5 = ] subset
|
||||||
[ [ set-code-point ] each ] H{ } make-assoc ;
|
[ [ set-code-point ] each ] H{ } make-assoc ;
|
||||||
|
|
||||||
[
|
load-data
|
||||||
load-data
|
dup process-names \ name-map set-value
|
||||||
dup process-names \ name-map define-value
|
13 over process-data \ simple-lower set-value
|
||||||
13 over process-data \ simple-lower define-value
|
12 over process-data tuck \ simple-upper set-value
|
||||||
12 over process-data tuck \ simple-upper define-value
|
14 over process-data swapd union \ simple-title set-value
|
||||||
14 over process-data swapd union \ simple-title define-value
|
dup process-combining \ class-map set-value
|
||||||
dup process-combining \ class-map define-value
|
dup process-canonical \ canonical-map set-value
|
||||||
dup process-canonical \ canonical-map define-value
|
\ combine-map set-value
|
||||||
\ combine-map define-value
|
dup process-compat \ compat-map set-value
|
||||||
dup process-compat \ compat-map define-value
|
process-category \ category-map set-value
|
||||||
process-category \ category-map define-value
|
load-special-casing \ special-casing set-value
|
||||||
load-special-casing \ special-casing define-value
|
|
||||||
] with-compilation-unit
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ USING: sequences namespaces unicode.data kernel combinators.lib
|
||||||
math arrays ;
|
math arrays ;
|
||||||
IN: unicode.normalize
|
IN: unicode.normalize
|
||||||
|
|
||||||
! Utility word
|
! Utility word--probably unnecessary
|
||||||
: make* ( seq quot exemplar -- newseq )
|
: make* ( seq quot exemplar -- newseq )
|
||||||
! quot has access to original seq on stack
|
! quot has access to original seq on stack
|
||||||
! this just makes the new-resizable the same length as seq
|
! this just makes the new-resizable the same length as seq
|
||||||
|
@ -89,7 +89,7 @@ IN: unicode.normalize
|
||||||
swap [ [
|
swap [ [
|
||||||
dup hangul? [ hangul>jamo % drop ]
|
dup hangul? [ hangul>jamo % drop ]
|
||||||
[ dup rot call [ % ] [ , ] ?if ] if
|
[ dup rot call [ % ] [ , ] ?if ] if
|
||||||
] with each ] "" make*
|
] with each ] "" make
|
||||||
dup reorder
|
dup reorder
|
||||||
] if ; inline
|
] if ; inline
|
||||||
|
|
||||||
|
@ -167,7 +167,7 @@ SYMBOL: char
|
||||||
0 ind set
|
0 ind set
|
||||||
SBUF" " clone after set
|
SBUF" " clone after set
|
||||||
pass-combining (compose)
|
pass-combining (compose)
|
||||||
] "" make* ;
|
] "" make ;
|
||||||
|
|
||||||
: nfc ( string -- nfc )
|
: nfc ( string -- nfc )
|
||||||
nfd compose ;
|
nfd compose ;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
USING: unicode.data kernel math sequences parser bit-arrays namespaces
|
USING: unicode.data kernel math sequences parser bit-arrays namespaces
|
||||||
sequences.private arrays quotations classes.predicate ;
|
sequences.private arrays quotations classes.predicate assocs ;
|
||||||
IN: unicode.syntax
|
IN: unicode.syntax
|
||||||
|
|
||||||
! Character classes (categories)
|
! Character classes (categories)
|
||||||
|
@ -48,5 +48,5 @@ IN: unicode.syntax
|
||||||
categories swap seq-minus define-category ; parsing
|
categories swap seq-minus define-category ; parsing
|
||||||
|
|
||||||
: UNICHAR:
|
: UNICHAR:
|
||||||
! This should be part of CHAR:
|
! This should be part of CHAR:. Also, name-map at ==> name>char
|
||||||
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing
|
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing
|
||||||
|
|
Loading…
Reference in New Issue