Changes to Unicode

db4
Daniel Ehrenberg 2008-01-29 13:33:14 -06:00
parent 58668874ad
commit edf1f27247
5 changed files with 42 additions and 39 deletions

View File

@ -14,3 +14,11 @@ IN: const
: ENUM:
";" parse-tokens [ create-in ] map define-enum ; parsing
: define-value ( word -- )
{ f } clone [ first ] curry define ;
: VALUE: CREATE define-value ; parsing
: set-value ( value word -- )
word-def first set-first ;

View File

@ -1,7 +1,7 @@
USING: unicode.categories kernel math combinators splitting
sequences math.parser io.files io assocs arrays namespaces
combinators.lib assocs.lib math.ranges unicode.normalize
unicode.syntax unicode.data compiler.units alien.syntax ;
unicode.syntax unicode.data compiler.units alien.syntax const ;
IN: unicode.breaks
C-ENUM: Any L V T Extend Control CR LF graphemes ;
@ -32,7 +32,7 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
: other-extend-lines ( -- lines )
"extra/unicode/PropList.txt" resource-path file-lines ;
DEFER: other-extend
VALUE: other-extend
CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? )
@ -77,7 +77,7 @@ SYMBOL: table
T T connect
graphemes Extend connect-after ;
DEFER: grapheme-table
VALUE: grapheme-table
: grapheme-break? ( class1 class2 -- ? )
grapheme-table nth nth not ;
@ -113,10 +113,10 @@ DEFER: grapheme-table
[ grapheme-class dup rot grapheme-break? ] find-last-index
nip -1 or 1+ ;
<<
other-extend-lines process-other-extend \ other-extend define-value
[
other-extend-lines process-other-extend \ other-extend set-value
init-grapheme-table table
[ make-grapheme-table finish-table ] with-variable
\ grapheme-table define-value
>>
\ grapheme-table set-value
] with-compilation-unit

View File

@ -1,15 +1,12 @@
USING: assocs math kernel sequences io.files hashtables
quotations splitting arrays math.parser combinators.lib hash2
byte-arrays words namespaces words compiler.units ;
byte-arrays words namespaces words compiler.units const ;
IN: unicode.data
! Convenience functions
: 1+* ( n/f _ -- n+1 )
drop [ 1+ ] [ 0 ] if* ;
: define-value ( value word -- )
swap 1quotation define ;
: ?between? ( n/f from to -- ? )
pick [ between? ] [ 3drop f ] if ;
@ -107,16 +104,16 @@ C: <code-point> code-point
4 head [ multihex ] map first4
<code-point> swap first set ;
DEFER: simple-lower
DEFER: simple-upper
DEFER: simple-title
DEFER: canonical-map
DEFER: combine-map
DEFER: class-map
DEFER: compat-map
DEFER: category-map
DEFER: name-map
DEFER: special-casing
VALUE: simple-lower
VALUE: simple-upper
VALUE: simple-title
VALUE: canonical-map
VALUE: combine-map
VALUE: class-map
VALUE: compat-map
VALUE: category-map
VALUE: name-map
VALUE: special-casing
: canonical-entry ( char -- seq ) canonical-map at ;
: combine-chars ( a b -- char/f ) combine-map hash2 ;
@ -132,16 +129,14 @@ DEFER: special-casing
[ length 5 = ] subset
[ [ set-code-point ] each ] H{ } make-assoc ;
[
load-data
dup process-names \ name-map define-value
13 over process-data \ simple-lower define-value
12 over process-data tuck \ simple-upper define-value
14 over process-data swapd union \ simple-title define-value
dup process-combining \ class-map define-value
dup process-canonical \ canonical-map define-value
\ combine-map define-value
dup process-compat \ compat-map define-value
process-category \ category-map define-value
load-special-casing \ special-casing define-value
] with-compilation-unit
load-data
dup process-names \ name-map set-value
13 over process-data \ simple-lower set-value
12 over process-data tuck \ simple-upper set-value
14 over process-data swapd union \ simple-title set-value
dup process-combining \ class-map set-value
dup process-canonical \ canonical-map set-value
\ combine-map set-value
dup process-compat \ compat-map set-value
process-category \ category-map set-value
load-special-casing \ special-casing set-value

View File

@ -2,7 +2,7 @@ USING: sequences namespaces unicode.data kernel combinators.lib
math arrays ;
IN: unicode.normalize
! Utility word
! Utility word--probably unnecessary
: make* ( seq quot exemplar -- newseq )
! quot has access to original seq on stack
! this just makes the new-resizable the same length as seq
@ -89,7 +89,7 @@ IN: unicode.normalize
swap [ [
dup hangul? [ hangul>jamo % drop ]
[ dup rot call [ % ] [ , ] ?if ] if
] with each ] "" make*
] with each ] "" make
dup reorder
] if ; inline
@ -167,7 +167,7 @@ SYMBOL: char
0 ind set
SBUF" " clone after set
pass-combining (compose)
] "" make* ;
] "" make ;
: nfc ( string -- nfc )
nfd compose ;

View File

@ -1,5 +1,5 @@
USING: unicode.data kernel math sequences parser bit-arrays namespaces
sequences.private arrays quotations classes.predicate ;
sequences.private arrays quotations classes.predicate assocs ;
IN: unicode.syntax
! Character classes (categories)
@ -48,5 +48,5 @@ IN: unicode.syntax
categories swap seq-minus define-category ; parsing
: UNICHAR:
! This should be part of CHAR:
! This should be part of CHAR:. Also, name-map at ==> name>char
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing