Changes to Unicode

db4
Daniel Ehrenberg 2008-01-29 13:33:14 -06:00
parent 58668874ad
commit edf1f27247
5 changed files with 42 additions and 39 deletions

View File

@ -14,3 +14,11 @@ IN: const
: ENUM: : ENUM:
";" parse-tokens [ create-in ] map define-enum ; parsing ";" parse-tokens [ create-in ] map define-enum ; parsing
: define-value ( word -- )
{ f } clone [ first ] curry define ;
: VALUE: CREATE define-value ; parsing
: set-value ( value word -- )
word-def first set-first ;

View File

@ -1,7 +1,7 @@
USING: unicode.categories kernel math combinators splitting USING: unicode.categories kernel math combinators splitting
sequences math.parser io.files io assocs arrays namespaces sequences math.parser io.files io assocs arrays namespaces
combinators.lib assocs.lib math.ranges unicode.normalize combinators.lib assocs.lib math.ranges unicode.normalize
unicode.syntax unicode.data compiler.units alien.syntax ; unicode.syntax unicode.data compiler.units alien.syntax const ;
IN: unicode.breaks IN: unicode.breaks
C-ENUM: Any L V T Extend Control CR LF graphemes ; C-ENUM: Any L V T Extend Control CR LF graphemes ;
@ -32,7 +32,7 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
: other-extend-lines ( -- lines ) : other-extend-lines ( -- lines )
"extra/unicode/PropList.txt" resource-path file-lines ; "extra/unicode/PropList.txt" resource-path file-lines ;
DEFER: other-extend VALUE: other-extend
CATEGORY: (extend) Me Mn ; CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? ) : extend? ( ch -- ? )
@ -77,7 +77,7 @@ SYMBOL: table
T T connect T T connect
graphemes Extend connect-after ; graphemes Extend connect-after ;
DEFER: grapheme-table VALUE: grapheme-table
: grapheme-break? ( class1 class2 -- ? ) : grapheme-break? ( class1 class2 -- ? )
grapheme-table nth nth not ; grapheme-table nth nth not ;
@ -113,10 +113,10 @@ DEFER: grapheme-table
[ grapheme-class dup rot grapheme-break? ] find-last-index [ grapheme-class dup rot grapheme-break? ] find-last-index
nip -1 or 1+ ; nip -1 or 1+ ;
<< [
other-extend-lines process-other-extend \ other-extend define-value other-extend-lines process-other-extend \ other-extend set-value
init-grapheme-table table init-grapheme-table table
[ make-grapheme-table finish-table ] with-variable [ make-grapheme-table finish-table ] with-variable
\ grapheme-table define-value \ grapheme-table set-value
>> ] with-compilation-unit

View File

@ -1,15 +1,12 @@
USING: assocs math kernel sequences io.files hashtables USING: assocs math kernel sequences io.files hashtables
quotations splitting arrays math.parser combinators.lib hash2 quotations splitting arrays math.parser combinators.lib hash2
byte-arrays words namespaces words compiler.units ; byte-arrays words namespaces words compiler.units const ;
IN: unicode.data IN: unicode.data
! Convenience functions ! Convenience functions
: 1+* ( n/f _ -- n+1 ) : 1+* ( n/f _ -- n+1 )
drop [ 1+ ] [ 0 ] if* ; drop [ 1+ ] [ 0 ] if* ;
: define-value ( value word -- )
swap 1quotation define ;
: ?between? ( n/f from to -- ? ) : ?between? ( n/f from to -- ? )
pick [ between? ] [ 3drop f ] if ; pick [ between? ] [ 3drop f ] if ;
@ -107,16 +104,16 @@ C: <code-point> code-point
4 head [ multihex ] map first4 4 head [ multihex ] map first4
<code-point> swap first set ; <code-point> swap first set ;
DEFER: simple-lower VALUE: simple-lower
DEFER: simple-upper VALUE: simple-upper
DEFER: simple-title VALUE: simple-title
DEFER: canonical-map VALUE: canonical-map
DEFER: combine-map VALUE: combine-map
DEFER: class-map VALUE: class-map
DEFER: compat-map VALUE: compat-map
DEFER: category-map VALUE: category-map
DEFER: name-map VALUE: name-map
DEFER: special-casing VALUE: special-casing
: canonical-entry ( char -- seq ) canonical-map at ; : canonical-entry ( char -- seq ) canonical-map at ;
: combine-chars ( a b -- char/f ) combine-map hash2 ; : combine-chars ( a b -- char/f ) combine-map hash2 ;
@ -132,16 +129,14 @@ DEFER: special-casing
[ length 5 = ] subset [ length 5 = ] subset
[ [ set-code-point ] each ] H{ } make-assoc ; [ [ set-code-point ] each ] H{ } make-assoc ;
[ load-data
load-data dup process-names \ name-map set-value
dup process-names \ name-map define-value 13 over process-data \ simple-lower set-value
13 over process-data \ simple-lower define-value 12 over process-data tuck \ simple-upper set-value
12 over process-data tuck \ simple-upper define-value 14 over process-data swapd union \ simple-title set-value
14 over process-data swapd union \ simple-title define-value dup process-combining \ class-map set-value
dup process-combining \ class-map define-value dup process-canonical \ canonical-map set-value
dup process-canonical \ canonical-map define-value \ combine-map set-value
\ combine-map define-value dup process-compat \ compat-map set-value
dup process-compat \ compat-map define-value process-category \ category-map set-value
process-category \ category-map define-value load-special-casing \ special-casing set-value
load-special-casing \ special-casing define-value
] with-compilation-unit

View File

@ -2,7 +2,7 @@ USING: sequences namespaces unicode.data kernel combinators.lib
math arrays ; math arrays ;
IN: unicode.normalize IN: unicode.normalize
! Utility word ! Utility word--probably unnecessary
: make* ( seq quot exemplar -- newseq ) : make* ( seq quot exemplar -- newseq )
! quot has access to original seq on stack ! quot has access to original seq on stack
! this just makes the new-resizable the same length as seq ! this just makes the new-resizable the same length as seq
@ -89,7 +89,7 @@ IN: unicode.normalize
swap [ [ swap [ [
dup hangul? [ hangul>jamo % drop ] dup hangul? [ hangul>jamo % drop ]
[ dup rot call [ % ] [ , ] ?if ] if [ dup rot call [ % ] [ , ] ?if ] if
] with each ] "" make* ] with each ] "" make
dup reorder dup reorder
] if ; inline ] if ; inline
@ -167,7 +167,7 @@ SYMBOL: char
0 ind set 0 ind set
SBUF" " clone after set SBUF" " clone after set
pass-combining (compose) pass-combining (compose)
] "" make* ; ] "" make ;
: nfc ( string -- nfc ) : nfc ( string -- nfc )
nfd compose ; nfd compose ;

View File

@ -1,5 +1,5 @@
USING: unicode.data kernel math sequences parser bit-arrays namespaces USING: unicode.data kernel math sequences parser bit-arrays namespaces
sequences.private arrays quotations classes.predicate ; sequences.private arrays quotations classes.predicate assocs ;
IN: unicode.syntax IN: unicode.syntax
! Character classes (categories) ! Character classes (categories)
@ -48,5 +48,5 @@ IN: unicode.syntax
categories swap seq-minus define-category ; parsing categories swap seq-minus define-category ; parsing
: UNICHAR: : UNICHAR:
! This should be part of CHAR: ! This should be part of CHAR:. Also, name-map at ==> name>char
scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing