From edf1f2724728b9088b9a746814c7dc9f912e7cd0 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Tue, 29 Jan 2008 13:33:14 -0600 Subject: [PATCH] Changes to Unicode --- extra/const/const.factor | 8 ++++ extra/unicode/breaks/breaks.factor | 14 +++---- extra/unicode/data/data.factor | 49 +++++++++++------------- extra/unicode/normalize/normalize.factor | 6 +-- extra/unicode/syntax/syntax.factor | 4 +- 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/extra/const/const.factor b/extra/const/const.factor index 59d65edaae..8efef7e372 100644 --- a/extra/const/const.factor +++ b/extra/const/const.factor @@ -14,3 +14,11 @@ IN: const : ENUM: ";" parse-tokens [ create-in ] map define-enum ; parsing + +: define-value ( word -- ) + { f } clone [ first ] curry define ; + +: VALUE: CREATE define-value ; parsing + +: set-value ( value word -- ) + word-def first set-first ; diff --git a/extra/unicode/breaks/breaks.factor b/extra/unicode/breaks/breaks.factor index 9c9242edc3..70a9c781a2 100644 --- a/extra/unicode/breaks/breaks.factor +++ b/extra/unicode/breaks/breaks.factor @@ -1,7 +1,7 @@ USING: unicode.categories kernel math combinators splitting sequences math.parser io.files io assocs arrays namespaces combinators.lib assocs.lib math.ranges unicode.normalize -unicode.syntax unicode.data compiler.units alien.syntax ; +unicode.syntax unicode.data compiler.units alien.syntax const ; IN: unicode.breaks C-ENUM: Any L V T Extend Control CR LF graphemes ; @@ -32,7 +32,7 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ; : other-extend-lines ( -- lines ) "extra/unicode/PropList.txt" resource-path file-lines ; -DEFER: other-extend +VALUE: other-extend CATEGORY: (extend) Me Mn ; : extend? ( ch -- ? ) @@ -77,7 +77,7 @@ SYMBOL: table T T connect graphemes Extend connect-after ; -DEFER: grapheme-table +VALUE: grapheme-table : grapheme-break? ( class1 class2 -- ? ) grapheme-table nth nth not ; @@ -113,10 +113,10 @@ DEFER: grapheme-table [ grapheme-class dup rot grapheme-break? ] find-last-index nip -1 or 1+ ; -<< - other-extend-lines process-other-extend \ other-extend define-value +[ + other-extend-lines process-other-extend \ other-extend set-value init-grapheme-table table [ make-grapheme-table finish-table ] with-variable - \ grapheme-table define-value ->> + \ grapheme-table set-value +] with-compilation-unit diff --git a/extra/unicode/data/data.factor b/extra/unicode/data/data.factor index e112471c28..c579d1fdfd 100644 --- a/extra/unicode/data/data.factor +++ b/extra/unicode/data/data.factor @@ -1,15 +1,12 @@ USING: assocs math kernel sequences io.files hashtables quotations splitting arrays math.parser combinators.lib hash2 -byte-arrays words namespaces words compiler.units ; +byte-arrays words namespaces words compiler.units const ; IN: unicode.data ! Convenience functions : 1+* ( n/f _ -- n+1 ) drop [ 1+ ] [ 0 ] if* ; -: define-value ( value word -- ) - swap 1quotation define ; - : ?between? ( n/f from to -- ? ) pick [ between? ] [ 3drop f ] if ; @@ -107,16 +104,16 @@ C: code-point 4 head [ multihex ] map first4 swap first set ; -DEFER: simple-lower -DEFER: simple-upper -DEFER: simple-title -DEFER: canonical-map -DEFER: combine-map -DEFER: class-map -DEFER: compat-map -DEFER: category-map -DEFER: name-map -DEFER: special-casing +VALUE: simple-lower +VALUE: simple-upper +VALUE: simple-title +VALUE: canonical-map +VALUE: combine-map +VALUE: class-map +VALUE: compat-map +VALUE: category-map +VALUE: name-map +VALUE: special-casing : canonical-entry ( char -- seq ) canonical-map at ; : combine-chars ( a b -- char/f ) combine-map hash2 ; @@ -132,16 +129,14 @@ DEFER: special-casing [ length 5 = ] subset [ [ set-code-point ] each ] H{ } make-assoc ; -[ - load-data - dup process-names \ name-map define-value - 13 over process-data \ simple-lower define-value - 12 over process-data tuck \ simple-upper define-value - 14 over process-data swapd union \ simple-title define-value - dup process-combining \ class-map define-value - dup process-canonical \ canonical-map define-value - \ combine-map define-value - dup process-compat \ compat-map define-value - process-category \ category-map define-value - load-special-casing \ special-casing define-value -] with-compilation-unit +load-data +dup process-names \ name-map set-value +13 over process-data \ simple-lower set-value +12 over process-data tuck \ simple-upper set-value +14 over process-data swapd union \ simple-title set-value +dup process-combining \ class-map set-value +dup process-canonical \ canonical-map set-value + \ combine-map set-value +dup process-compat \ compat-map set-value +process-category \ category-map set-value +load-special-casing \ special-casing set-value diff --git a/extra/unicode/normalize/normalize.factor b/extra/unicode/normalize/normalize.factor index 86a922793f..b018d115f8 100644 --- a/extra/unicode/normalize/normalize.factor +++ b/extra/unicode/normalize/normalize.factor @@ -2,7 +2,7 @@ USING: sequences namespaces unicode.data kernel combinators.lib math arrays ; IN: unicode.normalize -! Utility word +! Utility word--probably unnecessary : make* ( seq quot exemplar -- newseq ) ! quot has access to original seq on stack ! this just makes the new-resizable the same length as seq @@ -89,7 +89,7 @@ IN: unicode.normalize swap [ [ dup hangul? [ hangul>jamo % drop ] [ dup rot call [ % ] [ , ] ?if ] if - ] with each ] "" make* + ] with each ] "" make dup reorder ] if ; inline @@ -167,7 +167,7 @@ SYMBOL: char 0 ind set SBUF" " clone after set pass-combining (compose) - ] "" make* ; + ] "" make ; : nfc ( string -- nfc ) nfd compose ; diff --git a/extra/unicode/syntax/syntax.factor b/extra/unicode/syntax/syntax.factor index 5119663872..6c75a77c76 100644 --- a/extra/unicode/syntax/syntax.factor +++ b/extra/unicode/syntax/syntax.factor @@ -1,5 +1,5 @@ USING: unicode.data kernel math sequences parser bit-arrays namespaces -sequences.private arrays quotations classes.predicate ; +sequences.private arrays quotations classes.predicate assocs ; IN: unicode.syntax ! Character classes (categories) @@ -48,5 +48,5 @@ IN: unicode.syntax categories swap seq-minus define-category ; parsing : UNICHAR: - ! This should be part of CHAR: + ! This should be part of CHAR:. Also, name-map at ==> name>char scan name>char [ parsed ] [ "Invalid character" throw ] if* ; parsing