Fixing normalize errors
parent
c0ad6b7c55
commit
8b351b1ad6
|
@ -28,10 +28,6 @@ VALUE: properties
|
||||||
: char>name ( char -- string ) name-map value-at ;
|
: char>name ( char -- string ) name-map value-at ;
|
||||||
: property? ( char property -- ? ) properties at interval-key? ;
|
: property? ( char property -- ? ) properties at interval-key? ;
|
||||||
|
|
||||||
! Convenience functions
|
|
||||||
: ?between? ( n/f from to -- ? )
|
|
||||||
pick [ between? ] [ 3drop f ] if ;
|
|
||||||
|
|
||||||
! Loading data from UnicodeData.txt
|
! Loading data from UnicodeData.txt
|
||||||
|
|
||||||
: split-; ( line -- array )
|
: split-; ( line -- array )
|
||||||
|
@ -206,9 +202,9 @@ SYMBOL: interned
|
||||||
: expand-ranges ( assoc -- interval-map )
|
: expand-ranges ( assoc -- interval-map )
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
CHAR: . pick member? [
|
swap CHAR: . over member? [
|
||||||
swap ".." split1 [ hex> ] bi@ 2array
|
".." split1 [ hex> ] bi@ 2array
|
||||||
] [ swap hex> ] if range,
|
] [ hex> ] if range,
|
||||||
] assoc-each
|
] assoc-each
|
||||||
] { } make <interval-map> ;
|
] { } make <interval-map> ;
|
||||||
|
|
||||||
|
|
|
@ -8,9 +8,7 @@ ARTICLE: "unicode.normalize" "Unicode normalization"
|
||||||
{ $subsection nfc }
|
{ $subsection nfc }
|
||||||
{ $subsection nfd }
|
{ $subsection nfd }
|
||||||
{ $subsection nfkc }
|
{ $subsection nfkc }
|
||||||
{ $subsection nfkd }
|
{ $subsection nfkd } ;
|
||||||
"If two strings in a normalization form are appended, the result may not be in that normalization form still. To append two strings in NFD and make sure the result is in NFD, the following procedure is supplied:"
|
|
||||||
{ $subsection string-append } ;
|
|
||||||
|
|
||||||
HELP: nfc
|
HELP: nfc
|
||||||
{ $values { "string" string } { "nfc" "a string in NFC" } }
|
{ $values { "string" string } { "nfc" "a string in NFC" } }
|
||||||
|
@ -27,7 +25,3 @@ HELP: nfkc
|
||||||
HELP: nfkd
|
HELP: nfkd
|
||||||
{ $values { "string" string } { "nfc" "a string in NFKD" } }
|
{ $values { "string" string } { "nfc" "a string in NFKD" } }
|
||||||
{ $description "Converts a string to Normalization Form KD" } ;
|
{ $description "Converts a string to Normalization Form KD" } ;
|
||||||
|
|
||||||
HELP: string-append
|
|
||||||
{ $values { "s1" "a string in NFD" } { "s2" "a string in NFD" } { "string" "a string in NFD" } }
|
|
||||||
{ $description "Appends two strings, putting the result in NFD." } ;
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
USING: unicode.normalize kernel tools.test sequences
|
USING: unicode.normalize kernel tools.test sequences
|
||||||
unicode.data io.encodings.utf8 io.files splitting math.parser
|
unicode.data io.encodings.utf8 io.files splitting math.parser
|
||||||
locals math quotations assocs combinators ;
|
locals math quotations assocs combinators unicode.normalize.private ;
|
||||||
IN: unicode.normalize.tests
|
IN: unicode.normalize.tests
|
||||||
|
|
||||||
[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
|
[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
|
||||||
|
|
|
@ -1,21 +1,24 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences namespaces make unicode.data kernel math arrays
|
USING: sequences namespaces make unicode.data kernel math arrays
|
||||||
locals sorting.insertion accessors assocs ;
|
locals sorting.insertion accessors assocs math.order ;
|
||||||
IN: unicode.normalize
|
IN: unicode.normalize
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
! Conjoining Jamo behavior
|
! Conjoining Jamo behavior
|
||||||
|
|
||||||
: hangul-base HEX: ac00 ; inline
|
CONSTANT: hangul-base HEX: ac00
|
||||||
: hangul-end HEX: D7AF ; inline
|
CONSTANT: hangul-end HEX: D7AF
|
||||||
: initial-base HEX: 1100 ; inline
|
CONSTANT: initial-base HEX: 1100
|
||||||
: medial-base HEX: 1161 ; inline
|
CONSTANT: medial-base HEX: 1161
|
||||||
: final-base HEX: 11a7 ; inline
|
CONSTANT: final-base HEX: 11a7
|
||||||
|
|
||||||
: initial-count 19 ; inline
|
CONSTANT: initial-count 19
|
||||||
: medial-count 21 ; inline
|
CONSTANT: medial-count 21
|
||||||
: final-count 28 ; inline
|
CONSTANT: final-count 28
|
||||||
|
|
||||||
|
: ?between? ( n/f from to -- ? )
|
||||||
|
pick [ between? ] [ 3drop f ] if ;
|
||||||
|
|
||||||
: hangul? ( ch -- ? ) hangul-base hangul-end ?between? ;
|
: hangul? ( ch -- ? ) hangul-base hangul-end ?between? ;
|
||||||
: jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ;
|
: jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ;
|
||||||
|
@ -84,8 +87,6 @@ PRIVATE>
|
||||||
[ compatibility-entry ] decompose ;
|
[ compatibility-entry ] decompose ;
|
||||||
|
|
||||||
: string-append ( s1 s2 -- string )
|
: string-append ( s1 s2 -- string )
|
||||||
! This could be more optimized,
|
|
||||||
! but in practice, it'll almost always just be append
|
|
||||||
[ append ] keep
|
[ append ] keep
|
||||||
0 over ?nth non-starter?
|
0 over ?nth non-starter?
|
||||||
[ length dupd reorder-back ] [ drop ] if ;
|
[ length dupd reorder-back ] [ drop ] if ;
|
||||||
|
|
Loading…
Reference in New Issue