Fixing normalize errors
parent
c0ad6b7c55
commit
8b351b1ad6
|
@ -28,10 +28,6 @@ VALUE: properties
|
|||
! Reverse lookup in name-map: value-at finds the key whose value is char.
: char>name ( char -- string ) name-map value-at ;
|
||||
! Fetches the entry for property from properties, then asks interval-key?
! whether char is a key of that entry.
: property? ( char property -- ? ) properties at interval-key? ;
|
||||
|
||||
! Convenience functions
|
||||
! between? that tolerates a missing value: when n/f is f, all three inputs
! are dropped and f is returned; otherwise the answer is that of between?.
: ?between? ( n/f from to -- ? )
|
||||
pick [ between? ] [ 3drop f ] if ;
|
||||
|
||||
! Loading data from UnicodeData.txt
|
||||
|
||||
: split-; ( line -- array )
|
||||
|
@ -206,9 +202,9 @@ SYMBOL: interned
|
|||
! Turns an assoc keyed by hex strings into an interval map. A key containing
! "." is split on ".." and both halves are parsed with hex> into a 2array;
! any other key is parsed with hex> directly. Each entry is emitted with
! range, inside make, and the collected result is passed to <interval-map>.
! NOTE(review): this view is a diff, so two spellings of the per-entry body
! appear below (one using pick, one using swap ... over). They look
! stack-equivalent -- confirm against the real file.
: expand-ranges ( assoc -- interval-map )
|
||||
[
|
||||
[
|
||||
CHAR: . pick member? [
|
||||
swap ".." split1 [ hex> ] bi@ 2array
|
||||
] [ swap hex> ] if range,
|
||||
swap CHAR: . over member? [
|
||||
".." split1 [ hex> ] bi@ 2array
|
||||
] [ hex> ] if range,
|
||||
] assoc-each
|
||||
] { } make <interval-map> ;
|
||||
|
||||
|
|
|
@ -8,9 +8,7 @@ ARTICLE: "unicode.normalize" "Unicode normalization"
|
|||
{ $subsection nfc }
|
||||
{ $subsection nfd }
|
||||
{ $subsection nfkc }
|
||||
{ $subsection nfkd }
|
||||
"If two strings in a normalization form are appended, the result may not be in that normalization form still. To append two strings in NFD and make sure the result is in NFD, the following procedure is supplied:"
|
||||
{ $subsection string-append } ;
|
||||
{ $subsection nfkd } ;
|
||||
|
||||
HELP: nfc
|
||||
{ $values { "string" string } { "nfc" "a string in NFC" } }
|
||||
|
@ -27,7 +25,3 @@ HELP: nfkc
|
|||
! Help entry for nfkd. The output value is labelled "nfkd" rather than "nfc",
! so the label agrees with the form named in its own description.
HELP: nfkd
{ $values { "string" string } { "nfkd" "a string in NFKD" } }
{ $description "Converts a string to Normalization Form KD" } ;
|
||||
|
||||
HELP: string-append
|
||||
{ $values { "s1" "a string in NFD" } { "s2" "a string in NFD" } { "string" "a string in NFD" } }
|
||||
{ $description "Appends two strings, putting the result in NFD." } ;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
USING: unicode.normalize kernel tools.test sequences
|
||||
unicode.data io.encodings.utf8 io.files splitting math.parser
|
||||
locals math quotations assocs combinators ;
|
||||
locals math quotations assocs combinators unicode.normalize.private ;
|
||||
IN: unicode.normalize.tests
|
||||
|
||||
[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
|
||||
|
|
|
@ -1,21 +1,24 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences namespaces make unicode.data kernel math arrays
|
||||
locals sorting.insertion accessors assocs ;
|
||||
locals sorting.insertion accessors assocs math.order ;
|
||||
IN: unicode.normalize
|
||||
|
||||
<PRIVATE
|
||||
! Conjoining Jamo behavior
|
||||
|
||||
: hangul-base HEX: ac00 ; inline
|
||||
: hangul-end HEX: D7AF ; inline
|
||||
: initial-base HEX: 1100 ; inline
|
||||
: medial-base HEX: 1161 ; inline
|
||||
: final-base HEX: 11a7 ; inline
|
||||
! Code point parameters for Hangul syllable arithmetic. In the notation of
! the Unicode Standard (section 3.12) these are SBase, the last syllable,
! LBase, VBase and TBase.

! First precomposed Hangul syllable (SBase).
CONSTANT: hangul-base HEX: ac00
! Last precomposed Hangul syllable: SBase + 19 * 21 * 28 - 1 = D7A3.
! D7A4..D7AF belong to the same code block but are unassigned, so they must
! not be classified as syllables.
CONSTANT: hangul-end HEX: D7A3
! Origins of the initial (leading), medial (vowel) and final (trailing) jamo.
CONSTANT: initial-base HEX: 1100
CONSTANT: medial-base HEX: 1161
CONSTANT: final-base HEX: 11a7
|
||||
|
||||
: initial-count 19 ; inline
|
||||
: medial-count 21 ; inline
|
||||
: final-count 28 ; inline
|
||||
! Number of initial, medial and final jamo positions; companions to the
! initial-base / medial-base / final-base constants.
CONSTANT: initial-count 19
|
||||
CONSTANT: medial-count 21
|
||||
CONSTANT: final-count 28
|
||||
|
||||
! between? that tolerates a missing value: when n/f is f, all three inputs
! are dropped and f is returned; otherwise the answer is that of between?.
: ?between? ( n/f from to -- ? )
|
||||
pick [ between? ] [ 3drop f ] if ;
|
||||
|
||||
! Tests ch against the range hangul-base..hangul-end; yields f when ch is f.
: hangul? ( ch -- ? ) hangul-base hangul-end ?between? ;
|
||||
! Tests ch against the range HEX: 1100..HEX: 11FF; yields f when ch is f.
: jamo? ( ch -- ? ) HEX: 1100 HEX: 11FF ?between? ;
|
||||
|
@ -84,8 +87,6 @@ PRIVATE>
|
|||
[ compatibility-entry ] decompose ;
|
||||
|
||||
! Appends s2 to s1. If the first element of s2 is a non-starter, reorder-back
! is called on the appended string; otherwise the plain append is returned.
! NOTE(review): `[ append ] keep` retains s2, so the index handed to
! reorder-back is the length of s2, not the length of s1 -- confirm that
! this is the intended position of the join. The unit test in this commit
! uses two strings of equal length and would not tell the two apart.
: string-append ( s1 s2 -- string )
|
||||
! This could be more optimized,
|
||||
! but in practice, it'll almost always just be append
|
||||
[ append ] keep
|
||||
0 over ?nth non-starter?
|
||||
[ length dupd reorder-back ] [ drop ] if ;
|
||||
|
|
Loading…
Reference in New Issue