Unicode normalization bug fixes (incomplete)

db4
Daniel Ehrenberg 2009-01-05 22:19:14 -06:00
parent fa91133cc0
commit 9d3d3f815f
3 changed files with 11 additions and 10 deletions

View File

@ -4,7 +4,7 @@ USING: combinators.short-circuit assocs math kernel sequences
io.files hashtables quotations splitting grouping arrays io.files hashtables quotations splitting grouping arrays
math.parser hash2 math.order byte-arrays words namespaces words math.parser hash2 math.order byte-arrays words namespaces words
compiler.units parser io.encodings.ascii values interval-maps compiler.units parser io.encodings.ascii values interval-maps
ascii sets combinators locals math.ranges sorting ; ascii sets combinators locals math.ranges sorting make ;
IN: unicode.data IN: unicode.data
VALUE: simple-lower VALUE: simple-lower
@ -102,6 +102,7 @@ VALUE: properties
"Cc" "Cf" "Cs" "Co" } ; "Cc" "Cf" "Cs" "Co" } ;
: num-chars HEX: 2FA1E ; : num-chars HEX: 2FA1E ;
! the maximum unicode char in the first 3 planes ! the maximum unicode char in the first 3 planes
: ?set-nth ( val index seq -- ) : ?set-nth ( val index seq -- )

View File

@ -41,4 +41,4 @@ IN: unicode.normalize.tests
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ] [ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
} cleave ; } cleave ;
! parse-test [ run-line ] each parse-test 1000 head [ run-line ] each

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: sequences namespaces make unicode.data kernel math arrays USING: sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors ; locals sorting.insertion accessors assocs ;
IN: unicode.normalize IN: unicode.normalize
! Conjoining Jamo behavior ! Conjoining Jamo behavior
@ -117,16 +117,17 @@ SYMBOL: char
: pass-combining ( -- ) : pass-combining ( -- )
current non-starter? [ current , to pass-combining ] when ; current non-starter? [ current , to pass-combining ] when ;
: try-compose ( last-class char current-class -- ) :: try-compose ( last-class new-char current-class -- new-class )
swapd = [ after get push ] [ last-class current-class = [ new-char after get push last-class ] [
char get over combine-chars char get new-char combine-chars
[ nip char set ] [ after get push ] if* [ char set last-class ]
[ new-char after get push current-class ] if*
] if ; ] if ;
: compose-iter ( n -- ) : compose-iter ( last-class -- )
current [ current [
dup combining-class dup dup combining-class dup
[ [ try-compose ] keep to compose-iter ] [ 3drop ] if [ try-compose to compose-iter ] [ 3drop ] if
] [ drop ] if* ; ] [ drop ] if* ;
: ?new-after ( -- ) : ?new-after ( -- )
@ -138,7 +139,6 @@ SYMBOL: char
char set to ?new-after char set to ?new-after
0 compose-iter 0 compose-iter
char get , after get % char get , after get %
to
] if (compose) ] if (compose)
] when* ; ] when* ;