Unicode normalization bug fixes (incomplete)
parent
fa91133cc0
commit
9d3d3f815f
|
@ -4,7 +4,7 @@ USING: combinators.short-circuit assocs math kernel sequences
|
||||||
io.files hashtables quotations splitting grouping arrays
|
io.files hashtables quotations splitting grouping arrays
|
||||||
math.parser hash2 math.order byte-arrays words namespaces words
|
math.parser hash2 math.order byte-arrays words namespaces words
|
||||||
compiler.units parser io.encodings.ascii values interval-maps
|
compiler.units parser io.encodings.ascii values interval-maps
|
||||||
ascii sets combinators locals math.ranges sorting ;
|
ascii sets combinators locals math.ranges sorting make ;
|
||||||
IN: unicode.data
|
IN: unicode.data
|
||||||
|
|
||||||
VALUE: simple-lower
|
VALUE: simple-lower
|
||||||
|
@ -102,6 +102,7 @@ VALUE: properties
|
||||||
"Cc" "Cf" "Cs" "Co" } ;
|
"Cc" "Cf" "Cs" "Co" } ;
|
||||||
|
|
||||||
: num-chars HEX: 2FA1E ;
|
: num-chars HEX: 2FA1E ;
|
||||||
|
|
||||||
! the maximum unicode char in the first 3 planes
|
! the maximum unicode char in the first 3 planes
|
||||||
|
|
||||||
: ?set-nth ( val index seq -- )
|
: ?set-nth ( val index seq -- )
|
||||||
|
|
|
@ -41,4 +41,4 @@ IN: unicode.normalize.tests
|
||||||
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
|
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
|
||||||
} cleave ;
|
} cleave ;
|
||||||
|
|
||||||
! parse-test [ run-line ] each
|
parse-test 1000 head [ run-line ] each
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: sequences namespaces make unicode.data kernel math arrays
|
USING: sequences namespaces make unicode.data kernel math arrays
|
||||||
locals sorting.insertion accessors ;
|
locals sorting.insertion accessors assocs ;
|
||||||
IN: unicode.normalize
|
IN: unicode.normalize
|
||||||
|
|
||||||
! Conjoining Jamo behavior
|
! Conjoining Jamo behavior
|
||||||
|
@ -117,16 +117,17 @@ SYMBOL: char
|
||||||
: pass-combining ( -- )
|
: pass-combining ( -- )
|
||||||
current non-starter? [ current , to pass-combining ] when ;
|
current non-starter? [ current , to pass-combining ] when ;
|
||||||
|
|
||||||
: try-compose ( last-class char current-class -- )
|
:: try-compose ( last-class new-char current-class -- new-class )
|
||||||
swapd = [ after get push ] [
|
last-class current-class = [ new-char after get push last-class ] [
|
||||||
char get over combine-chars
|
char get new-char combine-chars
|
||||||
[ nip char set ] [ after get push ] if*
|
[ char set last-class ]
|
||||||
|
[ new-char after get push current-class ] if*
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: compose-iter ( n -- )
|
: compose-iter ( last-class -- )
|
||||||
current [
|
current [
|
||||||
dup combining-class dup
|
dup combining-class dup
|
||||||
[ [ try-compose ] keep to compose-iter ] [ 3drop ] if
|
[ try-compose to compose-iter ] [ 3drop ] if
|
||||||
] [ drop ] if* ;
|
] [ drop ] if* ;
|
||||||
|
|
||||||
: ?new-after ( -- )
|
: ?new-after ( -- )
|
||||||
|
@ -138,7 +139,6 @@ SYMBOL: char
|
||||||
char set to ?new-after
|
char set to ?new-after
|
||||||
0 compose-iter
|
0 compose-iter
|
||||||
char get , after get %
|
char get , after get %
|
||||||
to
|
|
||||||
] if (compose)
|
] if (compose)
|
||||||
] when* ;
|
] when* ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue