unicode.collation: some cleanup, no test fixes
parent
bce6238d6e
commit
e6613011fb
|
@ -1,6 +1,6 @@
|
||||||
USING: arrays assocs fry grouping io io.encodings.utf8 io.files
|
USING: arrays assocs fry grouping io.encodings.utf8 io.files
|
||||||
io.streams.null kernel math math.order math.parser multiline
|
kernel math math.order math.parser sequences splitting
|
||||||
random sequences splitting strings tools.test unicode words ;
|
strings tools.test unicode ;
|
||||||
IN: unicode.collation.tests
|
IN: unicode.collation.tests
|
||||||
|
|
||||||
: test-equality ( str1 str2 -- ? ? ? ? )
|
: test-equality ( str1 str2 -- ? ? ? ? )
|
||||||
|
@ -16,11 +16,13 @@ IN: unicode.collation.tests
|
||||||
{ { "good bye" "goodbye" "hello" "HELLO" } }
|
{ { "good bye" "goodbye" "hello" "HELLO" } }
|
||||||
[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ] unit-test
|
[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ] unit-test
|
||||||
|
|
||||||
: parse-collation-test-shifted ( -- lines )
|
: collation-test-lines ( -- lines )
|
||||||
"vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt" utf8 file-lines
|
"vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt" utf8 file-lines
|
||||||
[ "#@" split first ] map harvest
|
[ "#" head? ] reject harvest ;
|
||||||
[ ";" split first ] map
|
|
||||||
[ " " split [ hex> ] "" map-as ] map ;
|
: parse-collation-test-shifted ( -- lines )
|
||||||
|
collation-test-lines
|
||||||
|
[ ";" split first " " split [ hex> ] "" map-as ] map ;
|
||||||
|
|
||||||
: tail-from-last ( string char -- string' )
|
: tail-from-last ( string char -- string' )
|
||||||
'[ _ = ] dupd find-last drop 1 + tail ; inline
|
'[ _ = ] dupd find-last drop 1 + tail ; inline
|
||||||
|
@ -36,16 +38,14 @@ IN: unicode.collation.tests
|
||||||
] bi* 2array ;
|
] bi* 2array ;
|
||||||
|
|
||||||
: parse-collation-test-weights ( -- weights )
|
: parse-collation-test-weights ( -- weights )
|
||||||
"vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt" utf8 file-lines
|
collation-test-lines
|
||||||
[ "#" head? ] reject harvest
|
|
||||||
[ line>test-weights ] map ;
|
[ line>test-weights ] map ;
|
||||||
|
|
||||||
: calculate-collation ( chars collation -- collation-calculated collation-answer )
|
: calculate-collation ( chars collation -- collation-calculated collation-answer )
|
||||||
[ >string collation-key/nfd drop ] [ { 0 } join ] bi* ;
|
[ >string collation-key/nfd drop ] [ { 0 } join ] bi* ;
|
||||||
|
|
||||||
: find-bad-collations ( pairs -- seq )
|
: find-bad-collations ( pairs -- seq )
|
||||||
[ first2 dupd calculate-collation 3array ] map
|
[ first2 calculate-collation sequence= ] reject ;
|
||||||
[ first3 sequence= nip ] reject ;
|
|
||||||
|
|
||||||
{ { } }
|
{ { } }
|
||||||
[ parse-collation-test-weights find-bad-collations ] unit-test
|
[ parse-collation-test-weights find-bad-collations ] unit-test
|
||||||
|
@ -69,7 +69,7 @@ IN: unicode.collation.tests
|
||||||
[ { 4018 820 3953 3968 } >string collation-key/nfd drop ] unit-test
|
[ { 4018 820 3953 3968 } >string collation-key/nfd drop ] unit-test
|
||||||
|
|
||||||
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
||||||
[ { 4018 820 3968 3953 } >string collation-key/nfd drop ] unit-test
|
[ { 0x0FB2 0x0334 0x0F80 0x0F71 } >string collation-key/nfd drop ] unit-test
|
||||||
|
|
||||||
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
||||||
[ { 4018 820 3969 } >string collation-key/nfd drop ] unit-test
|
[ { 4018 820 3969 } >string collation-key/nfd drop ] unit-test
|
||||||
|
|
|
@ -72,6 +72,128 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
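! FIXME: WRONG WEIGHTS -- every entry added below reuses the same two weight-levels (primaries 12719 and 12741), for both the 0x0FB2 and 0x0FB3 keys; replace with the real DUCET values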
|
||||||
|
{
|
||||||
|
{ 0x0FB2 0x0F71 0x0F72 } ! CE(0FB2) CE(0F71 0F72)
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB2 0x0F73 } ! CE(0FB2) CE(0F71 0F72); U+0F73 canonically decomposes to 0F71 0F72
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB2 0x0F71 0x0F74 } ! CE(0FB2) CE(0F71 0F74)
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB2 0x0F75 } ! CE(0FB2) CE(0F71 0F74); U+0F75 canonically decomposes to 0F71 0F74
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB3 0x0F71 0x0F72 } ! CE(0FB3) CE(0F71 0F72)
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB3 0x0F73 } ! CE(0FB3) CE(0F71 0F72); U+0F73 canonically decomposes to 0F71 0F72
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB3 0x0F71 0x0F74 } ! CE(0FB3) CE(0F71 0F74)
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
{ 0x0FB3 0x0F75 } ! CE(0FB3) CE(0F71 0F74); U+0F75 canonically decomposes to 0F71 0F74
|
||||||
|
{
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12719 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
T{ weight-levels
|
||||||
|
{ primary 12741 }
|
||||||
|
{ secondary 32 }
|
||||||
|
{ tertiary 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} ducet get-global '[ swap >string _ set-at ] assoc-each ;
|
} ducet get-global '[ swap >string _ set-at ] assoc-each ;
|
||||||
|
|
||||||
! Add a few missing ducet values
|
! Add a few missing ducet values
|
||||||
|
|
Loading…
Reference in New Issue