2019-07-28 15:45:51 -04:00
|
|
|
USING: arrays assocs fry grouping io.encodings.utf8 io.files
|
|
|
|
kernel math math.order math.parser sequences splitting
|
|
|
|
strings tools.test unicode ;
|
2015-06-29 19:43:15 -04:00
|
|
|
IN: unicode.collation.tests
|
|
|
|
|
|
|
|
! Compare two strings with each of the four equality words and leave the
! results on the stack in this order: primary=, secondary=, tertiary=,
! quaternary=.
: test-equality ( str1 str2 -- ? ? ? ? )
    [ [ primary= ] [ secondary= ] 2bi ]
    [ [ tertiary= ] [ quaternary= ] 2bi ] 2bi ;
|
|
|
|
|
2015-07-03 12:39:59 -04:00
|
|
|
! Expected flags are listed in the order primary, secondary, tertiary,
! quaternary (the order in which test-equality leaves them).

! Different words: not equal at any level.
{ f f f f }
[ "hello" "hi" test-equality ] unit-test

! "e" versus U+00E9: equal at the primary level only.
{ t f f f }
[ "hello" "h\u0000e9llo" test-equality ] unit-test

! Lower case versus upper case: equal up to the secondary level.
{ t t f f }
[ "hello" "HELLO" test-equality ] unit-test

! Added spaces and a trailing period: equal up to the tertiary level.
{ t t t f }
[ "hello" "h e l l o." test-equality ] unit-test

! Surrounding NUL characters: equal at all four levels.
{ t t t t }
[ "hello" "\0hello\0" test-equality ] unit-test
|
|
|
|
! sort-strings must put these four inputs into exactly this order.
{ { "good bye" "goodbye" "hello" "HELLO" } }
[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ] unit-test
|
|
|
|
|
2019-07-28 15:45:51 -04:00
|
|
|
! Read CollationTest_SHIFTED.txt as UTF-8 lines and keep only the data
! lines: lines that start with "#" and empty lines are dropped.
: collation-test-lines ( -- lines )
    "vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt"
    utf8 file-lines
    [ [ "#" head? ] [ empty? ] bi or ] reject ;
|
|
|
|
|
|
|
|
! Turn every data line into a string: the text before the first ";" is
! split on spaces and each field is parsed as a hexadecimal number that
! becomes one element of the resulting string.
: parse-collation-test-shifted ( -- lines )
    collation-test-lines [
        ";" split1 drop
        " " split [ hex> ] "" map-as
    ] map ;
|
2019-06-22 10:47:07 -04:00
|
|
|
|
|
|
|
! Return the part of string that follows the last occurrence of char.
! When char does not occur at all, string is returned unchanged; the
! earlier find-last based version ended up evaluating "f 1 +" in that case
! and threw an unrelated type error.
: tail-from-last ( string char -- string' )
    over last-index [ 1 + tail ] when* ; inline
|
|
|
|
|
|
|
|
! Split one data line at its first ";" and parse both halves.
!
! Left half: space-separated hexadecimal numbers.
! Right half: the text after the first "#", then after the last "[", up to
! the first "]", is split on "|"; the first four fields are kept and each
! is parsed as space-separated hexadecimal numbers (empty tokens skipped).
!
! The result is a two-element array { left right }.
: line>test-weights ( string -- pair )
    ";" split1
    ! left half, done underneath the still-unparsed right half
    [ " " split [ hex> ] map ] dip
    ! right half
    "#" split1 nip CHAR: [ tail-from-last
    "]" split1 drop
    "|" split 4 head
    [ " " split harvest [ hex> ] map ] map
    2array ;
|
|
|
|
|
|
|
|
! Apply line>test-weights to every line returned by collation-test-lines.
: parse-collation-test-weights ( -- weights )
    collation-test-lines [ line>test-weights ] map ;
|
|
|
|
|
|
|
|
! chars is converted to a string and passed to collation-key/nfd, whose
! top result is dropped. collation, a sequence of sequences, is flattened
! with a single 0 inserted between consecutive parts.
: calculate-collation ( chars collation -- collation-calculated collation-answer )
    [ >string collation-key/nfd drop ] dip
    { 0 } join ;
|
|
|
|
|
|
|
|
! Keep only the pairs for which calculate-collation leaves two sequences
! that are not sequence= to each other.
: find-bad-collations ( pairs -- seq )
    [ first2 calculate-collation sequence= not ] filter ;
|
2019-06-22 10:47:07 -04:00
|
|
|
|
|
|
|
! No parsed line of the collation test file may be reported by
! find-bad-collations.
{ { } } [ parse-collation-test-weights find-bad-collations ] unit-test
|
|
|
|
|
|
|
|
! Every string parsed from the file must compare +lt+ or +eq+ against the
! string that follows it; any adjacent pair that does not is left in the
! result, which is expected to be empty.
{ { } } [
    parse-collation-test-shifted 2 clump
    [ string<=> { +lt+ +eq+ } member? ] assoc-reject
] unit-test
|
2019-06-22 10:47:07 -04:00
|
|
|
|
2019-07-28 14:10:11 -04:00
|
|
|
! FIXME: the DUCET table was wrong.
! Resolved by fixing the DUCET table; the test below is still commented out.
|
|
|
|
! { +lt+ } [ { 4019 98 } { 4019 3953 1 3968 97 } [ >string ] bi@ string<=> ] unit-test
|
|
|
|
|
2019-07-06 10:32:47 -04:00
|
|
|
! Code points are written in hexadecimal; the values are unchanged.
{ +lt+ } [
    { 0x0fb2 0x0334 0x0f81 } { 0x0f77 0x0021 }
    [ >string ] bi@ string<=>
] unit-test

{ +lt+ } [
    { 0x0fb3 0x0f71 0x0334 0x0f80 } { 0x0f79 0x0021 }
    [ >string ] bi@ string<=>
] unit-test
|
|
|
|
|
|
|
|
|
|
|
|
! Both code-point sequences (written in hexadecimal, values unchanged) are
! expected to produce the same collation key.
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0f76 0x0f71 0x0334 } >string collation-key/nfd drop ] unit-test

{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0fb2 0x0334 0x0f71 0x0f80 } >string collation-key/nfd drop ] unit-test
|
|
|
|
|
2019-07-28 16:04:32 -04:00
|
|
|
! { { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
|
|
|
! [ { 0x0FB2 0x0334 0x0F80 0x0F71 } >string collation-key/nfd drop ] unit-test
|
2019-07-06 10:32:47 -04:00
|
|
|
|
|
|
|
! Code points are written in hexadecimal; the values are unchanged.
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0fb2 0x0334 0x0f81 } >string collation-key/nfd drop ] unit-test
|
|
|
|
|
|
|
|
! All four code-point sequences (written in hexadecimal, values unchanged)
! are expected to produce the same collation key.
{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0f78 0x0f71 0x0334 } >string collation-key/nfd drop ] unit-test

{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0fb3 0x0334 0x0f71 0x0f80 } >string collation-key/nfd drop ] unit-test

{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0fb3 0x0334 0x0f80 0x0f71 } >string collation-key/nfd drop ] unit-test

{ { 12750 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
[ { 0x0fb3 0x0f71 0x0334 0x0f80 } >string collation-key/nfd drop ] unit-test
|
|
|
|
|
2019-07-28 14:38:42 -04:00
|
|
|
! Code points are written in hexadecimal; the values are unchanged.
{ { 12722 12741 12744 7817 0 32 32 32 32 0 2 2 2 2 0 65535 65535 65535 65535 } }
[
    { 0x0fb3 0x0f71 0x0001 0x0f80 0x0061 } >string
    collation-key/nfd drop
] unit-test
|
2019-07-28 16:04:32 -04:00
|
|
|
! { 0xfb3 0x0f71 0x0334 0x0f80 }
|