diff --git a/basis/unicode/collation/collation-tests.factor b/basis/unicode/collation/collation-tests.factor index c0ef6aa4f7..b44788396b 100644 --- a/basis/unicode/collation/collation-tests.factor +++ b/basis/unicode/collation/collation-tests.factor @@ -1,6 +1,6 @@ -USING: arrays assocs fry grouping io io.encodings.utf8 io.files -io.streams.null kernel math math.order math.parser multiline -random sequences splitting strings tools.test unicode words ; +USING: arrays assocs fry grouping io.encodings.utf8 io.files +kernel math math.order math.parser sequences splitting +strings tools.test unicode ; IN: unicode.collation.tests : test-equality ( str1 str2 -- ? ? ? ? ) @@ -16,11 +16,13 @@ IN: unicode.collation.tests { { "good bye" "goodbye" "hello" "HELLO" } } [ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ] unit-test -: parse-collation-test-shifted ( -- lines ) +: collation-test-lines ( -- lines ) "vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt" utf8 file-lines - [ "#@" split first ] map harvest - [ ";" split first ] map - [ " " split [ hex> ] "" map-as ] map ; + [ "#" head? ] reject harvest ; + +: parse-collation-test-shifted ( -- lines ) + collation-test-lines + [ ";" split first " " split [ hex> ] "" map-as ] map ; : tail-from-last ( string char -- string' ) '[ _ = ] dupd find-last drop 1 + tail ; inline @@ -36,16 +38,14 @@ IN: unicode.collation.tests ] bi* 2array ; : parse-collation-test-weights ( -- weights ) - "vocab:unicode/UCA/CollationTest/CollationTest_SHIFTED.txt" utf8 file-lines - [ "#" head? ] reject harvest + collation-test-lines [ line>test-weights ] map ; : calculate-collation ( chars collation -- collation-calculated collation-answer ) [ >string collation-key/nfd drop ] [ { 0 } join ] bi* ; : find-bad-collations ( pairs -- seq ) - [ first2 dupd calculate-collation 3array ] map - [ first3 sequence= nip ] reject ; + [ first2 calculate-collation sequence= ] reject ; { { } } [ parse-collation-test-weights find-bad-collations ] unit-test @@ -69,7 +69,7 @@ IN: unicode.collation.tests [ { 4018 820 3953 3968 } >string collation-key/nfd drop ] unit-test { { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } } -[ { 4018 820 3968 3953 } >string collation-key/nfd drop ] unit-test +[ { 0x0FB2 0x0334 0x0F80 0x0F71 } >string collation-key/nfd drop ] unit-test { { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } } [ { 4018 820 3969 } >string collation-key/nfd drop ] unit-test diff --git a/basis/unicode/collation/collation.factor b/basis/unicode/collation/collation.factor index 89f6f2f4ed..7942840f80 100644 --- a/basis/unicode/collation/collation.factor +++ b/basis/unicode/collation/collation.factor @@ -72,6 +72,128 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ; } } } + + ! FIXME: WRONG WEIGHTS + { + { 0x0FB2 0x0F71 0x0F72 } ! CE(0FB2) CE(0F71 0F72) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB2 0x0F73 } ! CE(0FB2) CE(0F71 0F72) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB2 0x0F71 0x0F74 } ! CE(0FB2) CE(0F71 0F74) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB2 0x0F75 } ! CE(0FB2) CE(0F71 0F74) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB3 0x0F71 0x0F72 } ! CE(0FB3) CE(0F71 0F72) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB3 0x0F73 } ! CE(0FB3) CE(0F71 0F72) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB3 0x0F71 0x0F74 } ! CE(0FB3) CE(0F71 0F74) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } + { + { 0x0FB3 0x0F75 } ! CE(0FB3) CE(0F71 0F74) + { + T{ weight-levels + { primary 12719 } + { secondary 32 } + { tertiary 2 } + } + T{ weight-levels + { primary 12741 } + { secondary 32 } + { tertiary 2 } + } + } + } } ducet get-global '[ swap >string _ set-at ] assoc-each ; ! Add a few missing ducet values