Fixing insensitive equality tests

db4
Daniel Ehrenberg 2008-05-25 20:21:39 -05:00
parent 0f1885caf8
commit a77034c748
3 changed files with 29 additions and 16 deletions

View File

@ -0,0 +1,7 @@
USING: help.syntax help.markup ;
IN: unicode.collation
ABOUT: "unicode.collation"
! Help article for the unicode.collation vocabulary; the ABOUT: declaration
! in this file names it as the vocabulary's main article.
ARTICLE: "unicode.collation" "Unicode collation algorithm"
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accounting for all of the characters in Unicode." ;

View File

@ -15,5 +15,15 @@ IN: unicode.collation.tests
parse-test dup 2 <clumps>
[ string<=> +lt+ = not ] assoc-filter dup assoc-size ;
! Test helper: given two strings on the stack, runs each of the four
! comparison words listed below on that same pair, in order, so one boolean
! per word is left on the stack (primary first, quaternary last).
: test-equality
{ primary= secondary= tertiary= quaternary= }
! 2with fixes both strings in front of each word before it is executed.
[ execute ] 2with each ;
! Each expected array holds the results of primary=, secondary=, tertiary=
! and quaternary= (in that order) for the pair of strings under test.
[ f f f f ] [ "hello" "hi" test-equality ] unit-test
! Second string differs from "hello" only by an accent: e-acute, U+00E9.
[ t f f f ] [ "hello" "héllo" test-equality ] unit-test
[ t t f f ] [ "hello" "HELLO" test-equality ] unit-test
[ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test
[ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test
parse-test 2 <clumps>
[ [ test-two ] assoc-each ] with-null-writer

View File

@ -127,31 +127,27 @@ ducet insert-helpers
[ swap completely-ignorable? or not ] 2bi
] filter nip ;
: string>weights ( string -- weights )
nfd string>graphemes graphemes>weights filter-ignorable ;
: collation-key ( string -- key )
string>weights weights>bytes ;
nfd string>graphemes graphemes>weights
filter-ignorable weights>bytes ;
! Compares str1 and str2 by their collation keys after shortening each key
! levels-removed times; returns t when the shortened keys are equal.
! The same quotation (with levels-removed curried in) is applied to both
! strings via bi@, so both keys are shortened identically.
: insensitive= ( str1 str2 levels-removed -- ? )
[
! Stack here is ( str levels-removed ): build the key, keep the count on top.
swap collation-key swap
! One pass strips the trailing run of non-zero elements, then drops the
! element before it -- presumably the 0 that separates levels in the key;
! TODO confirm against weights>bytes.
[ [ 0 = not ] right-trim but-last ] times
] curry bi@ = ;
: primary= ( str1 str2 -- ? )
[ string>weights [ primary>> ] map ] bi@ = ;
3 insensitive= ;
: secondary= ( str1 str2 -- ? )
[
string>weights
[ { primary>> secondary>> } get-slots 2array ] map
] bi@ = ;
2 insensitive= ;
: tertiary= ( str1 str2 -- ? )
string>weights [
string>weights [
{ primary>> secondary>> tertiary>> }
get-slots 3array
] map
] bi@ = ;
1 insensitive= ;
: quaternary= ( str1 str2 -- ? )
[ collation-key ] bi@ = ;
0 insensitive= ;
: compare-collation ( {str1,key} {str2,key} -- <=> )
2dup [ second ] bi@ <=> dup +eq+ =