From a77034c748e27e8295eb956fe99b7227901abed1 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Sun, 25 May 2008 20:21:39 -0500 Subject: [PATCH] Fixing insensitive equality tests --- extra/unicode/collation/collation-docs.factor | 7 +++++ .../unicode/collation/collation-tests.factor | 10 +++++++ extra/unicode/collation/collation.factor | 28 ++++++++----------- 3 files changed, 29 insertions(+), 16 deletions(-) create mode 100644 extra/unicode/collation/collation-docs.factor diff --git a/extra/unicode/collation/collation-docs.factor b/extra/unicode/collation/collation-docs.factor new file mode 100644 index 0000000000..23538229a4 --- /dev/null +++ b/extra/unicode/collation/collation-docs.factor @@ -0,0 +1,7 @@ +USING: help.syntax help.markup ; +IN: unicode.collation + +ABOUT: "unicode.collation" + +ARTICLE: "unicode.collation" "Unicode collation algorithm" +"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode." ; diff --git a/extra/unicode/collation/collation-tests.factor b/extra/unicode/collation/collation-tests.factor index 24c4711a04..a684133992 100755 --- a/extra/unicode/collation/collation-tests.factor +++ b/extra/unicode/collation/collation-tests.factor @@ -15,5 +15,15 @@ IN: unicode.collation.tests parse-test dup 2 [ string<=> +lt+ = not ] assoc-filter dup assoc-size ; +: test-equality + { primary= secondary= tertiary= quaternary= } + [ execute ] 2with each ; + +[ f f f f ] [ "hello" "hi" test-equality ] unit-test +[ t f f f ] [ "hello" "hˇllo" test-equality ] unit-test +[ t t f f ] [ "hello" "HELLO" test-equality ] unit-test +[ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test +[ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test + parse-test 2 [ [ test-two ] assoc-each ] with-null-writer diff --git a/extra/unicode/collation/collation.factor b/extra/unicode/collation/collation.factor index 1b0ce7724d..b12a10709e 100755 --- a/extra/unicode/collation/collation.factor +++ b/extra/unicode/collation/collation.factor @@ -127,31 +127,27 @@ ducet insert-helpers [ swap completely-ignorable? or not ] 2bi ] filter nip ; -: string>weights ( string -- weights ) - nfd string>graphemes graphemes>weights filter-ignorable ; - : collation-key ( string -- key ) - string>weights weights>bytes ; + nfd string>graphemes graphemes>weights + filter-ignorable weights>bytes ; + +: insensitive= ( str1 str2 levels-removed -- ? ) + [ + swap collation-key swap + [ [ 0 = not ] right-trim but-last ] times + ] curry bi@ = ; : primary= ( str1 str2 -- ? ) - [ string>weights [ primary>> ] map ] bi@ = ; + 3 insensitive= ; : secondary= ( str1 str2 -- ? ) - [ - string>weights - [ { primary>> secondary>> } get-slots 2array ] map - ] bi@ = ; + 2 insensitive= ; : tertiary= ( str1 str2 -- ? ) - string>weights [ - string>weights [ - { primary>> secondary>> tertiary>> } - get-slots 3array - ] map - ] bi@ = ; + 1 insensitive= ; : quaternary= ( str1 str2 -- ? ) - [ collation-key ] bi@ = ; + 0 insensitive= ; : compare-collation ( {str1,key} {str2,key} -- <=> ) 2dup [ second ] bi@ <=> dup +eq+ =