diff --git a/extra/unicode/collation/collation-docs.factor b/extra/unicode/collation/collation-docs.factor index 23538229a4..0e92042ddd 100644 --- a/extra/unicode/collation/collation-docs.factor +++ b/extra/unicode/collation/collation-docs.factor @@ -1,7 +1,42 @@ -USING: help.syntax help.markup ; +USING: help.syntax help.markup strings byte-arrays ; IN: unicode.collation ABOUT: "unicode.collation" ARTICLE: "unicode.collation" "Unicode collation algorithm" -"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode." ; +"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:" +{ $subsection sort-strings } +{ $subsection collation-key } +{ $subsection string<=> } +{ $subsection primary= } +{ $subsection secondary= } +{ $subsection tertiary= } +{ $subsection quaternary= } ; + +HELP: sort-strings +{ $values { "strings" "a sequence of strings" } { "sorted" "the strings in DUCET order" } } +{ $description "This word takes a sequence of strings and sorts them according to the UCA, using code point order as a tie-breaker." } ; + +HELP: collation-key +{ $values { "string" string } { "key" byte-array } } +{ $description "This takes a string and gives a representation of the collation key, which can be compared with <=>" } ; + +HELP: string<=> +{ $values { "str1" string } { "str2" string } { "<=>" "one of +lt+, +gt+ or +eq+" } } +{ $description "This word takes two strings and compares them using the UCA with the DUCET, using code point order as a tie-breaker." } ; + +HELP: primary= +{ $values { "str1" string } { "str2" string } { "?" "t or f" } } +{ $description "This checks whether the first level of collation is identical. This is the least specific kind of equality test. In Latin script, it can be understood as ignoring case, punctuation and accent marks." } ; + +HELP: secondary= +{ $values { "str1" string } { "str2" string } { "?" "t or f" } } +{ $description "This checks whether the first two levels of collation are equal. For Latin script, this means accent marks are significant again, and it is otherwise similar to primary=." } ; + +HELP: tertiary= +{ $values { "str1" string } { "str2" string } { "?" "t or f" } } +{ $description "Along the same lines as secondary=, but case is significant." } ; + +HELP: quaternary= +{ $values { "str1" string } { "str2" string } { "?" "t or f" } } +{ $description "This is similar to tertiary= but it makes punctuation significant again, while still leaving out things like null bytes and Hebrew vowel marks, which mean absolutely nothing in collation." } ; diff --git a/extra/unicode/collation/collation-tests.factor b/extra/unicode/collation/collation-tests.factor index b4a54bb11d..16ac50d5a9 100755 --- a/extra/unicode/collation/collation-tests.factor +++ b/extra/unicode/collation/collation-tests.factor @@ -24,6 +24,9 @@ IN: unicode.collation.tests [ t t f f ] [ "hello" "HELLO" test-equality ] unit-test [ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test [ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test +[ { "good bye" "goodbye" "hello" "HELLO" } ] +[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ] +unit-test parse-test 2 <clumps> [ [ test-two ] assoc-each ] with-null-writer diff --git a/extra/unicode/collation/collation.factor b/extra/unicode/collation/collation.factor index b12a10709e..f71a58be85 100755 --- a/extra/unicode/collation/collation.factor +++ b/extra/unicode/collation/collation.factor @@ -6,6 +6,7 @@ unicode.syntax macros sequences.deep words unicode.breaks quotations ; IN: unicode.collation +<PRIVATE VALUE: ducet TUPLE: weight primary secondary tertiary ignorable? ; @@ -115,6 +116,7 @@ ducet insert-helpers [ [ variable-weight ] each ] } cleave ] { } make ; +PRIVATE> : completely-ignorable? ( weight -- ? ) [ primary>> ] [ secondary>> ] [ tertiary>> ] tri @@ -131,11 +133,13 @@ ducet insert-helpers nfd string>graphemes graphemes>weights filter-ignorable weights>bytes ; +<PRIVATE : insensitive= ( str1 str2 levels-removed -- ? ) [ swap collation-key swap [ [ 0 = not ] right-trim but-last ] times ] curry bi@ = ; +PRIVATE> : primary= ( str1 str2 -- ? ) 3 insensitive= ; @@ -149,17 +153,14 @@ ducet insert-helpers : quaternary= ( str1 str2 -- ? ) 0 insensitive= ; -: compare-collation ( {str1,key} {str2,key} -- <=> ) - 2dup [ second ] bi@ <=> dup +eq+ = - [ drop <=> ] [ 2nip ] if ; - +<PRIVATE : w/collation-key ( str -- {str,key} ) - dup collation-key 2array ; + [ collation-key ] keep 2array ; +PRIVATE> : sort-strings ( strings -- sorted ) [ w/collation-key ] map - [ compare-collation ] sort - keys ; + natural-sort values ; : string<=> ( str1 str2 -- <=> ) - [ w/collation-key ] bi@ compare-collation ; + [ w/collation-key ] compare ; diff --git a/extra/yahoo/authors.txt b/extra/yahoo/authors.txt index f990dd0ed2..382fc3fc09 100644 --- a/extra/yahoo/authors.txt +++ b/extra/yahoo/authors.txt @@ -1 +1,2 @@ Daniel Ehrenberg +Walton Chan diff --git a/extra/yahoo/summary.txt b/extra/yahoo/summary.txt index 662369d96e..98287365af 100644 --- a/extra/yahoo/summary.txt +++ b/extra/yahoo/summary.txt @@ -1 +1 @@ -Yahoo! search example using XML-RPC +Yahoo! search example using XML diff --git a/extra/yahoo/yahoo-tests.factor b/extra/yahoo/yahoo-tests.factor index dc684af726..3776715c7b 100644 --- a/extra/yahoo/yahoo-tests.factor +++ b/extra/yahoo/yahoo-tests.factor @@ -1,4 +1,4 @@ -USING: tools.test yahoo kernel io.files xml sequences ; +USING: tools.test yahoo kernel io.files xml sequences accessors ; [ T{ result @@ -8,4 +8,4 @@ USING: tools.test yahoo kernel io.files xml sequences ; "Official site with news, tour dates, discography, store, community, and more." } ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test -[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 "Factor-search" query ] unit-test +[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=2&similar_ok=1" ] [ "hi" <search> "Factor-search" >>appid 2 >>results t >>similar-ok query ] unit-test diff --git a/extra/yahoo/yahoo.factor b/extra/yahoo/yahoo.factor index 214ad04979..dd7ce962c2 100755 --- a/extra/yahoo/yahoo.factor +++ b/extra/yahoo/yahoo.factor @@ -1,12 +1,15 @@ -! Copyright (C) 2006 Daniel Ehrenberg +! Copyright (C) 2006 Daniel Ehrenberg, Walton Chan ! See http://factorcode.org/license.txt for BSD license. USING: http.client xml xml.utilities kernel sequences -namespaces http math.parser help math.order locals ; +namespaces http math.parser help math.order locals accessors ; IN: yahoo TUPLE: result title url summary ; C: <result> result + +TUPLE: search query results adult-ok start appid region type +format similar-ok language country site subscription license ; : parse-yahoo ( xml -- seq ) "Result" deep-tags-named [ @@ -18,19 +21,44 @@ C: <result> result : yahoo-url ( -- str ) "http://search.yahooapis.com/WebSearchService/V1/webSearch" ; -:: query ( search num appid -- url ) +: param ( search str quot -- search ) + >r over r> call [ url-encode [ % ] bi@ ] [ drop ] if* ; + inline + +: num-param ( search str quot -- search ) + [ dup [ number>string ] when ] compose param ; inline + +: bool-param ( search str quot -- search ) + [ "1" and ] compose param ; inline + +: query ( search -- url ) [ - yahoo-url % - "?appid=" % appid % - "&query=" % search url-encode % - "&results=" % num # + yahoo-url % + "?appid=" [ appid>> ] param + "&query=" [ query>> ] param + "®ion=" [ region>> ] param + "&type=" [ type>> ] param + "&format=" [ format>> ] param + "&language=" [ language>> ] param + "&country=" [ country>> ] param + "&site=" [ site>> ] param + "&subscription=" [ subscription>> ] param + "&license=" [ license>> ] param + "&results=" [ results>> ] num-param + "&start=" [ start>> ] num-param + "&adult_ok=" [ adult-ok>> ] bool-param + "&similar_ok=" [ similar-ok>> ] bool-param + drop ] "" make ; : factor-id "fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ; -: search-yahoo/id ( search num id -- seq ) - query http-get string>xml parse-yahoo ; +: <search> ( query -- search ) + search new + factor-id >>appid + 10 >>results + swap >>query ; -: search-yahoo ( search num -- seq ) - factor-id search-yahoo/id ; +: search-yahoo ( search -- seq ) + query http-get string>xml parse-yahoo ;