Merge branch 'master' of git://factorcode.org/git/littledan

2008-06-02 17:29:06 -05:00 · 2008-06-02 17:29:06 -05:00 · 7003b66bab
parent 94eebc747b 94776f6841
commit 7003b66bab
7 changed files with 92 additions and 24 deletions
--- a/extra/unicode/collation/collation-docs.factor
+++ b/extra/unicode/collation/collation-docs.factor
@ -1,7 +1,42 @@
-USING: help.syntax help.markup ;
+USING: help.syntax help.markup strings byte-arrays ;
 IN: unicode.collation

 ABOUT: "unicode.collation"

 ARTICLE: "unicode.collation" "Unicode collation algorithm"
-"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode." ;
+"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:"
+{ $subsection sort-strings }
+{ $subsection collation-key }
+{ $subsection string<=> }
+{ $subsection primary= }
+{ $subsection secondary= }
+{ $subsection tertiary= }
+{ $subsection quaternary= } ;
+
+HELP: sort-strings
+{ $values { "strings" "a sequence of strings" } { "sorted" "the strings in DUCET order" } }
+{ $description "This word takes a sequence of strings and sorts them according to the UCA, using code point order as a tie-breaker." } ;
+
+HELP: collation-key
+{ $values { "string" string } { "key" byte-array } }
+{ $description "This takes a string and gives a representation of the collation key, which can be compared with <=>" } ;
+
+HELP: string<=>
+{ $values { "str1" string } { "str2" string } { "<=>" "one of +lt+, +gt+ or +eq+" } }
+{ $description "This word takes two strings and compares them using the UCA with the DUCET, using code point order as a tie-breaker." } ;
+
+HELP: primary=
+{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
+{ $description "This checks whether the first level of collation is identical. This is the least specific kind of equality test. In Latin script, it can be understood as ignoring case, punctuation and accent marks." } ;
+
+HELP: secondary=
+{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
+{ $description "This checks whether the first two levels of collation are equal. For Latin script, this means accent marks are significant again, and it is otherwise similar to primary=." } ;
+
+HELP: tertiary=
+{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
+{ $description "Along the same lines as secondary=, but case is significant." } ;
+
+HELP: quaternary=
+{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
+{ $description "This is similar to tertiary= but it makes punctuation significant again, while still leaving out things like null bytes and Hebrew vowel marks, which mean absolutely nothing in collation." } ;
--- a/extra/unicode/collation/collation-tests.factor
+++ b/extra/unicode/collation/collation-tests.factor
@ -24,6 +24,9 @@ IN: unicode.collation.tests
 [ t t f f ] [ "hello" "HELLO" test-equality ] unit-test
 [ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test
 [ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test
+[ { "good bye" "goodbye" "hello" "HELLO" } ]
+[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ]
+unit-test

 parse-test 2 <clumps>
 [ [ test-two ] assoc-each ] with-null-writer
--- a/extra/unicode/collation/collation.factor
+++ b/extra/unicode/collation/collation.factor
@ -6,6 +6,7 @@ unicode.syntax macros sequences.deep words unicode.breaks
 quotations ;
 IN: unicode.collation

+<PRIVATE
 VALUE: ducet

 TUPLE: weight primary secondary tertiary ignorable? ;
@ -115,6 +116,7 @@ ducet insert-helpers
            [ [ variable-weight ] each ]
        } cleave
    ] { } make ;
+PRIVATE>

 : completely-ignorable? ( weight -- ? )
    [ primary>> ] [ secondary>> ] [ tertiary>> ] tri
@ -131,11 +133,13 @@ ducet insert-helpers
    nfd string>graphemes graphemes>weights
    filter-ignorable weights>bytes ;

+<PRIVATE
 : insensitive= ( str1 str2 levels-removed -- ? )
    [
        swap collation-key swap
        [ [ 0 = not ] right-trim but-last ] times
    ] curry bi@ = ;
+PRIVATE>

 : primary= ( str1 str2 -- ? )
    3 insensitive= ;
@ -149,17 +153,14 @@ ducet insert-helpers
 : quaternary= ( str1 str2 -- ? )
    0 insensitive= ;

-: compare-collation ( {str1,key} {str2,key} -- <=> )
-    2dup [ second ] bi@ <=> dup +eq+ =
-    [ drop <=> ] [ 2nip ] if ;
-
+<PRIVATE
 : w/collation-key ( str -- {str,key} )
-    dup collation-key 2array ;
+    [ collation-key ] keep 2array ;
+PRIVATE>

 : sort-strings ( strings -- sorted )
    [ w/collation-key ] map
-    [ compare-collation ] sort
-    keys ;
+    natural-sort values ;

 : string<=> ( str1 str2 -- <=> )
-    [ w/collation-key ] bi@ compare-collation ;
+    [ w/collation-key ] compare ;
--- a/extra/yahoo/authors.txt
+++ b/extra/yahoo/authors.txt
@ -1 +1,2 @@
 Daniel Ehrenberg
+Walton Chan
--- a/extra/yahoo/summary.txt
+++ b/extra/yahoo/summary.txt
@ -1 +1 @@
-Yahoo! search example using XML-RPC
+Yahoo! search example using XML
--- a/extra/yahoo/yahoo-tests.factor
+++ b/extra/yahoo/yahoo-tests.factor
@ -1,4 +1,4 @@
-USING: tools.test yahoo kernel io.files xml sequences ;
+USING: tools.test yahoo kernel io.files xml sequences accessors ;

 [ T{
    result
@ -8,4 +8,4 @@ USING: tools.test yahoo kernel io.files xml sequences ;
    "Official site with news, tour dates, discography, store, community, and more."
 } ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test

-[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 "Factor-search" query ] unit-test
+[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=2&similar_ok=1" ] [ "hi" <search> "Factor-search" >>appid 2 >>results t >>similar-ok query ] unit-test
--- a/extra/yahoo/yahoo.factor
+++ b/extra/yahoo/yahoo.factor
@ -1,12 +1,15 @@
-! Copyright (C) 2006 Daniel Ehrenberg
+! Copyright (C) 2006 Daniel Ehrenberg, Walton Chan
 ! See http://factorcode.org/license.txt for BSD license.
 USING: http.client xml xml.utilities kernel sequences
-namespaces http math.parser help math.order locals ;
+namespaces http math.parser help math.order locals accessors ;
 IN: yahoo

 TUPLE: result title url summary ;

 C: <result> result
+    
+TUPLE: search query results adult-ok start appid region type
+format similar-ok language country site subscription license ;

 : parse-yahoo ( xml -- seq )
    "Result" deep-tags-named [
@ -18,19 +21,44 @@ C: <result> result
 : yahoo-url ( -- str )
    "http://search.yahooapis.com/WebSearchService/V1/webSearch" ;

-:: query ( search num appid -- url )
+: param ( search str quot -- search )
+    >r over r> call [ url-encode [ % ] bi@ ] [ drop ] if* ;
+    inline
+
+: num-param ( search str quot -- search )
+    [ dup [ number>string ] when ] compose param ; inline
+
+: bool-param ( search str quot -- search )
+    [ "1" and ] compose param ; inline
+
+: query ( search -- url )
    [
-        yahoo-url %
-        "?appid=" % appid %
-        "&query=" % search url-encode %
-        "&results=" % num #
+        yahoo-url %     
+        "?appid=" [ appid>> ] param
+        "&query=" [ query>> ] param
+        "&region=" [ region>> ] param
+        "&type=" [ type>> ] param
+        "&format=" [ format>> ] param
+        "&language=" [ language>> ] param
+        "&country=" [ country>> ] param
+        "&site=" [ site>> ] param
+        "&subscription=" [ subscription>> ] param
+        "&license=" [ license>> ] param
+        "&results=" [ results>> ] num-param
+        "&start=" [ start>> ] num-param
+        "&adult_ok=" [ adult-ok>> ] bool-param
+        "&similar_ok=" [ similar-ok>> ] bool-param
+        drop
    ] "" make ;

 : factor-id
    "fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ;

-: search-yahoo/id ( search num id -- seq )
-    query http-get string>xml parse-yahoo ;
+: <search> ( query -- search )
+    search new
+        factor-id >>appid
+        10 >>results
+        swap >>query ;

-: search-yahoo ( search num -- seq )
-    factor-id search-yahoo/id ;
+: search-yahoo ( search -- seq )
+    query http-get string>xml parse-yahoo ;