Merge branch 'master' of git://factorcode.org/git/littledan

db4
Slava Pestov 2008-06-02 17:29:06 -05:00
commit 7003b66bab
7 changed files with 92 additions and 24 deletions

View File

@ -1,7 +1,42 @@
USING: help.syntax help.markup ;
USING: help.syntax help.markup strings byte-arrays ;
IN: unicode.collation
ABOUT: "unicode.collation"
ARTICLE: "unicode.collation" "Unicode collation algorithm"
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accounting for all of the characters in Unicode." ;
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accounting for all of the characters in Unicode. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:"
{ $subsection sort-strings }
{ $subsection collation-key }
{ $subsection string<=> }
{ $subsection primary= }
{ $subsection secondary= }
{ $subsection tertiary= }
{ $subsection quaternary= } ;
HELP: sort-strings
{ $values { "strings" "a sequence of strings" } { "sorted" "the strings in DUCET order" } }
{ $description "This word takes a sequence of strings and sorts them according to the UCA, using code point order as a tie-breaker." } ;
HELP: collation-key
{ $values { "string" string } { "key" byte-array } }
{ $description "This takes a string and gives a representation of the collation key, which can be compared with <=>." } ;
HELP: string<=>
{ $values { "str1" string } { "str2" string } { "<=>" "one of +lt+, +gt+ or +eq+" } }
{ $description "This word takes two strings and compares them using the UCA with the DUCET, using code point order as a tie-breaker." } ;
HELP: primary=
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
{ $description "This checks whether the first level of collation is identical. This is the least specific kind of equality test. In Latin script, it can be understood as ignoring case, punctuation and accent marks." } ;
HELP: secondary=
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
{ $description "This checks whether the first two levels of collation are equal. For Latin script, this means accent marks are significant again, and it is otherwise similar to primary=." } ;
HELP: tertiary=
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
{ $description "Along the same lines as secondary=, but case is significant." } ;
HELP: quaternary=
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
{ $description "This is similar to tertiary= but it makes punctuation significant again, while still leaving out things like null bytes and Hebrew vowel marks, which mean absolutely nothing in collation." } ;

View File

@ -24,6 +24,9 @@ IN: unicode.collation.tests
[ t t f f ] [ "hello" "HELLO" test-equality ] unit-test
[ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test
[ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test
! sort-strings on four mixed-case / spaced strings: the expected result
! places "good bye" before "goodbye" and "hello" before "HELLO".
[ { "good bye" "goodbye" "hello" "HELLO" } ]
[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ]
unit-test
parse-test 2 <clumps>
[ [ test-two ] assoc-each ] with-null-writer

View File

@ -6,6 +6,7 @@ unicode.syntax macros sequences.deep words unicode.breaks
quotations ;
IN: unicode.collation
<PRIVATE
VALUE: ducet
TUPLE: weight primary secondary tertiary ignorable? ;
@ -115,6 +116,7 @@ ducet insert-helpers
[ [ variable-weight ] each ]
} cleave
] { } make ;
PRIVATE>
: completely-ignorable? ( weight -- ? )
[ primary>> ] [ secondary>> ] [ tertiary>> ] tri
@ -131,11 +133,13 @@ ducet insert-helpers
nfd string>graphemes graphemes>weights
filter-ignorable weights>bytes ;
<PRIVATE
! Tests two strings for equality on truncated collation keys.
! levels-removed is curried into a quotation that bi@ runs once per string:
! it computes the string's collation-key and then, levels-removed times,
! applies right-trim with the [ 0 = not ] predicate followed by but-last.
! The two resulting keys are compared with = .
! NOTE(review): presumably the levels inside a key are separated by a 0
! element, so each round strips the last remaining level together with its
! separator -- confirm against weights>bytes.
: insensitive= ( str1 str2 levels-removed -- ? )
[
swap collation-key swap
[ [ 0 = not ] right-trim but-last ] times
] curry bi@ = ;
PRIVATE>
! Equality test via insensitive= with 3 trimming rounds applied to each
! string's collation key before the keys are compared.
: primary= ( str1 str2 -- ? )
3 insensitive= ;
@ -149,17 +153,14 @@ ducet insert-helpers
! Equality test via insensitive= with 0 trimming rounds, i.e. the two
! collation keys are compared in full.
: quaternary= ( str1 str2 -- ? )
0 insensitive= ;
! Orders two { str key } pairs. The second elements (the keys) are compared
! with <=> first; if that gives +eq+ the result is discarded and the pairs
! themselves are compared with <=> as a tie-breaker, otherwise the key
! comparison result is returned and both pairs are dropped.
: compare-collation ( {str1,key} {str2,key} -- <=> )
2dup [ second ] bi@ <=> dup +eq+ =
[ drop <=> ] [ 2nip ] if ;
<PRIVATE
: w/collation-key ( str -- {str,key} )
dup collation-key 2array ;
[ collation-key ] keep 2array ;
PRIVATE>
: sort-strings ( strings -- sorted )
[ w/collation-key ] map
[ compare-collation ] sort
keys ;
natural-sort values ;
: string<=> ( str1 str2 -- <=> )
[ w/collation-key ] bi@ compare-collation ;
[ w/collation-key ] compare ;

View File

@ -1 +1,2 @@
Daniel Ehrenberg
Walton Chan

View File

@ -1 +1 @@
Yahoo! search example using XML-RPC
Yahoo! search example using XML

View File

@ -1,4 +1,4 @@
USING: tools.test yahoo kernel io.files xml sequences ;
USING: tools.test yahoo kernel io.files xml sequences accessors ;
[ T{
result
@ -8,4 +8,4 @@ USING: tools.test yahoo kernel io.files xml sequences ;
"Official site with news, tour dates, discography, store, community, and more."
} ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test
[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 "Factor-search" query ] unit-test
[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=2&similar_ok=1" ] [ "hi" <search> "Factor-search" >>appid 2 >>results t >>similar-ok query ] unit-test

View File

@ -1,12 +1,15 @@
! Copyright (C) 2006 Daniel Ehrenberg
! Copyright (C) 2006 Daniel Ehrenberg, Walton Chan
! See http://factorcode.org/license.txt for BSD license.
USING: http.client xml xml.utilities kernel sequences
namespaces http math.parser help math.order locals ;
namespaces http math.parser help math.order locals accessors ;
IN: yahoo
! One search hit, with title, url and summary slots.
! C: defines <result> as the constructor word for this tuple.
TUPLE: result title url summary ;
C: <result> result
! Describes one web-search request.
! NOTE(review): each slot appears to correspond to one URL parameter read
! by the query word (appid, query, region, type, format, language, country,
! site, subscription, license, results, start, adult_ok, similar_ok) --
! confirm against query.
TUPLE: search query results adult-ok start appid region type
format similar-ok language country site subscription license ;
: parse-yahoo ( xml -- seq )
"Result" deep-tags-named [
@ -18,19 +21,44 @@ C: <result> result
! Pushes the base URL of the web-search endpoint; query parameters are
! appended to this string by the caller.
: yahoo-url ( -- str )
"http://search.yahooapis.com/WebSearchService/V1/webSearch" ;
:: query ( search num appid -- url )
! Appends one optional query-string parameter to the string being built.
! quot is called on a copy of search to obtain the value. If the value is
! not f, str and then the url-encoded value are appended with % (in that
! order); if it is f, str is dropped and nothing is appended. search is
! left on the stack so that successive calls can be chained.
! NOTE(review): % needs an enclosing make scope -- confirm every caller
! provides one.
: param ( search str quot -- search )
>r over r> call [ url-encode [ % ] bi@ ] [ drop ] if* ;
inline
! Same as param, except that quot is extended so a non-f value is converted
! with number>string before param url-encodes and appends it. An f value
! stays f, so the parameter is omitted.
: num-param ( search str quot -- search )
[ dup [ number>string ] when ] compose param ; inline
! Same as param, except that quot is extended with [ "1" and ]: any non-f
! value is replaced by the string "1", while f stays f and the parameter
! is omitted.
: bool-param ( search str quot -- search )
[ "1" and ] compose param ; inline
: query ( search -- url )
[
yahoo-url %
"?appid=" % appid %
"&query=" % search url-encode %
"&results=" % num #
yahoo-url %
"?appid=" [ appid>> ] param
"&query=" [ query>> ] param
"&region=" [ region>> ] param
"&type=" [ type>> ] param
"&format=" [ format>> ] param
"&language=" [ language>> ] param
"&country=" [ country>> ] param
"&site=" [ site>> ] param
"&subscription=" [ subscription>> ] param
"&license=" [ license>> ] param
"&results=" [ results>> ] num-param
"&start=" [ start>> ] num-param
"&adult_ok=" [ adult-ok>> ] bool-param
"&similar_ok=" [ similar-ok>> ] bool-param
drop
] "" make ;
! Pushes a hard-coded application id string; <search> stores it in the
! appid slot of every new search tuple.
! NOTE(review): unlike yahoo-url, this word has no stack effect declaration.
: factor-id
"fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ;
: search-yahoo/id ( search num id -- seq )
query http-get string>xml parse-yahoo ;
! Builds a search tuple for the given query string: appid is set to
! factor-id, results to 10, and the query slot to the input. All other
! slots keep whatever value `search new` gives them.
: <search> ( query -- search )
search new
factor-id >>appid
10 >>results
swap >>query ;
: search-yahoo ( search num -- seq )
factor-id search-yahoo/id ;
! Runs a search: the search tuple is passed through query, http-get,
! string>xml and parse-yahoo, in that order, and the output of parse-yahoo
! is returned.
: search-yahoo ( search -- seq )
query http-get string>xml parse-yahoo ;