Merge branch 'master' of git://factorcode.org/git/littledan
commit
7003b66bab
|
@ -1,7 +1,42 @@
|
|||
USING: help.syntax help.markup ;
|
||||
USING: help.syntax help.markup strings byte-arrays ;
|
||||
IN: unicode.collation
|
||||
|
||||
ABOUT: "unicode.collation"
|
||||
|
||||
ARTICLE: "unicode.collation" "Unicode collation algorithm"
|
||||
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode." ;
|
||||
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:"
|
||||
{ $subsection sort-strings }
|
||||
{ $subsection collation-key }
|
||||
{ $subsection string<=> }
|
||||
{ $subsection primary= }
|
||||
{ $subsection secondary= }
|
||||
{ $subsection tertiary= }
|
||||
{ $subsection quaternary= } ;
|
||||
|
||||
HELP: sort-strings
|
||||
{ $values { "strings" "a sequence of strings" } { "sorted" "the strings in DUCET order" } }
|
||||
{ $description "This word takes a sequence of strings and sorts them according to the UCA, using code point order as a tie-breaker." } ;
|
||||
|
||||
HELP: collation-key
|
||||
{ $values { "string" string } { "key" byte-array } }
|
||||
{ $description "This takes a string and gives a representation of the collation key, which can be compared with <=>" } ;
|
||||
|
||||
HELP: string<=>
|
||||
{ $values { "str1" string } { "str2" string } { "<=>" "one of +lt+, +gt+ or +eq+" } }
|
||||
{ $description "This word takes two strings and compares them using the UCA with the DUCET, using code point order as a tie-breaker." } ;
|
||||
|
||||
HELP: primary=
|
||||
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
|
||||
{ $description "This checks whether the first level of collation is identical. This is the least specific kind of equality test. In Latin script, it can be understood as ignoring case, punctuation and accent marks." } ;
|
||||
|
||||
HELP: secondary=
|
||||
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
|
||||
{ $description "This checks whether the first two levels of collation are equal. For Latin script, this means accent marks are significant again, and it is otherwise similar to primary=." } ;
|
||||
|
||||
HELP: tertiary=
|
||||
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
|
||||
{ $description "Along the same lines as secondary=, but case is significant." } ;
|
||||
|
||||
HELP: quaternary=
|
||||
{ $values { "str1" string } { "str2" string } { "?" "t or f" } }
|
||||
{ $description "This is similar to tertiary= but it makes punctuation significant again, while still leaving out things like null bytes and Hebrew vowel marks, which mean absolutely nothing in collation." } ;
|
||||
|
|
|
@ -24,6 +24,9 @@ IN: unicode.collation.tests
|
|||
[ t t f f ] [ "hello" "HELLO" test-equality ] unit-test
|
||||
[ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test
|
||||
[ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test
|
||||
[ { "good bye" "goodbye" "hello" "HELLO" } ]
|
||||
[ { "HELLO" "goodbye" "good bye" "hello" } sort-strings ]
|
||||
unit-test
|
||||
|
||||
parse-test 2 <clumps>
|
||||
[ [ test-two ] assoc-each ] with-null-writer
|
||||
|
|
|
@ -6,6 +6,7 @@ unicode.syntax macros sequences.deep words unicode.breaks
|
|||
quotations ;
|
||||
IN: unicode.collation
|
||||
|
||||
<PRIVATE
|
||||
VALUE: ducet
|
||||
|
||||
TUPLE: weight primary secondary tertiary ignorable? ;
|
||||
|
@ -115,6 +116,7 @@ ducet insert-helpers
|
|||
[ [ variable-weight ] each ]
|
||||
} cleave
|
||||
] { } make ;
|
||||
PRIVATE>
|
||||
|
||||
: completely-ignorable? ( weight -- ? )
|
||||
[ primary>> ] [ secondary>> ] [ tertiary>> ] tri
|
||||
|
@ -131,11 +133,13 @@ ducet insert-helpers
|
|||
nfd string>graphemes graphemes>weights
|
||||
filter-ignorable weights>bytes ;
|
||||
|
||||
<PRIVATE
|
||||
: insensitive= ( str1 str2 levels-removed -- ? )
|
||||
[
|
||||
swap collation-key swap
|
||||
[ [ 0 = not ] right-trim but-last ] times
|
||||
] curry bi@ = ;
|
||||
PRIVATE>
|
||||
|
||||
: primary= ( str1 str2 -- ? )
|
||||
3 insensitive= ;
|
||||
|
@ -149,17 +153,14 @@ ducet insert-helpers
|
|||
: quaternary= ( str1 str2 -- ? )
|
||||
0 insensitive= ;
|
||||
|
||||
: compare-collation ( {str1,key} {str2,key} -- <=> )
|
||||
2dup [ second ] bi@ <=> dup +eq+ =
|
||||
[ drop <=> ] [ 2nip ] if ;
|
||||
|
||||
<PRIVATE
|
||||
: w/collation-key ( str -- {str,key} )
|
||||
dup collation-key 2array ;
|
||||
[ collation-key ] keep 2array ;
|
||||
PRIVATE>
|
||||
|
||||
: sort-strings ( strings -- sorted )
|
||||
[ w/collation-key ] map
|
||||
[ compare-collation ] sort
|
||||
keys ;
|
||||
natural-sort values ;
|
||||
|
||||
: string<=> ( str1 str2 -- <=> )
|
||||
[ w/collation-key ] bi@ compare-collation ;
|
||||
[ w/collation-key ] compare ;
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
Daniel Ehrenberg
|
||||
Walton Chan
|
||||
|
|
|
@ -1 +1 @@
|
|||
Yahoo! search example using XML-RPC
|
||||
Yahoo! search example using XML
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: tools.test yahoo kernel io.files xml sequences ;
|
||||
USING: tools.test yahoo kernel io.files xml sequences accessors ;
|
||||
|
||||
[ T{
|
||||
result
|
||||
|
@ -8,4 +8,4 @@ USING: tools.test yahoo kernel io.files xml sequences ;
|
|||
"Official site with news, tour dates, discography, store, community, and more."
|
||||
} ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test
|
||||
|
||||
[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 "Factor-search" query ] unit-test
|
||||
[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=2&similar_ok=1" ] [ "hi" <search> "Factor-search" >>appid 2 >>results t >>similar-ok query ] unit-test
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
! Copyright (C) 2006 Daniel Ehrenberg
|
||||
! Copyright (C) 2006 Daniel Ehrenberg, Walton Chan
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: http.client xml xml.utilities kernel sequences
|
||||
namespaces http math.parser help math.order locals ;
|
||||
namespaces http math.parser help math.order locals accessors ;
|
||||
IN: yahoo
|
||||
|
||||
TUPLE: result title url summary ;
|
||||
|
||||
C: <result> result
|
||||
|
||||
TUPLE: search query results adult-ok start appid region type
|
||||
format similar-ok language country site subscription license ;
|
||||
|
||||
: parse-yahoo ( xml -- seq )
|
||||
"Result" deep-tags-named [
|
||||
|
@ -18,19 +21,44 @@ C: <result> result
|
|||
: yahoo-url ( -- str )
|
||||
"http://search.yahooapis.com/WebSearchService/V1/webSearch" ;
|
||||
|
||||
:: query ( search num appid -- url )
|
||||
: param ( search str quot -- search )
|
||||
>r over r> call [ url-encode [ % ] bi@ ] [ drop ] if* ;
|
||||
inline
|
||||
|
||||
: num-param ( search str quot -- search )
|
||||
[ dup [ number>string ] when ] compose param ; inline
|
||||
|
||||
: bool-param ( search str quot -- search )
|
||||
[ "1" and ] compose param ; inline
|
||||
|
||||
: query ( search -- url )
|
||||
[
|
||||
yahoo-url %
|
||||
"?appid=" % appid %
|
||||
"&query=" % search url-encode %
|
||||
"&results=" % num #
|
||||
yahoo-url %
|
||||
"?appid=" [ appid>> ] param
|
||||
"&query=" [ query>> ] param
|
||||
"®ion=" [ region>> ] param
|
||||
"&type=" [ type>> ] param
|
||||
"&format=" [ format>> ] param
|
||||
"&language=" [ language>> ] param
|
||||
"&country=" [ country>> ] param
|
||||
"&site=" [ site>> ] param
|
||||
"&subscription=" [ subscription>> ] param
|
||||
"&license=" [ license>> ] param
|
||||
"&results=" [ results>> ] num-param
|
||||
"&start=" [ start>> ] num-param
|
||||
"&adult_ok=" [ adult-ok>> ] bool-param
|
||||
"&similar_ok=" [ similar-ok>> ] bool-param
|
||||
drop
|
||||
] "" make ;
|
||||
|
||||
: factor-id
|
||||
"fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ;
|
||||
|
||||
: search-yahoo/id ( search num id -- seq )
|
||||
query http-get string>xml parse-yahoo ;
|
||||
: <search> ( query -- search )
|
||||
search new
|
||||
factor-id >>appid
|
||||
10 >>results
|
||||
swap >>query ;
|
||||
|
||||
: search-yahoo ( search num -- seq )
|
||||
factor-id search-yahoo/id ;
|
||||
: search-yahoo ( search -- seq )
|
||||
query http-get string>xml parse-yahoo ;
|
||||
|
|
Loading…
Reference in New Issue