Merge branch 'master' of git://factorcode.org/git/factor

db4
Matthew Willis 2008-05-27 20:22:35 -07:00
commit e3c5b66547
43 changed files with 179625 additions and 286 deletions

View File

@@ -1,4 +1,4 @@
-USING: help.markup help.syntax kernel quotations ;
+USING: help.markup help.syntax kernel quotations dlists.private ;
 IN: dlists
 ARTICLE: "dlists" "Doubly-linked lists"
@@ -51,38 +51,52 @@ HELP: dlist-empty?
 HELP: push-front
 { $values { "obj" "an object" } { "dlist" dlist } }
 { $description "Push the object onto the front of the " { $link dlist } "." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-back pop-front pop-front* pop-back pop-back* } ;
+{ $notes "This operation is O(1)." } ;
+HELP: push-front*
+{ $values { "obj" "an object" } { "dlist" dlist } { "dlist-node" dlist-node } }
+{ $description "Push the object onto the front of the " { $link dlist } " and return the newly created " { $snippet "dlist-node" } "." }
+{ $notes "This operation is O(1)." } ;
 HELP: push-back
 { $values { "obj" "an object" } { "dlist" dlist } }
 { $description "Push the object onto the back of the " { $link dlist } "." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-front pop-front pop-front* pop-back pop-back* } ;
+{ $notes "This operation is O(1)." } ;
+HELP: push-back*
+{ $values { "obj" "an object" } { "dlist" dlist } { "dlist-node" dlist-node } }
+{ $description "Push the object onto the back of the " { $link dlist } " and return the newly created " { $snippet "dlist-node" } "." }
+{ $notes "This operation is O(1)." } ;
+HELP: peek-front
+{ $values { "dlist" dlist } { "obj" "an object" } }
+{ $description "Returns the object at the front of the " { $link dlist } "." } ;
 HELP: pop-front
 { $values { "dlist" dlist } { "obj" "an object" } }
 { $description "Pop the object off the front of the " { $link dlist } " and return the object." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-front push-back pop-front* pop-back pop-back* } ;
+{ $notes "This operation is O(1)." } ;
 HELP: pop-front*
 { $values { "dlist" dlist } }
 { $description "Pop the object off the front of the " { $link dlist } "." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-front push-back pop-front pop-back pop-back* } ;
+{ $notes "This operation is O(1)." } ;
+HELP: peek-back
+{ $values { "dlist" dlist } { "obj" "an object" } }
+{ $description "Returns the object at the back of the " { $link dlist } "." } ;
 HELP: pop-back
 { $values { "dlist" dlist } { "obj" "an object" } }
 { $description "Pop the object off the back of the " { $link dlist } " and return the object." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-front push-back pop-front pop-front* pop-back* } ;
+{ $notes "This operation is O(1)." } ;
 HELP: pop-back*
 { $values { "dlist" dlist } }
 { $description "Pop the object off the back of the " { $link dlist } "." }
-{ $notes "This operation is O(1)." }
-{ $see-also push-front push-back pop-front pop-front* pop-back } ;
+{ $notes "This operation is O(1)." } ;
+{ push-front push-front* push-back push-back* peek-front pop-front pop-front* peek-back pop-back pop-back* } related-words
 HELP: dlist-find
 { $values { "dlist" { $link dlist } } { "quot" quotation } { "obj/f" "an object or " { $link f } } { "?" "a boolean" } }

View File

@@ -1,6 +1,6 @@
 USING: dlists dlists.private kernel tools.test random assocs
 sets sequences namespaces sorting debugger io prettyprint
-math ;
+math accessors classes ;
 IN: dlists.tests
 [ t ] [ <dlist> dlist-empty? ] unit-test
@@ -65,20 +65,17 @@ IN: dlists.tests
 : assert-same-elements
 [ prune natural-sort ] bi@ assert= ;
-: dlist-push-all [ push-front ] curry each ;
 : dlist-delete-all [ dlist-delete drop ] curry each ;
 : dlist>array [ [ , ] dlist-slurp ] { } make ;
 [ ] [
 5 [ drop 30 random >fixnum ] map prune
-6 [ drop 30 random >fixnum ] map prune 2dup nl . . nl
-[
+6 [ drop 30 random >fixnum ] map prune [
 <dlist>
-[ dlist-push-all ] keep
-[ dlist-delete-all ] keep
-dlist>array
+[ push-all-front ]
+[ dlist-delete-all ]
+[ dlist>array ] tri
 ] 2keep swap diff assert-same-elements
 ] unit-test
@@ -95,3 +92,13 @@ IN: dlists.tests
 [ 1 ] [ "d" get dlist-length ] unit-test
 [ 1 ] [ "d" get dlist>array length ] unit-test
+[ t ] [ <dlist> 4 over push-back 5 over push-back [ obj>> 4 = ] dlist-find-node drop class dlist-node = ] unit-test
+[ t ] [ <dlist> 4 over push-back 5 over push-back [ obj>> 5 = ] dlist-find-node drop class dlist-node = ] unit-test
+[ t ] [ <dlist> 4 over push-back 5 over push-back* [ = ] curry dlist-find-node drop class dlist-node = ] unit-test
+[ ] [ <dlist> 4 over push-back 5 over push-back [ drop ] dlist-each ] unit-test
+[ <dlist> peek-front ] must-fail
+[ <dlist> peek-back ] must-fail
+[ <dlist> pop-front ] [ empty-dlist? ] must-fail-with
+[ <dlist> pop-back ] [ empty-dlist? ] must-fail-with

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007, 2008 Mackenzie Straight, Doug Coleman, ! Copyright (C) 2007, 2008 Mackenzie Straight, Doug Coleman,
! Slava Pestov. ! Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: combinators kernel math sequences accessors ; USING: combinators kernel math sequences accessors inspector ;
IN: dlists IN: dlists
TUPLE: dlist front back length ; TUPLE: dlist front back length ;
@ -47,7 +47,7 @@ C: <dlist-node> dlist-node
: (dlist-find-node) ( dlist-node quot -- node/f ? ) : (dlist-find-node) ( dlist-node quot -- node/f ? )
over [ over [
[ >r obj>> r> call ] 2keep rot [ call ] 2keep rot
[ drop t ] [ >r next>> r> (dlist-find-node) ] if [ drop t ] [ >r next>> r> (dlist-find-node) ] if
] [ 2drop f f ] if ; inline ] [ 2drop f f ] if ; inline
@ -55,7 +55,7 @@ C: <dlist-node> dlist-node
>r front>> r> (dlist-find-node) ; inline >r front>> r> (dlist-find-node) ; inline
: dlist-each-node ( dlist quot -- ) : dlist-each-node ( dlist quot -- )
[ t ] compose dlist-find-node 2drop ; inline [ f ] compose dlist-find-node 2drop ; inline
PRIVATE> PRIVATE>
@ -84,11 +84,17 @@ PRIVATE>
: push-all-back ( seq dlist -- ) : push-all-back ( seq dlist -- )
[ push-back ] curry each ; [ push-back ] curry each ;
ERROR: empty-dlist ;
M: empty-dlist summary ( dlist -- )
drop "Emtpy dlist" ;
: peek-front ( dlist -- obj ) : peek-front ( dlist -- obj )
front>> obj>> ; front>> [ empty-dlist ] unless* obj>> ;
: pop-front ( dlist -- obj ) : pop-front ( dlist -- obj )
dup front>> [ dup front>> [ empty-dlist ] unless*
[
dup next>> dup next>>
f rot (>>next) f rot (>>next)
f over set-prev-when f over set-prev-when
@ -96,13 +102,15 @@ PRIVATE>
] 2keep obj>> ] 2keep obj>>
swap [ normalize-back ] keep dec-length ; swap [ normalize-back ] keep dec-length ;
: pop-front* ( dlist -- ) pop-front drop ; : pop-front* ( dlist -- )
pop-front drop ;
: peek-back ( dlist -- obj ) : peek-back ( dlist -- obj )
back>> obj>> ; back>> [ empty-dlist ] unless* obj>> ;
: pop-back ( dlist -- obj ) : pop-back ( dlist -- obj )
dup back>> [ dup back>> [ empty-dlist ] unless*
[
dup prev>> dup prev>>
f rot (>>prev) f rot (>>prev)
f over set-next-when f over set-next-when
@ -110,9 +118,11 @@ PRIVATE>
] 2keep obj>> ] 2keep obj>>
swap [ normalize-front ] keep dec-length ; swap [ normalize-front ] keep dec-length ;
: pop-back* ( dlist -- ) pop-back drop ; : pop-back* ( dlist -- )
pop-back drop ;
: dlist-find ( dlist quot -- obj/f ? ) : dlist-find ( dlist quot -- obj/f ? )
[ obj>> ] prepose
dlist-find-node [ obj>> t ] [ drop f f ] if ; inline dlist-find-node [ obj>> t ] [ drop f f ] if ; inline
: dlist-contains? ( dlist quot -- ? ) : dlist-contains? ( dlist quot -- ? )
@ -141,6 +151,7 @@ PRIVATE>
] if ; inline ] if ; inline
: delete-node-if ( dlist quot -- obj/f ) : delete-node-if ( dlist quot -- obj/f )
[ obj>> ] prepose
delete-node-if* drop ; inline delete-node-if* drop ; inline
: dlist-delete ( obj dlist -- obj/f ) : dlist-delete ( obj dlist -- obj/f )
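A sketch of the new error behavior, using the standard recover combinator from continuations (illustrative only):

USING: dlists continuations kernel prettyprint ;
[ <dlist> pop-front ] [ drop "caught empty-dlist" ] recover .
! prints "caught empty-dlist" ; popping an empty dlist now throws empty-dlist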

View File

@@ -27,3 +27,15 @@ circular strings ;
 ! This no longer fails
 ! [ "test" <circular> 5 swap nth ] must-fail
 ! [ "foo" <circular> CHAR: b 3 rot set-nth ] must-fail
+[ { } ] [ 3 <growing-circular> >array ] unit-test
+[ { 1 2 } ] [
+3 <growing-circular>
+[ 1 swap push-growing-circular ] keep
+[ 2 swap push-growing-circular ] keep >array
+] unit-test
+[ { 3 4 5 } ] [
+3 <growing-circular> dup { 1 2 3 4 5 } [
+swap push-growing-circular
+] with each >array
+] unit-test

View File

@@ -19,10 +19,6 @@ M: circular length seq>> length ;
 M: circular virtual@ circular-wrap seq>> ;
-M: circular nth virtual@ nth ;
-M: circular set-nth virtual@ set-nth ;
 M: circular virtual-seq seq>> ;
 : change-circular-start ( n circular -- )
@@ -36,3 +32,20 @@ M: circular virtual-seq seq>> ;
 0 <string> <circular> ;
 INSTANCE: circular virtual-sequence
+TUPLE: growing-circular < circular length ;
+M: growing-circular length length>> ;
+: full? ( circular -- ? )
+[ length ] [ seq>> length ] bi = ;
+: set-peek ( elt seq -- )
+[ length 1- ] keep set-nth ;
+: push-growing-circular ( elt circular -- )
+dup full? [ push-circular ]
+[ [ 1+ ] change-length set-peek ] if ;
+: <growing-circular> ( capacity -- growing-circular )
+{ } new-sequence 0 0 growing-circular boa ;
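Illustrative usage, grounded in the tests above: a growing-circular grows until it reaches its capacity, after which each push discards the oldest element.

USING: circular kernel prettyprint sequences ;
3 <growing-circular>
{ 1 2 3 4 5 } [ over push-growing-circular ] each
>array .   ! { 3 4 5 }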

View File

@@ -23,7 +23,7 @@ HELP: ndip
 { $example "USING: combinators.lib kernel prettyprint ;" "1 2 [ dup ] 1 ndip .s" "1\n1\n2" }
 { $example "USING: combinators.lib kernel prettyprint ;" "1 2 3 [ drop ] 2 ndip .s" "2\n3" }
 }
-{ $see-also dip dipd } ;
+{ $see-also dip 2dip } ;
 HELP: nslip
 { $values { "n" number } }

View File

@@ -5,9 +5,6 @@ IN: combinators.lib.tests
 [ 5 ] [ [ 10 random ] [ 5 = ] generate ] unit-test
 [ t ] [ [ 10 random ] [ even? ] generate even? ] unit-test
-{ 6 2 } [ 1 2 [ 5 + ] dip ] unit-test
-{ 6 2 1 } [ 1 2 1 [ 5 + ] dipd ] unit-test
 [ [ 99 ] 1 2 3 4 5 5 nslip ] must-infer
 { 99 1 2 3 4 5 } [ [ 99 ] 1 2 3 4 5 5 nslip ] unit-test
 [ 1 2 3 4 5 [ drop drop drop drop drop 2 ] 5 nkeep ] must-infer

View File

@@ -38,8 +38,6 @@ MACRO: napply ( n -- )
 : 3apply ( obj obj obj quot -- ) 3 napply ; inline
-: dipd ( x y quot -- y ) 2 ndip ; inline
 : 2with ( param1 param2 obj quot -- obj curry )
 with with ; inline
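The removed dipd is equivalent to the core word 2dip, which the rest of this commit substitutes throughout. For example (a sketch based on the deleted test above):

USING: kernel prettyprint ;
1 2 1 [ 5 + ] 2dip .s   ! 6 2 1, formerly written [ 5 + ] dipd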

View File

@@ -47,15 +47,9 @@ TUPLE: entry time data ;
 SYMBOL: NX
-: cache-nx ( query ttl -- )
-ttl->time NX entry boa
-table-add ;
+: cache-nx ( query ttl -- ) ttl->time NX entry boa table-add ;
-: nx? ( obj -- ? )
-dup entry?
-[ data>> NX = ]
-[ drop f ]
-if ;
+: nx? ( obj -- ? ) dup entry? [ data>> NX = ] [ drop f ] if ;
 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -72,27 +66,15 @@ SYMBOL: NX
 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-: entry-expired? ( entry -- ? ) time>> time->ttl 0 <= ;
+: expired? ( entry -- ? ) time>> time->ttl 0 <= ;
 : cache-get ( query -- result )
 dup table-get ! query result
 {
-{
-[ dup f = ] ! not in the cache
-[ 2drop f ]
-}
-{
-[ dup entry-expired? ] ! here but expired
-[ drop table-rem f ]
-}
-{
-[ dup nx? ] ! negative result has been cached
-[ 2drop NX ]
-}
-{
-[ t ]
-[ query+entry->rrs ]
-}
+{ [ dup f = ] [ 2drop f ] } ! not in the cache
+{ [ dup expired? ] [ drop table-rem f ] } ! here but expired
+{ [ dup nx? ] [ 2drop NX ] } ! negative result cached
+{ [ t ] [ query+entry->rrs ] } ! good to go
 }
 cond ;
@@ -114,22 +96,10 @@ SYMBOL: NX
 : cache-add ( query rr -- )
 over table-get ! query rr entry
 {
-{
-[ dup f = ] ! not in the cache
-[ drop rr->entry table-add ]
-}
-{
-[ dup nx? ]
-[ drop over table-rem rr->entry table-add ]
-}
-{
-[ dup entry-expired? ]
-[ drop rr->entry table-add ]
-}
-{
-[ t ]
-[ rot drop add-rr-to-entry ]
-}
+{ [ dup f = ] [ drop rr->entry table-add ] }
+{ [ dup nx? ] [ drop over table-rem rr->entry table-add ] }
+{ [ dup expired? ] [ drop rr->entry table-add ] }
+{ [ t ] [ rot drop add-rr-to-entry ] }
 }
 cond ;

View File

@@ -6,6 +6,8 @@ IN: dns.resolver
 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! Need to cache records even in the case of name error
 : cache-message ( message -- message )
 dup dup rcode>> NAME-ERROR =
 [

View File

@@ -16,10 +16,18 @@ IN: farkup.tests
 [ "<p>**</p>" ] [ "\\**" convert-farkup ] unit-test
 [ "" ] [ "\n\n" convert-farkup ] unit-test
+[ "" ] [ "\r\n\r\n" convert-farkup ] unit-test
+[ "" ] [ "\r\r\r\r" convert-farkup ] unit-test
+[ "\n" ] [ "\r\r\r" convert-farkup ] unit-test
 [ "\n" ] [ "\n\n\n" convert-farkup ] unit-test
 [ "<p>foo</p><p>bar</p>" ] [ "foo\n\nbar" convert-farkup ] unit-test
+[ "<p>foo</p><p>bar</p>" ] [ "foo\r\n\r\nbar" convert-farkup ] unit-test
+[ "<p>foo</p><p>bar</p>" ] [ "foo\r\rbar" convert-farkup ] unit-test
+[ "<p>foo</p><p>bar</p>" ] [ "foo\r\r\nbar" convert-farkup ] unit-test
 [ "\n<p>bar\n</p>" ] [ "\nbar\n" convert-farkup ] unit-test
+[ "\n<p>bar\n</p>" ] [ "\rbar\r" convert-farkup ] unit-test
+[ "\n<p>bar\n</p>" ] [ "\r\nbar\r\n" convert-farkup ] unit-test
 [ "<p>foo</p>\n<p>bar</p>" ] [ "foo\n\n\nbar" convert-farkup ] unit-test
@@ -28,17 +36,17 @@ IN: farkup.tests
 [ "<p>|a</p>" ]
 [ "|a" convert-farkup ] unit-test
-[ "<p>|a|</p>" ]
+[ "<table><tr><td>a</td></tr></table>" ]
 [ "|a|" convert-farkup ] unit-test
 [ "<table><tr><td>a</td><td>b</td></tr></table>" ]
-[ "a|b" convert-farkup ] unit-test
+[ "|a|b|" convert-farkup ] unit-test
-[ "<table><tr><td>a</td><td>b</td></tr></table>\n<table><tr><td>c</td><td>d</td></tr></table>" ]
-[ "a|b\nc|d" convert-farkup ] unit-test
+[ "<table><tr><td>a</td><td>b</td></tr><tr><td>c</td><td>d</td></tr></table>" ]
+[ "|a|b|\n|c|d|" convert-farkup ] unit-test
-[ "<table><tr><td>a</td><td>b</td></tr></table>\n<table><tr><td>c</td><td>d</td></tr></table>\n" ]
-[ "a|b\nc|d\n" convert-farkup ] unit-test
+[ "<table><tr><td>a</td><td>b</td></tr><tr><td>c</td><td>d</td></tr></table>" ]
+[ "|a|b|\n|c|d|\n" convert-farkup ] unit-test
 [ "<p><strong>foo</strong>\n</p><h1>aheading</h1>\n<p>adfasd</p>" ]
 [ "*foo*\n=aheading=\nadfasd" convert-farkup ] unit-test

View File

@@ -9,14 +9,14 @@ IN: farkup
 <PRIVATE
 : delimiters ( -- string )
-"*_^~%[-=|\\\n" ; inline
+"*_^~%[-=|\\\r\n" ; inline
 MEMO: text ( -- parser )
 [ delimiters member? not ] satisfy repeat1
 [ >string escape-string ] action ;
 MEMO: delimiter ( -- parser )
-[ dup delimiters member? swap "\n=" member? not and ] satisfy
+[ dup delimiters member? swap "\r\n=" member? not and ] satisfy
 [ 1string ] action ;
 : surround-with-foo ( string tag -- seq )
@@ -37,8 +37,11 @@ MEMO: emphasis ( -- parser ) "_" "em" delimited ;
 MEMO: superscript ( -- parser ) "^" "sup" delimited ;
 MEMO: subscript ( -- parser ) "~" "sub" delimited ;
 MEMO: inline-code ( -- parser ) "%" "code" delimited ;
-MEMO: nl ( -- parser ) "\n" token ;
-MEMO: 2nl ( -- parser ) "\n\n" token hide ;
+MEMO: nl ( -- parser )
+"\r\n" token [ drop "\n" ] action
+"\r" token [ drop "\n" ] action
+"\n" token 3choice ;
+MEMO: 2nl ( -- parser ) nl hide nl hide 2seq ;
 MEMO: h1 ( -- parser ) "=" "h1" delimited ;
 MEMO: h2 ( -- parser ) "==" "h2" delimited ;
 MEMO: h3 ( -- parser ) "===" "h3" delimited ;
@@ -119,19 +122,21 @@ MEMO: list-item ( -- parser )
 ] seq* [ "li" surround-with-foo ] action ;
 MEMO: list ( -- parser )
-list-item "\n" token hide list-of
+list-item nl hide list-of
 [ "ul" surround-with-foo ] action ;
 MEMO: table-column ( -- parser )
 text [ "td" surround-with-foo ] action ;
 MEMO: table-row ( -- parser )
-[
-table-column "|" token hide list-of-many ,
-] seq* [ "tr" surround-with-foo ] action ;
+"|" token hide
+table-column "|" token hide list-of
+"|" token hide nl hide optional 4seq
+[ "tr" surround-with-foo ] action ;
 MEMO: table ( -- parser )
-table-row repeat1 [ "table" surround-with-foo ] action ;
+table-row repeat1
+[ "table" surround-with-foo ] action ;
 MEMO: code ( -- parser )
 [
@@ -151,8 +156,8 @@ MEMO: line ( -- parser )
 MEMO: paragraph ( -- parser )
 line
-"\n" token over 2seq repeat0
-"\n" token "\n" token ensure-not 2seq optional 3seq
+nl over 2seq repeat0
+nl nl ensure-not 2seq optional 3seq
 [
 dup [ dup string? not swap [ blank? ] all? or ] deep-all?
 [ "<p>" swap "</p>" 3array ] unless
@@ -163,7 +168,7 @@ PRIVATE>
 PEG: parse-farkup ( -- parser )
 [
 list , table , h1 , h2 , h3 , h4 , code , paragraph , 2nl , nl ,
-] choice* repeat0 "\n" token optional 2seq ;
+] choice* repeat0 nl optional 2seq ;
 : write-farkup ( parse-result -- )
 [ dup string? [ write ] [ drop ] if ] deep-each ;
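Grounded in the tests earlier in this commit, all three newline conventions now normalize to the same parse, and the new pipe-delimited rows merge into a single table (illustrative session):

USING: farkup prettyprint ;
"foo\r\n\r\nbar" convert-farkup .
! "<p>foo</p><p>bar</p>"
"|a|b|\n|c|d|" convert-farkup .
! "<table><tr><td>a</td><td>b</td></tr><tr><td>c</td><td>d</td></tr></table>"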

View File

@@ -3,25 +3,27 @@ arrays shuffle unicode.case namespaces splitting http
 sequences.lib accessors io combinators http.client ;
 IN: html.parser.analyzer
+TUPLE: link attributes clickable ;
 : scrape-html ( url -- vector )
 http-get parse-html ;
 : (find-relative)
-[ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ;
+[ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; inline
 : find-relative ( seq quot n -- i elt )
 >r over [ find drop ] dip r> swap pick
-(find-relative) ;
+(find-relative) ; inline
 : (find-all) ( n seq quot -- )
 2dup >r >r find-from [
 dupd 2array , 1+ r> r> (find-all)
 ] [
 r> r> 3drop
-] if* ;
+] if* ; inline
 : find-all ( seq quot -- alist )
-[ 0 -rot (find-all) ] { } make ;
+[ 0 -rot (find-all) ] { } make ; inline
 : (find-nth) ( offset seq quot n count -- obj )
 >r >r [ find-from ] 2keep 4 npick [
@@ -33,14 +35,14 @@ IN: html.parser.analyzer
 ] if
 ] [
 2drop r> r> 2drop
-] if ;
+] if ; inline
 : find-nth ( seq quot n -- i elt )
-0 -roll 0 (find-nth) ;
+0 -roll 0 (find-nth) ; inline
 : find-nth-relative ( seq quot n offset -- i elt )
 >r [ find-nth ] 3keep 2drop nip r> swap pick
-(find-relative) ;
+(find-relative) ; inline
 : remove-blank-text ( vector -- vector' )
 [
@@ -120,9 +122,14 @@ IN: html.parser.analyzer
 [ [ name>> "a" = ] [ attributes>> "href" swap at ] bi and ]
 find-between-all ;
+: <link> ( vector -- link )
+[ first attributes>> ]
+[ [ name>> { text "img" } member? ] filter ] bi
+link boa ;
 : link. ( vector -- )
-[ second text>> write bl ]
-[ first tag-link write nl ] bi ;
+[ attributes>> "href" swap at write nl ]
+[ clickable>> [ bl bl text>> print ] each nl ] bi ;
 : find-by-text ( seq quot -- tag )
 [ dup name>> text = ] prepose find drop ;
@@ -136,12 +143,12 @@ IN: html.parser.analyzer
 : find-forms ( vector -- vector' )
 "form" over find-opening-tags-by-name
-over [ >r first2 r> find-between* ] curry map
+swap [ >r first2 r> find-between* ] curry map
 [ [ name>> { "form" "input" } member? ] filter ] map ;
 : find-html-objects ( string vector -- vector' )
-find-opening-tags-by-name
-over [ >r first2 r> find-between* ] curry map ;
+[ find-opening-tags-by-name ] keep
+[ >r first2 r> find-between* ] curry map ;
 : form-action ( vector -- string )
 [ name>> "form" = ] find nip

View File

@@ -44,6 +44,9 @@ PRIVATE>
 >intervals ensure-disjoint >tuple-array
 interval-map boa ;
+: <interval-set> ( specification -- map )
+[ dup 2array ] map <interval-map> ;
 :: coalesce ( alist -- specification )
 ! Only works with integer keys, because they're discrete
 ! Makes 2array keys
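A possible use of the new word (a sketch; interval-key? is the membership test this commit relies on elsewhere):

USING: interval-maps kernel prettyprint ;
{ { 1 5 } { 10 12 } } <interval-set>   ! every interval maps to itself
[ 3 swap interval-key? . ]             ! t
[ 7 swap interval-key? . ] bi          ! f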

View File

@@ -19,7 +19,7 @@ dquote = '"'
 squote = "'"
 digit = [0-9]
 integer = ("-")? (digit)+ => [[ first2 append string>number ]]
-float = integer "." (digit)* => [[ first3 >string [ number>string ] dipd 3append string>number ]]
+float = integer "." (digit)* => [[ first3 >string [ number>string ] 2dip 3append string>number ]]
 rational = integer "/" (digit)+ => [[ first3 nip string>number / ]]
 number = float
 | rational

View File

@@ -50,7 +50,7 @@ C: <ebnf> ebnf
 : syntax-pack ( begin parser end -- parser )
 #! Parse 'parser' surrounded by syntax elements
 #! begin and end.
-[ syntax ] dipd syntax pack ;
+[ syntax ] 2dip syntax pack ;
 : 'identifier' ( -- parser )
 #! Return a parser that parses an identifier delimited by

View File

@@ -35,8 +35,8 @@ IN: project-euler.047
 pick pick = [
 swap - nip
 ] [
-dup prime? [ [ drop 0 ] dipd ] [
-2dup unique-factors length = [ [ 1+ ] dipd ] [ [ drop 0 ] dipd ] if
+dup prime? [ [ drop 0 ] 2dip ] [
+2dup unique-factors length = [ [ 1+ ] 2dip ] [ [ drop 0 ] 2dip ] if
 ] if 1+ (consecutive)
 ] if ;

View File

@@ -35,7 +35,7 @@ IN: reports.noise
 { compose 1/2 }
 { curry 1/3 }
 { dip 1 }
-{ dipd 2 }
+{ 2dip 2 }
 { drop 1/3 }
 { dup 1/3 }
 { if 1/3 }

View File

@@ -37,3 +37,23 @@ HELP: count
 "100 [1,b] [ even? ] count ."
 "50"
 } ;
+HELP: if-seq
+{ $values { "seq" sequence } { "quot1" quotation } { "quot2" quotation } }
+{ $description "Checks whether the sequence is empty. If the sequence has any elements, " { $snippet "quot1" } " is called on it. Otherwise, the empty sequence is dropped and " { $snippet "quot2" } " is called." }
+{ $example
+"USING: kernel prettyprint sequences sequences.lib ;"
+"{ 1 2 3 } [ sum ] [ \"empty sequence\" throw ] if-seq ."
+"6"
+} ;
+HELP: if-empty
+{ $values { "seq" sequence } { "quot1" quotation } { "quot2" quotation } }
+{ $description "Checks whether the sequence is empty. An empty sequence is dropped and " { $snippet "quot1" } " is called. Otherwise, if the sequence has any elements, " { $snippet "quot2" } " is called on it." }
+{ $example
+"USING: kernel prettyprint sequences sequences.lib ;"
+"{ 1 2 3 } [ \"empty sequence\" ] [ sum ] if-empty ."
+"6"
+} ;
+{ if-seq if-empty } related-words

View File

@@ -79,3 +79,10 @@ IN: sequences.lib.tests
 [ ] [ { } 0 firstn ] unit-test
 [ "a" ] [ { "a" } 1 firstn ] unit-test
+[ { { 1 1 } { 1 2 } { 2 0 } } ] [ { { 2 0 } { 1 1 } { 1 2 } } dup [ first ] insertion-sort ] unit-test
+[ "empty" ] [ { } [ "not empty" ] [ "empty" ] if-seq ] unit-test
+[ { 1 } "not empty" ] [ { 1 } [ "not empty" ] [ "empty" ] if-seq ] unit-test
+[ "empty" ] [ { } [ "empty" ] [ "not empty" ] if-empty ] unit-test
+[ { 1 } "not empty" ] [ { 1 } [ "empty" ] [ "not empty" ] if-empty ] unit-test

View File

@@ -4,7 +4,7 @@
 USING: combinators.lib kernel sequences math namespaces assocs
 random sequences.private shuffle math.functions mirrors
 arrays math.parser math.private sorting strings ascii macros
-assocs.lib quotations hashtables math.order ;
+assocs.lib quotations hashtables math.order locals ;
 IN: sequences.lib
 : each-withn ( seq quot n -- ) nwith each ; inline
@@ -243,3 +243,23 @@ PRIVATE>
 : short ( seq n -- seq n' )
 over length min ; inline
+<PRIVATE
+:: insert ( seq quot n -- )
+n zero? [
+n n 1- [ seq nth quot call ] bi@ >= [
+n n 1- seq exchange
+seq quot n 1- insert
+] unless
+] unless ; inline
+PRIVATE>
+: insertion-sort ( seq quot -- )
+! quot is a transformation on elements
+over length [ insert ] 2with each ; inline
+: if-seq ( seq quot1 quot2 -- )
+[ f like ] 2dip if* ; inline
+: if-empty ( seq quot1 quot2 -- )
+swap if-seq ; inline
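Illustrative usage, taken from the unit tests above: insertion-sort reorders the sequence in place by the key quotation, and if-empty dispatches on emptiness.

USING: kernel prettyprint sequences sequences.lib ;
{ { 2 0 } { 1 1 } { 1 2 } } dup [ first ] insertion-sort .
! { { 1 1 } { 1 2 } { 2 0 } }
{ 1 2 3 } [ "empty" ] [ sum ] if-empty .   ! 6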

View File

@@ -22,9 +22,9 @@ TUPLE: space-invaders port1 port2i port2o port3o port4lo port4hi port5o bitmap s
 : set-bitmap-pixel ( color point array -- )
 #! 'color' is a {r g b}. Point is {x y}.
 [ bitmap-index ] dip ! color index array
-[ [ first ] dipd set-uchar-nth ] 3keep
-[ [ second ] dipd [ 1 + ] dip set-uchar-nth ] 3keep
-[ third ] dipd [ 2 + ] dip set-uchar-nth ;
+[ [ first ] 2dip set-uchar-nth ] 3keep
+[ [ second ] 2dip [ 1 + ] dip set-uchar-nth ] 3keep
+[ third ] 2dip [ 2 + ] dip set-uchar-nth ;
 : get-bitmap-pixel ( point array -- color )
 #! Point is a {x y}. color is a {r g b}
@@ -311,7 +311,7 @@ M: invaders-gadget draw-gadget* ( gadget -- )
 : plot-bitmap-bits ( bitmap point byte bit -- )
 #! point is a {x y}.
-[ first2 ] dipd
+[ first2 ] 2dip
 dup swapd -1 * shift 1 bitand 0 =
 [ - 2array ] dip
 [ black ] [ dup get-point-color ] if

View File

@@ -33,6 +33,13 @@ TUPLE: fica-base-unknown ;
 ! Employer tax only, not withheld
 : futa-tax-rate ( -- x ) DECIMAL: .062 ; inline
+: futa-base-rate ( -- x ) 7000 ; inline
+: futa-tax-offset-credit ( -- x ) DECIMAL: .054 ; inline
+: futa-tax ( salary w4 -- x )
+drop futa-base-rate min
+futa-tax-rate futa-tax-offset-credit -
+* ;
 ! No base rate for medicare; all wages subject
 : medicare-tax-rate ( -- x ) DECIMAL: .0145 ; inline
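To make the arithmetic concrete, an illustrative trace of futa-tax for a 10000 salary (the w4 argument is dropped):

! 10000 7000 min   => 7000  ( wages capped at futa-base-rate )
! .062 .054 -      => .008  ( tax rate less the offset credit )
! 7000 .008 *      => 56    ( employer FUTA tax owed )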

View File

@@ -1,20 +0,0 @@
# ================================================
# Note: This is only a portion of the original PropList.txt
09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA
09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK
0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA
0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK
0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA
0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK
0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU
0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA
0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
# Total code points: 21

View File

@@ -1,7 +1,8 @@
 USING: unicode.categories kernel math combinators splitting
 sequences math.parser io.files io assocs arrays namespaces
 math.ranges unicode.normalize values io.encodings.ascii
-unicode.syntax unicode.data compiler.units alien.syntax sets ;
+unicode.syntax unicode.data compiler.units alien.syntax sets
+combinators.lib ;
 IN: unicode.breaks
 C-ENUM: Any L V T Extend Control CR LF graphemes ;
@@ -20,22 +21,10 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
 [ drop Control ]
 } case ;
-: trim-blank ( str -- newstr )
-[ blank? ] right-trim ;
-: process-other-extend ( lines -- set )
-[ "#" split1 drop ";" split1 drop trim-blank ] map harvest
-[ ".." split1 [ dup ] unless* [ hex> ] bi@ [a,b] ] map
-concat unique ;
-: other-extend-lines ( -- lines )
-"resource:extra/unicode/PropList.txt" ascii file-lines ;
-VALUE: other-extend
 CATEGORY: (extend) Me Mn ;
 : extend? ( ch -- ? )
-dup (extend)? [ ] [ other-extend key? ] ?if ;
+[ (extend)? ]
+[ "Other_Grapheme_Extend" property? ] or? ;
 : grapheme-class ( ch -- class )
 {
@@ -108,10 +97,7 @@ VALUE: grapheme-table
 unclip-last-slice grapheme-class swap
 [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
-[
-other-extend-lines process-other-extend \ other-extend set-value
-init-grapheme-table table
-[ make-grapheme-table finish-table ] with-variable
-\ grapheme-table set-value
-] with-compilation-unit
+init-grapheme-table table
+[ make-grapheme-table finish-table ] with-variable
+\ grapheme-table set-value

View File

@@ -101,9 +101,6 @@ SYMBOL: locale ! Just casing locale, or overall?
 : >case-fold ( string -- fold )
 >upper >lower ;
-: insensitive= ( str1 str2 -- ? )
-[ >case-fold ] bi@ = ;
 : lower? ( string -- ? )
 dup >lower = ;
 : upper? ( string -- ? )

View File

@@ -5,3 +5,7 @@ USING: tools.test kernel unicode.categories words sequences unicode.syntax ;
 printable? alpha? control? uncased? character?
 } [ execute ] with map ] unit-test
 [ "Nd" ] [ CHAR: 3 category ] unit-test
+[ "Lo" ] [ HEX: 3400 category ] unit-test
+[ "Lo" ] [ HEX: 3450 category ] unit-test
+[ "Lo" ] [ HEX: 4DB5 category ] unit-test
+[ "Cs" ] [ HEX: DD00 category ] unit-test

File diff suppressed because it is too large.

extra/unicode/collation/allkeys.txt (new executable file, 19811 lines added)

File diff suppressed because it is too large.

View File

@@ -0,0 +1,7 @@
USING: help.syntax help.markup ;
IN: unicode.collation
ABOUT: "unicode.collation"
ARTICLE: "unicode.collation" "Unicode collation algorithm"
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accounting for all of the characters in Unicode." ;

View File

@@ -0,0 +1,29 @@
USING: io io.files splitting unicode.collation sequences kernel
io.encodings.utf8 math.parser math.order tools.test assocs
io.streams.null words combinators.lib ;
IN: unicode.collation.tests
: parse-test ( -- strings )
"resource:extra/unicode/collation/CollationTest_SHIFTED.txt"
utf8 file-lines 5 tail
[ ";" split1 drop " " split [ hex> ] "" map-as ] map ;
: test-two ( str1 str2 -- )
[ +lt+ ] -rot [ string<=> ] 2curry unit-test ;
: failures
parse-test dup 2 <clumps>
[ string<=> +lt+ = not ] assoc-filter dup assoc-size ;
: test-equality
{ primary= secondary= tertiary= quaternary= }
[ execute ] 2with each ;
[ f f f f ] [ "hello" "hi" test-equality ] unit-test
[ t f f f ] [ "hello" "hŽllo" test-equality ] unit-test
[ t t f f ] [ "hello" "HELLO" test-equality ] unit-test
[ t t t f ] [ "hello" "h e l l o." test-equality ] unit-test
[ t t t t ] [ "hello" "\0hello\0" test-equality ] unit-test
parse-test 2 <clumps>
[ [ test-two ] assoc-each ] with-null-writer

View File

@@ -0,0 +1,165 @@
USING: sequences io.files io.encodings.ascii kernel values
splitting accessors math.parser ascii io assocs strings math
namespaces sorting combinators math.order arrays
unicode.normalize unicode.data combinators.lib locals
unicode.syntax macros sequences.deep words unicode.breaks
quotations ;
IN: unicode.collation
VALUE: ducet
TUPLE: weight primary secondary tertiary ignorable? ;
: parse-weight ( string -- weight )
"]" split but-last [
weight new swap rest unclip CHAR: * = swapd >>ignorable?
swap "." split first3 [ hex> ] tri@
[ >>primary ] [ >>secondary ] [ >>tertiary ] tri*
] map ;
: parse-line ( line -- code-point weight )
";" split1 [ [ blank? ] trim ] bi@
[ " " split [ hex> ] "" map-as ] [ parse-weight ] bi* ;
: parse-ducet ( stream -- ducet )
lines filter-comments
[ parse-line ] H{ } map>assoc ;
"resource:extra/unicode/collation/allkeys.txt"
ascii <file-reader> parse-ducet \ ducet set-value
! Fix up table for long contractions
: help-one ( assoc key -- )
! Need to be more general? Not for DUCET, apparently
2 head 2dup swap key? [ 2drop ] [
[ [ 1string swap at ] with { } map-as concat ]
[ swap set-at ] 2bi
] if ;
: insert-helpers ( assoc -- )
dup keys [ length 3 >= ] filter
[ help-one ] with each ;
ducet insert-helpers
: base ( char -- base )
{
{ [ dup HEX: 3400 HEX: 4DB5 between? ] [ drop HEX: FB80 ] } ! Extension A
{ [ dup HEX: 20000 HEX: 2A6D6 between? ] [ drop HEX: FB80 ] } ! Extension B
{ [ dup HEX: 4E00 HEX: 9FC3 between? ] [ drop HEX: FB40 ] } ! CJK
[ drop HEX: FBC0 ] ! Other
} cond ;
: AAAA ( char -- weight )
[ base ] [ -15 shift ] bi + HEX: 20 2 f weight boa ;
: BBBB ( char -- weight )
HEX: 7FFF bitand HEX: 8000 bitor 0 0 f weight boa ;
: illegal? ( char -- ? )
[ "Noncharacter_Code_Point" property? ]
[ category "Cs" = ] or? ;
: derive-weight ( char -- weights )
first dup illegal?
[ drop { } ]
[ [ AAAA ] [ BBBB ] bi 2array ] if ;
: last ( -- char )
building get empty? [ 0 ] [ building get peek peek ] if ;
: blocked? ( char -- ? )
combining-class [
last combining-class =
] [ last combining-class ] if* ;
: possible-bases ( -- slice-of-building )
building get dup [ first combining-class not ] find-last
drop [ 0 ] unless* tail-slice ;
:: ?combine ( char slice i -- ? )
[let | str [ i slice nth char suffix ] |
str ducet key? dup
[ str i slice set-nth ] when
] ;
: add ( char -- )
dup blocked? [ 1string , ] [
dup possible-bases dup length
[ ?combine ] 2with contains?
[ drop ] [ 1string , ] if
] if ;
: string>graphemes ( string -- graphemes )
[ [ add ] each ] { } make ;
: graphemes>weights ( graphemes -- weights )
[
dup weight? [ 1array ] ! From tailoring
[ dup ducet at [ ] [ derive-weight ] ?if ] if
] { } map-as concat ;
: append-weights ( weights quot -- )
swap [ ignorable?>> not ] filter
swap map [ zero? not ] filter % 0 , ;
: variable-weight ( weight -- )
dup ignorable?>> [ primary>> ] [ drop HEX: FFFF ] if , ;
: weights>bytes ( weights -- byte-array )
[
{
[ [ primary>> ] append-weights ]
[ [ secondary>> ] append-weights ]
[ [ tertiary>> ] append-weights ]
[ [ variable-weight ] each ]
} cleave
] { } make ;
: completely-ignorable? ( weight -- ? )
[ primary>> ] [ secondary>> ] [ tertiary>> ] tri
[ zero? ] tri@ and and ;
: filter-ignorable ( weights -- weights' )
>r f r> [
tuck primary>> zero? and
[ swap ignorable?>> or ]
[ swap completely-ignorable? or not ] 2bi
] filter nip ;
: collation-key ( string -- key )
nfd string>graphemes graphemes>weights
filter-ignorable weights>bytes ;
: insensitive= ( str1 str2 levels-removed -- ? )
[
swap collation-key swap
[ [ 0 = not ] right-trim but-last ] times
] curry bi@ = ;
: primary= ( str1 str2 -- ? )
3 insensitive= ;
: secondary= ( str1 str2 -- ? )
2 insensitive= ;
: tertiary= ( str1 str2 -- ? )
1 insensitive= ;
: quaternary= ( str1 str2 -- ? )
0 insensitive= ;
: compare-collation ( {str1,key} {str2,key} -- <=> )
2dup [ second ] bi@ <=> dup +eq+ =
[ drop <=> ] [ 2nip ] if ;
: w/collation-key ( str -- {str,key} )
dup collation-key 2array ;
: sort-strings ( strings -- sorted )
[ w/collation-key ] map
[ compare-collation ] sort
keys ;
: string<=> ( str1 str2 -- <=> )
[ w/collation-key ] bi@ compare-collation ;
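A small sketch of the top-level words (expected results per the tests earlier in this commit):

USING: unicode.collation math.order prettyprint ;
"hello" "hi" string<=> .          ! +lt+
{ "hi" "hello" } sort-strings .   ! { "hello" "hi" }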

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,264 @@
# SpecialCasing-5.0.0.txt
# Date: 2006-03-03, 08:23:36 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2006 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
#
# Special Casing Properties
#
# This file is a supplement to the UnicodeData file.
# It contains additional information about the casing of Unicode characters.
# (For compatibility, the UnicodeData.txt file only contains case mappings for
# characters where they are 1-1, and does not have locale-specific mappings.)
# For more information, see the discussion of Case Mappings in the Unicode Standard.
#
# All code points not listed in this file that do not have a simple case mappings
# in UnicodeData.txt map to themselves.
# ================================================================================
# Format
# ================================================================================
# The entries in this file are in the following machine-readable format:
#
# <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? # <comment>
#
# <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more
# than one character, they are separated by spaces. Other than as used to separate
# elements, spaces are to be ignored.
#
# The <condition_list> is optional. Where present, it consists of one or more locale IDs
# or contexts, separated by spaces. In these conditions:
# - A condition list overrides the normal behavior if all of the listed conditions are true.
# - The context is always the context of the characters in the original string,
# NOT in the resulting string.
# - Case distinctions in the condition list are not significant.
# - Conditions preceded by "Not_" represent the negation of the condition.
#
# A locale ID is defined by taking any language tag as defined by
# RFC 3066 (or its successor), and replacing '-' by '_'.
#
# A context for a character C is defined by Section 3.13 Default Case
# Operations, of The Unicode Standard, Version 5.0.
# (This is identical to the context defined by Unicode 4.1.0,
# as specified in http://www.unicode.org/versions/Unicode4.1.0/)
#
# Parsers of this file must be prepared to deal with future additions to this format:
# * Additional contexts
# * Additional fields
# ================================================================================
# ================================================================================
# Unconditional mappings
# ================================================================================
# The German es-zed is special--the normal mapping is to SS.
# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))
00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
# Preserve canonical equivalence for I with dot. Turkic is handled below.
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# Ligatures
FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# No corresponding uppercase precomposed character
0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when capitalizing iota-subscript (0345)
# It MUST be in normalized form--moved to the end of any sequence of combining marks.
# This is because logically it represents a following base character!
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
# It should never be the first character in a word, so in titlecasing it can be left as is.
# The following cases are already in the UnicodeData file, so are only commented here.
# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
# have special uppercases.
# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
# Some characters with YPOGEGRAMMENI also have no corresponding titlecases
1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
# ================================================================================
# Conditional mappings
# ================================================================================
# Special case for final form of sigma
03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
# Note: the following cases for non-final are already in the UnicodeData file.
# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
# Note: the following cases are not included, since they would case-fold in lowercasing
# 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
# 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
# ================================================================================
# Locale-sensitive mappings
# ================================================================================
# Lithuanian
# Lithuanian retains the dot in a lowercase i when followed by accents.
# Remove DOT ABOVE after "i" with upper or titlecase
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
# Introduce an explicit dot above when lowercasing capital I's and J's
# whenever there are more accents above.
# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
# ================================================================================
# Turkish and Azeri
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
# When uppercasing, i turns into a dotted capital I
0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
# Note: the following case is already in the UnicodeData file.
# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
# EOF

View File

@ -1,24 +1,52 @@
 USING: assocs math kernel sequences io.files hashtables
 quotations splitting arrays math.parser hash2 math.order
 byte-arrays words namespaces words compiler.units parser
-io.encodings.ascii values ;
+io.encodings.ascii values interval-maps ascii sets assocs.lib
+combinators.lib combinators locals math.ranges sorting ;
 IN: unicode.data
+VALUE: simple-lower
+VALUE: simple-upper
+VALUE: simple-title
+VALUE: canonical-map
+VALUE: combine-map
+VALUE: class-map
+VALUE: compatibility-map
+VALUE: category-map
+VALUE: name-map
+VALUE: special-casing
+VALUE: properties
+: canonical-entry ( char -- seq ) canonical-map at ;
+: combine-chars ( a b -- char/f ) combine-map hash2 ;
+: compatibility-entry ( char -- seq ) compatibility-map at ;
+: combining-class ( char -- n ) class-map at ;
+: non-starter? ( char -- ? ) class-map key? ;
+: name>char ( string -- char ) name-map at ;
+: char>name ( char -- string ) name-map value-at ;
+: property? ( char property -- ? ) properties at interval-key? ;
 ! Convenience functions
 : ?between? ( n/f from to -- ? )
     pick [ between? ] [ 3drop f ] if ;
 ! Loading data from UnicodeData.txt
+: split-; ( line -- array )
+    ";" split [ [ blank? ] trim ] map ;
 : data ( filename -- data )
-    ascii file-lines [ ";" split ] map ;
+    ascii file-lines [ split-; ] map ;
 : load-data ( -- data )
-    "resource:extra/unicode/UnicodeData.txt" data ;
+    "resource:extra/unicode/data/UnicodeData.txt" data ;
+: filter-comments ( lines -- lines )
+    [ "#@" split first ] map harvest ;
 : (process-data) ( index data -- newdata )
+    filter-comments
     [ [ nth ] keep first swap 2array ] with map
+    [ second empty? not ] filter
     [ >r hex> r> ] assoc-map ;
 : process-data ( index data -- hash )
@ -34,7 +62,7 @@ IN: unicode.data
     dup [ swap (chain-decomposed) ] curry assoc-map ;
 : first* ( seq -- ? )
-    second dup empty? [ ] [ first ] ?if ;
+    second [ empty? ] [ first ] or? ;
 : (process-decomposed) ( data -- alist )
     5 swap (process-data)
@ -46,12 +74,12 @@ IN: unicode.data
         [ second length 2 = ] filter
         ! using 1009 as the size, the maximum load is 4
         [ first2 first2 rot 3array ] map 1009 alist>hash2
-    ] keep
-    >hashtable chain-decomposed ;
+    ] [ >hashtable chain-decomposed ] bi ;
-: process-compat ( data -- hash )
+: process-compatibility ( data -- hash )
     (process-decomposed)
     [ dup first* [ first2 rest 2array ] unless ] map
-    [ second empty? not ] filter
     >hashtable chain-decomposed ;
 : process-combining ( data -- hash )
@ -62,23 +90,34 @@ IN: unicode.data
 : categories ( -- names )
     ! For non-existent characters, use Cn
-    { "Lu" "Ll" "Lt" "Lm" "Lo"
+    { "Cn"
+      "Lu" "Ll" "Lt" "Lm" "Lo"
       "Mn" "Mc" "Me"
       "Nd" "Nl" "No"
       "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po"
       "Sm" "Sc" "Sk" "So"
       "Zs" "Zl" "Zp"
-      "Cc" "Cf" "Cs" "Co" "Cn" } ;
+      "Cc" "Cf" "Cs" "Co" } ;
-: unicode-chars HEX: 2FA1E ;
+: num-chars HEX: 2FA1E ;
 ! the maximum unicode char in the first 3 planes
-: process-category ( data -- category-listing )
-    2 swap (process-data)
-    unicode-chars <byte-array> swap dupd swap [
-        >r over unicode-chars >= [ r> 3drop ]
-        [ categories index swap r> set-nth ] if
-    ] curry assoc-each ;
+: ?set-nth ( val index seq -- )
+    2dup bounds-check? [ set-nth ] [ 3drop ] if ;
+:: fill-ranges ( table -- table )
+    name-map >alist sort-values keys
+    [ [ "first>" tail? ] [ "last>" tail? ] or? ] filter
+    2 group [
+        [ name>char ] bi@ [ [a,b] ] [ table ?nth ] bi
+        [ swap table ?set-nth ] curry each
+    ] assoc-each table ;
+:: process-category ( data -- category-listing )
+    [let | table [ num-chars <byte-array> ] |
+        2 data (process-data) [| char cat |
+            cat categories index char table ?set-nth
+        ] assoc-each table fill-ranges ] ;
 : ascii-lower ( string -- lower )
     [ dup CHAR: A CHAR: Z between? [ HEX: 20 + ] when ] map ;
@ -99,39 +138,44 @@ C: <code-point> code-point
     4 head [ multihex ] map first4
     <code-point> swap first set ;
-VALUE: simple-lower
-VALUE: simple-upper
-VALUE: simple-title
-VALUE: canonical-map
-VALUE: combine-map
-VALUE: class-map
-VALUE: compat-map
-VALUE: category-map
-VALUE: name-map
-VALUE: special-casing
-: canonical-entry ( char -- seq ) canonical-map at ;
-: combine-chars ( a b -- char/f ) combine-map hash2 ;
-: compat-entry ( char -- seq ) compat-map at ;
-: combining-class ( char -- n ) class-map at ;
-: non-starter? ( char -- ? ) class-map key? ;
-: name>char ( string -- char ) name-map at ;
-: char>name ( char -- string ) name-map value-at ;
+! Extra properties
+: properties-lines ( -- lines )
+    "resource:extra/unicode/data/PropList.txt"
+    ascii file-lines ;
+: parse-properties ( -- {{[a,b],prop}} )
+    properties-lines filter-comments [
+        split-; first2
+        [ ".." split1 [ dup ] unless* [ hex> ] bi@ 2array ] dip
+    ] { } map>assoc ;
+: properties>intervals ( properties -- assoc[str,interval] )
+    dup values prune [ f ] H{ } map>assoc
+    [ [ insert-at ] curry assoc-each ] keep
+    [ <interval-set> ] assoc-map ;
+: load-properties ( -- assoc )
+    parse-properties properties>intervals ;
 ! Special casing data
 : load-special-casing ( -- special-casing )
-    "resource:extra/unicode/SpecialCasing.txt" data
+    "resource:extra/unicode/data/SpecialCasing.txt" data
     [ length 5 = ] filter
     [ [ set-code-point ] each ] H{ } make-assoc ;
-load-data
-dup process-names \ name-map set-value
-13 over process-data \ simple-lower set-value
-12 over process-data tuck \ simple-upper set-value
-14 over process-data swapd assoc-union \ simple-title set-value
-dup process-combining \ class-map set-value
-dup process-canonical \ canonical-map set-value
-\ combine-map set-value
-dup process-compat \ compat-map set-value
-process-category \ category-map set-value
+load-data {
+    [ process-names \ name-map set-value ]
+    [ 13 swap process-data \ simple-lower set-value ]
+    [ 12 swap process-data \ simple-upper set-value ]
+    [ 14 swap process-data
+        simple-upper assoc-union \ simple-title set-value ]
+    [ process-combining \ class-map set-value ]
+    [ process-canonical \ canonical-map set-value
+        \ combine-map set-value ]
+    [ process-compatibility \ compatibility-map set-value ]
+    [ process-category \ category-map set-value ]
+} cleave
 load-special-casing \ special-casing set-value
+load-properties \ properties set-value

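Two details of the reworked loader are easy to miss. First, `categories` now puts "Cn" at index 0, so a freshly allocated, zero-filled byte array already reads as "unassigned" for every code point. Second, UnicodeData.txt compresses large blocks into paired `First>`/`Last>` rows (e.g. the CJK ideographs), and the new `fill-ranges` word floods the category of the `First>` entry across the whole range. A rough Python rendering of that flood-fill, with sample data assumed (this is a sketch of the idea, not the Factor API):

def fill_ranges(table, name_to_char):
    # Pair "..., First>"/"..., Last>" names and copy the First> entry's
    # category across the whole range.
    firsts = {n[: -len(", First>")]: c for n, c in name_to_char.items()
              if n.endswith(", First>")}
    lasts = {n[: -len(", Last>")]: c for n, c in name_to_char.items()
             if n.endswith(", Last>")}
    for prefix, start in firsts.items():
        end = lasts[prefix]
        table[start + 1 : end + 1] = [table[start]] * (end - start)
    return table

table = ["Cn"] * 0x110000                 # index 0 of categories = "Cn"
table[0x4E00] = "Lo"                      # category from the First> row
names = {"<CJK Ideograph, First>": 0x4E00, "<CJK Ideograph, Last>": 0x9FA5}
assert fill_ranges(table, names)[0x8000] == "Lo"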
File diff suppressed because it is too large

View File

@ -1,8 +1,11 @@
-USING: unicode.normalize kernel tools.test sequences ;
+USING: unicode.normalize kernel tools.test sequences
+unicode.data io.encodings.utf8 io.files splitting math.parser
+locals math quotations assocs combinators ;
+IN: unicode.normalize.tests
 [ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
-[ "ab\u00064b\u00034d\u00034e\u000347\u000346" ] [ "ab\u000346\u000347\u00064b\u00034e\u00034d" dup reorder ] unit-test
+[ "ab\u00064b\u000347\u00034e\u00034d\u000346" ] [ "ab\u000346\u000347\u00064b\u00034e\u00034d" dup reorder ] unit-test
 [ "hello" "hello" ] [ "hello" [ nfd ] keep nfkd ] unit-test
 [ "\u00FB012\u002075\u00017F\u000323\u000307" "fi25s\u000323\u000307" ]
 [ "\u00FB012\u002075\u001E9B\u000323" [ nfd ] keep nfkd ] unit-test
@ -16,3 +19,26 @@ USING: unicode.normalize kernel tools.test sequences ;
 [ f ] [ 0 hangul? ] unit-test
 [ "\u001112\u001161\u0011ab" ] [ "\u00d55c" nfd ] unit-test
 [ "\u00d55c" ] [ "\u001112\u001161\u0011ab" nfc ] unit-test
+: parse-test ( -- tests )
+    "resource:extra/unicode/normalize/NormalizationTest.txt"
+    utf8 file-lines filter-comments
+    [ ";" split 5 head [ " " split [ hex> ] "" map-as ] map ] map ;
+:: assert= ( test spec quot -- )
+    spec [
+        [
+            [ 1- test nth ] bi@
+            [ 1quotation ] [ quot curry ] bi* unit-test
+        ] with each
+    ] assoc-each ;
+: run-line ( test -- )
+    {
+        [ { { 2 { 1 2 3 } } { 4 { 4 5 } } } [ nfc ] assert= ]
+        [ { { 3 { 1 2 3 } } { 5 { 4 5 } } } [ nfd ] assert= ]
+        [ { { 4 { 1 2 3 4 5 } } } [ nfkc ] assert= ]
+        [ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
+    } cleave ;
+! parse-test [ run-line ] each

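The disabled runner above (`! parse-test [ run-line ] each`) drives the standard conformance file: each NormalizationTest.txt line carries five columns c1..c5, and the 1-indexed tables passed to `assert=` encode the UAX #15 invariants, e.g. c2 = NFC(c1) = NFC(c2) = NFC(c3) and c4 = NFC(c4) = NFC(c5). The same invariants, sketched in Python against the stdlib for comparison (illustrative, not part of the commit):

import unicodedata

def check_line(c1, c2, c3, c4, c5):
    # The invariants encoded by run-line's tables, per NormalizationTest.txt.
    nfc = lambda s: unicodedata.normalize("NFC", s)
    nfd = lambda s: unicodedata.normalize("NFD", s)
    nfkc = lambda s: unicodedata.normalize("NFKC", s)
    nfkd = lambda s: unicodedata.normalize("NFKD", s)
    assert c2 == nfc(c1) == nfc(c2) == nfc(c3) and c4 == nfc(c4) == nfc(c5)
    assert c3 == nfd(c1) == nfd(c2) == nfd(c3) and c5 == nfd(c4) == nfd(c5)
    assert c4 == nfkc(c1) == nfkc(c2) == nfkc(c3) == nfkc(c4) == nfkc(c5)
    assert c5 == nfkd(c1) == nfkd(c2) == nfkd(c3) == nfkd(c4) == nfkd(c5)

check_line("\u00E9", "\u00E9", "e\u0301", "\u00E9", "e\u0301")  # LATIN SMALL E WITH ACUTE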
36
extra/unicode/normalize/normalize.factor Normal file → Executable file
View File

@ -1,4 +1,5 @@
-USING: sequences namespaces unicode.data kernel math arrays ;
+USING: sequences namespaces unicode.data kernel math arrays
+locals combinators.lib sequences.lib combinators.lib ;
 IN: unicode.normalize
 ! Conjoining Jamo behavior
@ -26,7 +27,7 @@ IN: unicode.normalize
     hangul-base - final-count /mod final-base +
     >r medial-count /mod medial-base +
     >r initial-base + r> r>
-    dup zero? [ drop 2array ] [ 3array ] if ;
+    dup final-base = [ drop 2array ] [ 3array ] if ;
 : jamo>hangul ( initial medial final -- hangul )
     >r >r initial-base - medial-count *
@ -35,21 +36,6 @@ IN: unicode.normalize
 ! Normalization -- Decomposition
-: (insert) ( seq n quot -- )
-    over 0 = [ 3drop ] [
-        [ >r dup 1- rot [ nth ] curry bi@ r> bi@ > ] 3keep
-        roll [ 3drop ]
-        [ >r [ dup 1- rot exchange ] 2keep 1- r> (insert) ] if
-    ] if ; inline
-: insert ( seq quot elt n -- )
-    swap rot >r -rot [ swap set-nth ] 2keep r> (insert) ; inline
-: insertion-sort ( seq quot -- )
-    ! quot is a transformation on elements
-    over dup length
-    [ >r >r 2dup r> r> insert ] 2each 2drop ; inline
 : reorder-slice ( string start -- slice done? )
     2dup swap [ non-starter? not ] find-from drop
     [ [ over length ] unless* rot <slice> ] keep not ;
@ -69,15 +55,17 @@ IN: unicode.normalize
 : reorder-back ( string i -- )
     over [ non-starter? not ] find-last-from drop ?1+ reorder-next 2drop ;
-: decompose ( string quot -- decomposed )
+:: decompose ( string quot -- decomposed )
     ! When there are 8 and 32-bit strings, this'll be
     ! equivalent to clone on 8 and the contents of the last
     ! main quotation on 32.
-    over [ 127 < ] all? [ drop ] [
-        swap [
-            dup hangul? [ hangul>jamo % drop ]
-            [ dup rot call [ % ] [ , ] ?if ] if
-        ] with each ] "" make
+    string [ 127 < ] all? [ string ] [
+        [
+            string [
+                dup hangul? [ hangul>jamo % ]
+                [ dup quot call [ % ] [ , ] ?if ] if
+            ] each
+        ] "" make
         dup reorder
     ] if ; inline
@ -85,7 +73,7 @@ IN: unicode.normalize
     [ canonical-entry ] decompose ;
 : nfkd ( string -- string )
-    [ compat-entry ] decompose ;
+    [ compatibility-entry ] decompose ;
 : string-append ( s1 s2 -- string )
     ! This could be more optimized,

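One real bug fix hides in the Jamo hunk: `hangul>jamo` computes the final consonant as `final-count /mod final-base +`, so a syllable with no final jamo yields exactly `final-base`, never zero, and the old `zero?` test always produced a 3-element decomposition. The arithmetic, transcribed into Python with the standard Hangul algorithm constants (illustrative only, not the Factor code):

S_BASE, L_BASE, V_BASE, T_BASE = 0xAC00, 0x1100, 0x1161, 0x11A7
V_COUNT, T_COUNT = 21, 28

def hangul_to_jamo(syllable):
    i = syllable - S_BASE
    initial = L_BASE + i // (V_COUNT * T_COUNT)
    medial = V_BASE + i % (V_COUNT * T_COUNT) // T_COUNT
    final = T_BASE + i % T_COUNT
    # final == T_BASE means "no final consonant": this is the final-base
    # comparison the commit introduces (the value is never zero).
    return (initial, medial) if final == T_BASE else (initial, medial, final)

assert hangul_to_jamo(0xD55C) == (0x1112, 0x1161, 0x11AB)   # U+D55C, with final
assert hangul_to_jamo(0xAC00) == (0x1100, 0x1161)           # U+AC00, no final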
View File

@ -1,7 +1,7 @@
 USING: values kernel sequences assocs io.files
 io.encodings ascii math.ranges io splitting math.parser
 namespaces byte-arrays locals math sets io.encodings.ascii
-words compiler.units arrays interval-maps ;
+words compiler.units arrays interval-maps unicode.data ;
 IN: unicode.script
 <PRIVATE
@ -10,9 +10,7 @@ SYMBOL: interned
 : parse-script ( stream -- assoc )
     ! assoc is code point/range => name
-    lines [ "#" split1 drop ] map harvest [
-        ";" split1 [ [ blank? ] trim ] bi@
-    ] H{ } map>assoc ;
+    lines filter-comments [ split-; ] map ;
 : range, ( value key -- )
     swap interned get

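Both `parse-script` here and `load-properties` in unicode.data now feed range-to-value tables into interval maps, since files like Script.txt and PropList.txt assign a property to whole code-point ranges. The underlying lookup is a binary search over sorted, disjoint intervals; a minimal Python sketch of that idea (not the interval-maps vocabulary itself):

import bisect

class IntervalSet:
    def __init__(self, intervals):          # inclusive (lo, hi) pairs
        self.intervals = sorted(intervals)
        self.starts = [lo for lo, _ in self.intervals]

    def __contains__(self, code_point):
        # Find the last interval starting at or before code_point.
        i = bisect.bisect_right(self.starts, code_point) - 1
        return i >= 0 and code_point <= self.intervals[i][1]

white_space = IntervalSet([(0x09, 0x0D), (0x20, 0x20), (0x2000, 0x200A)])
assert 0x0A in white_space and 0x41 not in white_space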
View File

@ -8,4 +8,4 @@ USING: tools.test yahoo kernel io.files xml sequences ;
"Official site with news, tour dates, discography, store, community, and more." "Official site with news, tour dates, discography, store, community, and more."
} ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test } ] [ "resource:extra/yahoo/test-results.xml" file>xml parse-yahoo first ] unit-test
[ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 query ] unit-test [ "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=hi&results=1" ] [ "hi" 1 "Factor-search" query ] unit-test

19
extra/yahoo/yahoo.factor Normal file → Executable file
View File

@ -1,7 +1,7 @@
 ! Copyright (C) 2006 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: http.client xml xml.utilities kernel sequences
-namespaces http math.parser help math.order ;
+namespaces http math.parser help math.order locals ;
 IN: yahoo
 TUPLE: result title url summary ;
@ -16,14 +16,21 @@ C: <result> result
 ] map ;
 : yahoo-url ( -- str )
-    "http://search.yahooapis.com/WebSearchService/V1/webSearch?appid=Factor-search&query=" ;
+    "http://search.yahooapis.com/WebSearchService/V1/webSearch" ;
-: query ( search num -- url )
+:: query ( search num appid -- url )
     [
         yahoo-url %
-        swap url-encode %
-        "&results=" % #
+        "?appid=" % appid %
+        "&query=" % search url-encode %
+        "&results=" % num #
     ] "" make ;
-: search-yahoo ( search num -- seq )
+: factor-id
+    "fRrVAKzV34GDyeRw6bUHDhEWHRedwfOC7e61wwXZLgGF80E67spxdQXuugBe2pgIevMmKwA-" ;
+: search-yahoo/id ( search num id -- seq )
     query http-get string>xml parse-yahoo ;
+: search-yahoo ( search num -- seq )
+    factor-id search-yahoo/id ;
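Net effect of the Yahoo changes: the application id is no longer baked into `yahoo-url` but passed to `query`, with `search-yahoo` supplying the project's registered id through `search-yahoo/id`. For reference, the URL it assembles, sketched in Python (urllib's quoting is an assumption here; the Factor code concatenates by hand with url-encode):

from urllib.parse import quote_plus

def query(search, num, appid):
    # Mirrors the reworked query word: base URL + appid + query + results.
    return ("http://search.yahooapis.com/WebSearchService/V1/webSearch"
            f"?appid={appid}&query={quote_plus(search)}&results={num}")

assert query("hi", 1, "Factor-search") == (
    "http://search.yahooapis.com/WebSearchService/V1/webSearch"
    "?appid=Factor-search&query=hi&results=1")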