Merge branch 'master' of git://factorcode.org/git/factor

db4
Slava Pestov 2009-03-20 19:43:11 -05:00
commit 7e020af18a
7 changed files with 102 additions and 23 deletions

View File

@ -1,22 +1,32 @@
USING: help.markup help.syntax ;
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax assocs kernel sequences ;
IN: interval-maps
HELP: interval-at*
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;
HELP: interval-at
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
{ $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;
HELP: interval-key?
{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
{ $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;
HELP: <interval-map>
{ $values { "specification" "an assoc" } { "map" "an interval map" } }
{ $values { "specification" assoc } { "map" interval-map } }
{ $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;
HELP: interval-values
{ $values { "map" interval-map } { "values" sequence } }
{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
HELP: coalesce
{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
ARTICLE: "interval-maps" "Interval maps"
"The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
$nl
@ -24,7 +34,9 @@ $nl
{ $subsection interval-at* }
{ $subsection interval-at }
{ $subsection interval-key? }
{ $subsection interval-values }
"Use the following to construct interval maps"
{ $subsection <interval-map> } ;
{ $subsection <interval-map> }
{ $subsection coalesce } ;
ABOUT: "interval-maps"

View File

@ -8,17 +8,21 @@ TUPLE: interval-map array ;
<PRIVATE
ALIAS: start first
ALIAS: end second
ALIAS: value third
: find-interval ( key interval-map -- interval-node )
[ first <=> ] with search nip ;
array>> [ start <=> ] with search nip ;
: interval-contains? ( key interval-node -- ? )
first2 between? ;
[ start ] [ end ] bi between? ;
: all-intervals ( sequence -- intervals )
[ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;
: disjoint? ( node1 node2 -- ? )
[ second ] [ first ] bi* < ;
[ end ] [ start ] bi* < ;
: ensure-disjoint ( intervals -- intervals )
dup [ disjoint? ] monotonic?
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
PRIVATE>
: interval-at* ( key map -- value ? )
[ drop ] [ array>> find-interval ] 2bi
[ drop ] [ find-interval ] 2bi
[ nip ] [ interval-contains? ] 2bi
[ third t ] [ drop f f ] if ;
[ value t ] [ drop f f ] if ;
: interval-at ( key map -- value ) interval-at* drop ;
: interval-key? ( key map -- ? ) interval-at* nip ;
: interval-values ( map -- values )
array>> [ value ] map ;
: <interval-map> ( specification -- map )
all-intervals [ [ first second ] compare ] sort
>intervals ensure-disjoint interval-map boa ;

View File

@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )
M: f class-member? 2drop f ;
: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
bi* = ; inline
M: script-class class-member?
[ script-of ] [ script>> ] bi* = ;
[ script-of ] [ script>> ] same? ;
M: category-class class-member?
[ category# ] [ category>> ] bi* = ;
[ category ] [ category>> ] same? ;
M: category-range-class class-member?
[ category first ] [ category>> ] bi* = ;
[ category first ] [ category>> ] same? ;
TUPLE: not-class class ;
PREDICATE: not-integer < not-class class>> integer? ;
UNION: simple-class
primitive-class range-class category-class category-range-class dot ;
primitive-class range-class dot ;
PREDICATE: not-simple < not-class class>> simple-class? ;
M: not-class class-member?

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
combinators regexp.classes strings splitting peg locals accessors
regexp.ast unicode.case ;
regexp.ast unicode.case unicode.script.private unicode.categories
memoize interval-maps sets unicode.data combinators.short-circuit ;
IN: regexp.parser
: allowed-char? ( ch -- ? )
@ -18,15 +19,41 @@ ERROR: bad-number ;
ERROR: bad-class name ;
: simple ( str -- simple )
! Alternatively, first collation key level?
>case-fold [ " \t_" member? not ] filter ;
: simple-table ( seq -- table )
[ [ simple ] keep ] H{ } map>assoc ;
MEMO: simple-script-table ( -- table )
script-table interval-values prune simple-table ;
MEMO: simple-category-table ( -- table )
categories simple-table ;
: parse-unicode-class ( name -- class )
! Implement this!
drop f ;
{
{ [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
>upper first
<category-range-class>
] }
{ [ dup >title categories member? ] [
simple-category-table at <category-class>
] }
{ [ "script=" ?head ] [
dup simple-script-table at
[ <script-class> ]
[ "script=" prepend bad-class ] ?if
] }
[ bad-class ]
} cond ;
: unicode-class ( name -- class )
dup parse-unicode-class [ ] [ bad-class ] ?if ;
: name>class ( name -- class )
>string >case-fold {
>string simple {
{ "lower" letter-class }
{ "upper" LETTER-class }
{ "alpha" Letter-class }

View File

@ -72,8 +72,10 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ { $snippet "\\p{blank}" } "Non-newline whitespace" }
{ { $snippet "\\p{cntrl}" } "Control character" }
{ { $snippet "\\p{space}" } "Whitespace" }
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
"Full unicode properties are not yet supported."
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" }
{ { $snippet "\\p{Nd}" } "Character in Unicode category Nd" }
{ { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" }
{ { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
{ $heading "Boundaries" }
"Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
{ $table

View File

@ -480,3 +480,31 @@ IN: regexp-tests
[ f ] [ "a\r" R/ a$./mds matches? ] unit-test
[ t ] [ "a\n" R/ a$./ms matches? ] unit-test
[ t ] [ "a\n" R/ a$./mds matches? ] unit-test
! Unicode categories
[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
[ f ] [ " " R/ \p{L}/ matches? ] unit-test
[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
[ t ] [ " " R/ \P{L}/ matches? ] unit-test
[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
! These should be case-insensitive
[ f ] [ " " R/ \p{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test

View File

@ -139,10 +139,10 @@ check_library_exists() {
}
check_X11_libraries() {
check_library_exists freetype
check_library_exists GLU
check_library_exists GL
check_library_exists X11
check_library_exists pango
}
check_libraries() {
@ -502,7 +502,7 @@ make_boot_image() {
}
install_build_system_apt() {
sudo apt-get --yes install libc6-dev libpango-1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
sudo apt-get --yes install libc6-dev libpango1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
check_ret sudo
}