Merge branch 'master' of git://factorcode.org/git/factor

2009-03-20 19:43:11 -05:00 · 2009-03-20 19:43:11 -05:00 · 7e020af18a
parent b06499605c 5e80ffb0be
commit 7e020af18a
7 changed files with 102 additions and 23 deletions
--- a/basis/interval-maps/interval-maps-docs.factor
+++ b/basis/interval-maps/interval-maps-docs.factor
@ -1,22 +1,32 @@
-USING: help.markup help.syntax ;
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
+! See http://factorcode.org/license.txt for BSD license.
+USING: help.markup help.syntax assocs kernel sequences ;
 IN: interval-maps

 HELP: interval-at*
-{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
+{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
 { $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;

 HELP: interval-at
-{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
+{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
 { $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;

 HELP: interval-key?
-{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
+{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
 { $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;

 HELP: <interval-map>
-{ $values { "specification" "an assoc" } { "map" "an interval map" } }
+{ $values { "specification" assoc } { "map" interval-map } }
 { $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;

+HELP: interval-values
+{ $values { "map" interval-map } { "values" sequence } }
+{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
+
+HELP: coalesce
+{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
+{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
+
 ARTICLE: "interval-maps" "Interval maps"
 "The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
 $nl
@ -24,7 +34,9 @@ $nl
 { $subsection interval-at* }
 { $subsection interval-at }
 { $subsection interval-key? }
+{ $subsection interval-values }
 "Use the following to construct interval maps"
-{ $subsection <interval-map> } ;
+{ $subsection <interval-map> }
+{ $subsection coalesce } ;

 ABOUT: "interval-maps"
--- a/basis/interval-maps/interval-maps.factor
+++ b/basis/interval-maps/interval-maps.factor
@ -8,17 +8,21 @@ TUPLE: interval-map array ;

 <PRIVATE

+ALIAS: start first
+ALIAS: end second
+ALIAS: value third
+
 : find-interval ( key interval-map -- interval-node )
-    [ first <=> ] with search nip ;
+    array>> [ start <=> ] with search nip ;

 : interval-contains? ( key interval-node -- ? )
-    first2 between? ;
+    [ start ] [ end ] bi between? ;

 : all-intervals ( sequence -- intervals )
    [ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;

 : disjoint? ( node1 node2 -- ? )
-    [ second ] [ first ] bi* < ;
+    [ end ] [ start ] bi* < ;

 : ensure-disjoint ( intervals -- intervals )
    dup [ disjoint? ] monotonic?
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
 PRIVATE>

 : interval-at* ( key map -- value ? )
-    [ drop ] [ array>> find-interval ] 2bi
+    [ drop ] [ find-interval ] 2bi
    [ nip ] [ interval-contains? ] 2bi
-    [ third t ] [ drop f f ] if ;
+    [ value t ] [ drop f f ] if ;

 : interval-at ( key map -- value ) interval-at* drop ;

 : interval-key? ( key map -- ? ) interval-at* nip ;

+: interval-values ( map -- values )
+    array>> [ value ] map ;
+
 : <interval-map> ( specification -- map )
    all-intervals [ [ first second ] compare ] sort
    >intervals ensure-disjoint interval-map boa ;
--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )

 M: f class-member? 2drop f ;

+: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
+    bi* = ; inline
+
 M: script-class class-member?
-    [ script-of ] [ script>> ] bi* = ;
+    [ script-of ] [ script>> ] same? ;

 M: category-class class-member?
-    [ category# ] [ category>> ] bi* = ;
+    [ category ] [ category>> ] same? ;

 M: category-range-class class-member?
-    [ category first ] [ category>> ] bi* = ;
+    [ category first ] [ category>> ] same? ;

 TUPLE: not-class class ;

 PREDICATE: not-integer < not-class class>> integer? ;

 UNION: simple-class
-    primitive-class range-class category-class category-range-class dot ;
+    primitive-class range-class dot ;
 PREDICATE: not-simple < not-class class>> simple-class? ;

 M: not-class class-member?
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@ -2,7 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
 combinators regexp.classes strings splitting peg locals accessors
-regexp.ast unicode.case ;
+regexp.ast unicode.case unicode.script.private unicode.categories
+memoize interval-maps sets unicode.data combinators.short-circuit ;
 IN: regexp.parser

 : allowed-char? ( ch -- ? )
@ -18,15 +19,41 @@ ERROR: bad-number ;

 ERROR: bad-class name ;

+: simple ( str -- simple )
+    ! Alternatively, first collation key level?
+    >case-fold [ " \t_" member? not ] filter ;
+
+: simple-table ( seq -- table )
+    [ [ simple ] keep ] H{ } map>assoc ;
+
+MEMO: simple-script-table ( -- table )
+    script-table interval-values prune simple-table ;
+
+MEMO: simple-category-table ( -- table )
+    categories simple-table ;
+
 : parse-unicode-class ( name -- class )
-    ! Implement this!
-    drop f ;
+    {
+        { [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
+            >upper first
+            <category-range-class>
+        ] }
+        { [ dup >title categories member? ] [
+            simple-category-table at <category-class>
+        ] }
+        { [ "script=" ?head ] [
+            dup simple-script-table at
+            [ <script-class> ]
+            [ "script=" prepend bad-class ] ?if
+        ] }
+        [ bad-class ]
+    } cond ;

 : unicode-class ( name -- class )
    dup parse-unicode-class [ ] [ bad-class ] ?if ;

 : name>class ( name -- class )
-    >string >case-fold {
+    >string simple {
        { "lower" letter-class }
        { "upper" LETTER-class }
        { "alpha" Letter-class }
--- a/basis/regexp/regexp-docs.factor
+++ b/basis/regexp/regexp-docs.factor
@ -72,8 +72,10 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
    { { $snippet "\\p{blank}" } "Non-newline whitespace" }
    { { $snippet "\\p{cntrl}" } "Control character" }
    { { $snippet "\\p{space}" } "Whitespace" }
-    { { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
-"Full unicode properties are not yet supported."
+    { { $snippet "\\p{xdigit}" } "Hexidecimal digit" }
+    { { $snippet "\\p{Nd}" } "Character in Unicode category Nd" } 
+    { { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" } 
+    { { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
 { $heading "Boundaries" }
 "Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
 { $table
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@ -480,3 +480,31 @@ IN: regexp-tests
 [ f ] [ "a\r" R/ a$./mds matches? ] unit-test
 [ t ] [ "a\n" R/ a$./ms matches? ] unit-test
 [ t ] [ "a\n" R/ a$./mds matches? ] unit-test
+
+! Unicode categories
+[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
+[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
+[ f ] [ " " R/ \p{L}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
+[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
+[ t ] [ " " R/ \P{L}/ matches? ] unit-test
+
+[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
+[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
+[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
+
+[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
+[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
+[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
+
+! These should be case-insensitive
+[ f ] [ " " R/ \p{l}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
+[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
+[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
+[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
--- a/build-support/factor.sh
+++ b/build-support/factor.sh
@ -139,10 +139,10 @@ check_library_exists() {
 }

 check_X11_libraries() {
-    check_library_exists freetype
    check_library_exists GLU
    check_library_exists GL
    check_library_exists X11
+    check_library_exists pango
 }

 check_libraries() {
@ -502,7 +502,7 @@ make_boot_image() {
 }

 install_build_system_apt() {
-    sudo apt-get --yes install libc6-dev libpango-1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
+    sudo apt-get --yes install libc6-dev libpango1.0-dev libx11-dev xorg-dev glutg3-dev wget git-core git-doc rlwrap gcc make
    check_ret sudo
 }