diff --git a/build-support/factor.sh b/build-support/factor.sh index 70c522f6cd..c60ab46671 100755 --- a/build-support/factor.sh +++ b/build-support/factor.sh @@ -89,11 +89,6 @@ set_md5sum() { set_gcc() { case $OS in openbsd) ensure_program_installed egcc; CC=egcc;; - netbsd) if [[ $WORD -eq 64 ]] ; then - CC=/usr/pkg/gcc34/bin/gcc - else - CC=gcc - fi ;; *) CC=gcc;; esac } diff --git a/extra/alarms/alarms-docs.factor b/extra/alarms/alarms-docs.factor index 80a0c14079..b25df236c9 100755 --- a/extra/alarms/alarms-docs.factor +++ b/extra/alarms/alarms-docs.factor @@ -2,7 +2,7 @@ IN: alarms USING: help.markup help.syntax calendar quotations ; HELP: alarm -{ $class-description "An alarm. Cancel passed to " { $link cancel-alarm } "." } ; +{ $class-description "An alarm. Can be passed to " { $link cancel-alarm } "." } ; HELP: add-alarm { $values { "quot" quotation } { "time" timestamp } { "frequency" "a " { $link duration } " or " { $link f } } { "alarm" alarm } } diff --git a/extra/io/windows/windows.factor b/extra/io/windows/windows.factor index 97349f5537..abfffdf93a 100755 --- a/extra/io/windows/windows.factor +++ b/extra/io/windows/windows.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien alien.c-types arrays destructors io io.backend io.buffers io.files io.ports io.sockets io.binary -io.sockets windows.errors strings +io.sockets io.timeouts windows.errors strings kernel math namespaces sequences windows windows.kernel32 windows.shell32 windows.types windows.winsock splitting continuations math.bitfields system accessors ; diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor index 8078932877..ea62d2105a 100644 --- a/extra/regexp4/regexp4-tests.factor +++ b/extra/regexp4/regexp4-tests.factor @@ -131,6 +131,116 @@ IN: regexp4-tests [ f ] [ "\\" "[^\\\\]" matches? ] unit-test [ t ] [ "a" "[^\\\\]" matches? ] unit-test +[ t ] [ "0" "[\\d]" matches? ] unit-test +[ f ] [ "a" "[\\d]" matches? 
] unit-test +[ f ] [ "0" "[^\\d]" matches? ] unit-test +[ t ] [ "a" "[^\\d]" matches? ] unit-test + +[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" matches? ] unit-test +[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" matches? ] unit-test +[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" matches? ] unit-test + +[ t ] [ "1000" "\\d{4,6}" matches? ] unit-test +[ t ] [ "1000" "[0-9]{4,6}" matches? ] unit-test + +[ t ] [ "abc" "\\p{Lower}{3}" matches? ] unit-test +[ f ] [ "ABC" "\\p{Lower}{3}" matches? ] unit-test +[ t ] [ "ABC" "\\p{Upper}{3}" matches? ] unit-test +[ f ] [ "abc" "\\p{Upper}{3}" matches? ] unit-test + +[ f ] [ "abc" "[\\p{Upper}]{3}" matches? ] unit-test +[ t ] [ "ABC" "[\\p{Upper}]{3}" matches? ] unit-test + +[ t ] [ "" "\\Q\\E" matches? ] unit-test +[ f ] [ "a" "\\Q\\E" matches? ] unit-test +[ t ] [ "|*+" "\\Q|*+\\E" matches? ] unit-test +[ f ] [ "abc" "\\Q|*+\\E" matches? ] unit-test + +[ t ] [ "S" "\\0123" matches? ] unit-test +[ t ] [ "SXY" "\\0123XY" matches? ] unit-test +[ t ] [ "x" "\\x78" matches? ] unit-test +[ f ] [ "y" "\\x78" matches? ] unit-test +[ t ] [ "x" "\\u000078" matches? ] unit-test +[ f ] [ "y" "\\u000078" matches? ] unit-test + +[ t ] [ "ab" "a+b" matches? ] unit-test +[ f ] [ "b" "a+b" matches? ] unit-test +[ t ] [ "aab" "a+b" matches? ] unit-test +[ f ] [ "abb" "a+b" matches? ] unit-test + +[ t ] [ "abbbb" "ab*" matches? ] unit-test +[ t ] [ "a" "ab*" matches? ] unit-test +[ f ] [ "abab" "ab*" matches? ] unit-test + +[ f ] [ "x" "\\." matches? ] unit-test +[ t ] [ "." "\\." matches? ] unit-test + +[ t ] [ "aaaab" "a+ab" matches? ] unit-test +[ f ] [ "aaaxb" "a+ab" matches? ] unit-test +[ t ] [ "aaacb" "a+cb" matches? ] unit-test +[ f ] [ "aaaab" "a++ab" matches? ] unit-test +[ t ] [ "aaacb" "a++cb" matches? ] unit-test + +[ 3 ] [ "aaacb" "a*" match-head ] unit-test +[ 1 ] [ "aaacb" "a+?" match-head ] unit-test +[ 2 ] [ "aaacb" "aa?" match-head ] unit-test +[ 1 ] [ "aaacb" "aa??" 
match-head ] unit-test +[ 3 ] [ "aacb" "aa?c" match-head ] unit-test +[ 3 ] [ "aacb" "aa??c" match-head ] unit-test + +! [ t ] [ "aaa" "AAA" t matches? ] unit-test +! [ f ] [ "aax" "AAA" t matches? ] unit-test +! [ t ] [ "aaa" "A*" t matches? ] unit-test +! [ f ] [ "aaba" "A*" t matches? ] unit-test +! [ t ] [ "b" "[AB]" t matches? ] unit-test +! [ f ] [ "c" "[AB]" t matches? ] unit-test +! [ t ] [ "c" "[A-Z]" t matches? ] unit-test +! [ f ] [ "3" "[A-Z]" t matches? ] unit-test + +[ ] [ + "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))" + drop +] unit-test + +[ t ] [ "fxxbar" "(?!foo).{3}bar" matches? ] unit-test +[ f ] [ "foobar" "(?!foo).{3}bar" matches? ] unit-test + +! [ 3 ] [ "foobar" "foo(?=bar)" match-head ] unit-test +! [ f ] [ "foobxr" "foo(?=bar)" match-head ] unit-test + +! [ f ] [ "foobxr" "foo\\z" match-head ] unit-test +! [ 3 ] [ "foo" "foo\\z" match-head ] unit-test + +! [ 3 ] [ "foo bar" "foo\\b" match-head ] unit-test +! [ f ] [ "fooxbar" "foo\\b" matches? ] unit-test +! [ t ] [ "foo" "foo\\b" matches? ] unit-test +! [ t ] [ "foo bar" "foo\\b bar" matches? ] unit-test +! [ f ] [ "fooxbar" "foo\\bxbar" matches? ] unit-test +! [ f ] [ "foo" "foo\\bbar" matches? ] unit-test + +! [ f ] [ "foo bar" "foo\\B" matches? ] unit-test +! [ 3 ] [ "fooxbar" "foo\\B" match-head ] unit-test +! [ t ] [ "foo" "foo\\B" matches? ] unit-test +! [ f ] [ "foo bar" "foo\\B bar" matches? ] unit-test +! [ t ] [ "fooxbar" "foo\\Bxbar" matches? ] unit-test +! [ f ] [ "foo" "foo\\Bbar" matches? ] unit-test + +! [ t ] [ "s@f" "[a-z.-]@[a-z]" matches? ] unit-test +! [ f ] [ "a" "[a-z.-]@[a-z]" matches? ] unit-test +! [ t ] [ ".o" "\\.[a-z]" matches? ] unit-test + +! Bug in parsing word +[ t ] [ + "a" + R' a' + matches? +] unit-test + + + + + + ! ((A)(B(C))) ! 1. 
((A)(B(C)))
diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor
index add2e0c477..2957244bcf 100644
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
 sequences namespaces locals combinators.lib state-tables math.parser
 state-parser sets dlists unicode.categories math.order quotations
 shuffle math.ranges splitting
-symbols fry ;
+symbols fry parser ;
 IN: regexp4
 
 SYMBOLS: eps start-state final-state beginning-of-text
@@ -544,6 +544,40 @@ ERROR: unsupported-token token ;
     >>nfa
     dup [ parse-raw-regexp ] [ subset-construction ] bi ;
 
+! Literal syntax for regexps
+: parse-options ( string -- ? )
+    #! Lame
+    #! Maps an option string to a flag: "" gives f, "i" gives t.
+    #! Any other string has no matching case branch.
+    {
+        { "" [ f ] }
+        { "i" [ t ] }
+    } case ;
+
+: parse-regexp ( accum end -- accum )
+    #! Skips blanks, then (as the quotation reads) takes the text
+    #! from the current lexer column up to the next occurrence of
+    #! the character end, moves the column just past it, and adds
+    #! that text to accum with parsed.
+    lexer get dup skip-blank
+    [ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
+    ! lexer get dup still-parsing-line?
+    ! [ (parse-token) parse-options ] [ drop f ] if
+    parsed ;
+
+! Each R<open> parsing word hands its closing delimiter to parse-regexp.
+: R! CHAR: ! parse-regexp ; parsing
+: R" CHAR: " parse-regexp ; parsing
+: R# CHAR: # parse-regexp ; parsing
+: R' CHAR: ' parse-regexp ; parsing
+: R( CHAR: ) parse-regexp ; parsing
+: R/ CHAR: / parse-regexp ; parsing
+: R@ CHAR: @ parse-regexp ; parsing
+: R[ CHAR: ] parse-regexp ; parsing
+: R` CHAR: ` parse-regexp ; parsing
+: R{ CHAR: } parse-regexp ; parsing
+: R| CHAR: | parse-regexp ; parsing
+
 TUPLE: dfa-traverser
     dfa
     last-state current-state
@@ -611,6 +645,13 @@ TUPLE: dfa-traverser
 : matches? ( string regexp -- ? )
     dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 
+: match-head ( string regexp -- end/f )
+    #! Length of the match at the head of string, or f when match
+    #! returns f. matches? compares the string length against the
+    #! range length minus one, so the same 1- is applied here
+    #! (assumes length>> and range-length read the same slot).
+    match [ length>> 1- ] [ f ] if* ;
+
 ! character classes
 ! TUPLE: range-class from to ;
 ! TUPLE: or-class left right ;