From 56039876bcf688de7d38d8149de95dd9092d5467 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 26 Nov 2007 12:59:04 -0600 Subject: [PATCH 01/12] Before character-class --- extra/regexp/regexp.factor | 62 +++++++++++++------------------------- 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/extra/regexp/regexp.factor b/extra/regexp/regexp.factor index 79f826bafa..02d66ee59b 100644 --- a/extra/regexp/regexp.factor +++ b/extra/regexp/regexp.factor @@ -1,21 +1,30 @@ -USING: combinators kernel lazy-lists math math.parser +USING: arrays combinators kernel lazy-lists math math.parser namespaces parser parser-combinators parser-combinators.simple -promises sequences strings ; +promises sequences sequences.lib strings ; USING: continuations io prettyprint ; IN: regexp : 'any-char' "." token [ drop any-char-parser ] <@ ; +: escaped-char + { + { CHAR: d [ [ digit? ] satisfy ] } + { CHAR: D [ [ digit? not ] satisfy ] } + { CHAR: s [ [ blank? ] satisfy ] } + { CHAR: S [ [ blank? not ] satisfy ] } + [ ] + } case ; + : 'escaped-char' - "\\" token any-char-parser &> ; + "\\" token any-char-parser &> [ escaped-char ] <@ ; : 'ordinary-char' - [ "*+?|(){}" member? not ] satisfy ; + [ "^*+?|(){}[]" member? not ] satisfy [ 1string token ] <@ ; : 'char' 'escaped-char' 'ordinary-char' <|> ; -: 'string' 'char' <+> [ >string token ] <@ ; +: 'string' 'char' <+> [ [ <&> ] reduce* ] <@ ; : exactly-n ( parser n -- parser' ) swap and-parser construct-boa ; @@ -55,41 +64,13 @@ C: group-result ] <@ ; : 'interval' - 'term' - "{" token - 'integer' &> - "," token <:&:> - 'integer' <:&:> - "}" token <& <&> [ - first2 dup length { - { 1 [ first exactly-n ] } - { 2 [ first2 dup integer? 
- [ nip at-most-n ] - [ drop at-least-n ] if ] } - { 3 [ first3 nip from-m-to-n ] } - } case - ] <@ ; - -: 'character-range' - any-char-parser "-" token <& any-char-parser &> ; - -: 'character-class-inside' - any-char-parser - 'character-range' <|> ; - -: 'character-class-inclusive' - "[" token - 'character-class-inside' - "]" token ; - -: 'character-class-exclusive' - "[^" token - 'character-class-inside' - "]" token ; - -: 'character-class' - 'character-class-inclusive' - 'character-class-exclusive' <|> ; + 'term' "{" token <& 'integer' <&> "}" token <& [ first2 exactly-n ] <@ + 'term' "{" token <& 'integer' <&> "," token <& "}" token <& + [ first2 at-least-n ] <@ <|> + 'term' "{" token <& "," token <& 'integer' <&> "}" token <& + [ first2 at-most-n ] <@ <|> + 'term' "{" token <& 'integer' <&> "," token <& 'integer' <:&> "}" token <& + [ first3 from-m-to-n ] <@ <|> ; : 'repetition' 'term' @@ -113,7 +94,6 @@ LAZY: 'regexp' ( -- parser ) : 'regexp' just parse-1 ; - GENERIC: >regexp ( obj -- parser ) M: string >regexp 'regexp' just parse-1 ; M: object >regexp ; From 50948ae9db8c6fb90bbca4f5d4f8f4be6ce07c5a Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 26 Nov 2007 17:19:29 -0600 Subject: [PATCH 02/12] Add character classes, fails on one test case [^] Add lots of unit tests --- extra/regexp/regexp-tests.factor | 66 ++++++++++++++++++++++++++++++++ extra/regexp/regexp.factor | 54 ++++++++++++++++++++++---- 2 files changed, 112 insertions(+), 8 deletions(-) diff --git a/extra/regexp/regexp-tests.factor b/extra/regexp/regexp-tests.factor index 597a4f5143..5ebd6dc4d3 100644 --- a/extra/regexp/regexp-tests.factor +++ b/extra/regexp/regexp-tests.factor @@ -29,6 +29,7 @@ IN: regexp-tests [ f ] [ "" "." matches? ] unit-test [ t ] [ "a" "." matches? ] unit-test [ t ] [ "." "." matches? ] unit-test +[ f ] [ "\n" "." matches? ] unit-test [ f ] [ "" ".+" matches? ] unit-test [ t ] [ "a" ".+" matches? 
] unit-test @@ -75,3 +76,68 @@ IN: regexp-tests [ t ] [ "aaa" "a{1,3}" matches? ] unit-test [ f ] [ "aaaa" "a{1,3}" matches? ] unit-test +[ f ] [ "" "[a]" matches? ] unit-test +[ t ] [ "a" "[a]" matches? ] unit-test +[ t ] [ "a" "[abc]" matches? ] unit-test +[ f ] [ "b" "[a]" matches? ] unit-test +[ f ] [ "d" "[abc]" matches? ] unit-test +[ t ] [ "ab" "[abc]{1,2}" matches? ] unit-test +[ f ] [ "abc" "[abc]{1,2}" matches? ] unit-test + +[ f ] [ "" "[^a]" matches? ] unit-test +[ f ] [ "a" "[^a]" matches? ] unit-test +[ f ] [ "a" "[^abc]" matches? ] unit-test +[ t ] [ "b" "[^a]" matches? ] unit-test +[ t ] [ "d" "[^abc]" matches? ] unit-test +[ f ] [ "ab" "[^abc]{1,2}" matches? ] unit-test +[ f ] [ "abc" "[^abc]{1,2}" matches? ] unit-test + +[ t ] [ "]" "[]]" matches? ] unit-test +[ f ] [ "]" "[^]]" matches? ] unit-test + +[ "^" "[^]" matches? ] unit-test-fails +[ t ] [ "^" "[]^]" matches? ] unit-test +[ t ] [ "]" "[]^]" matches? ] unit-test + +[ t ] [ "[" "[[]" matches? ] unit-test +[ f ] [ "^" "[^^]" matches? ] unit-test +[ t ] [ "a" "[^^]" matches? ] unit-test + +[ t ] [ "-" "[-]" matches? ] unit-test +[ f ] [ "a" "[-]" matches? ] unit-test +[ f ] [ "-" "[^-]" matches? ] unit-test +[ t ] [ "a" "[^-]" matches? ] unit-test + +[ t ] [ "-" "[-a]" matches? ] unit-test +[ t ] [ "a" "[-a]" matches? ] unit-test +[ t ] [ "-" "[a-]" matches? ] unit-test +[ t ] [ "a" "[a-]" matches? ] unit-test +[ f ] [ "b" "[a-]" matches? ] unit-test +[ f ] [ "-" "[^-]" matches? ] unit-test +[ t ] [ "a" "[^-]" matches? ] unit-test + +[ f ] [ "-" "[a-c]" matches? ] unit-test +[ t ] [ "-" "[^a-c]" matches? ] unit-test +[ t ] [ "b" "[a-c]" matches? ] unit-test +[ f ] [ "b" "[^a-c]" matches? ] unit-test + +[ t ] [ "-" "[a-c-]" matches? ] unit-test +[ f ] [ "-" "[^a-c-]" matches? ] unit-test + +[ t ] [ "\\" "[\\\\]" matches? ] unit-test +[ f ] [ "a" "[\\\\]" matches? ] unit-test +[ f ] [ "\\" "[^\\\\]" matches? ] unit-test +[ t ] [ "a" "[^\\\\]" matches? 
] unit-test + +[ t ] [ "0" "[\\d]" matches? ] unit-test +[ f ] [ "a" "[\\d]" matches? ] unit-test +[ f ] [ "0" "[^\\d]" matches? ] unit-test +[ t ] [ "a" "[^\\d]" matches? ] unit-test + +[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" matches? ] unit-test +[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" matches? ] unit-test +[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" matches? ] unit-test + +[ t ] [ "1000" "\\d{4,6}" matches? ] unit-test +! [ t ] [ "1000" "[0-9]{4,6}" matches? ] unit-test + diff --git a/extra/regexp/regexp.factor b/extra/regexp/regexp.factor index 02d66ee59b..8fdc1bed8b 100644 --- a/extra/regexp/regexp.factor +++ b/extra/regexp/regexp.factor @@ -1,6 +1,6 @@ USING: arrays combinators kernel lazy-lists math math.parser namespaces parser parser-combinators parser-combinators.simple -promises sequences sequences.lib strings ; +promises quotations sequences sequences.lib strings ; USING: continuations io prettyprint ; IN: regexp @@ -9,22 +9,29 @@ IN: regexp : escaped-char { - { CHAR: d [ [ digit? ] satisfy ] } - { CHAR: D [ [ digit? not ] satisfy ] } - { CHAR: s [ [ blank? ] satisfy ] } - { CHAR: S [ [ blank? not ] satisfy ] } - [ ] + { CHAR: d [ [ digit? ] ] } + { CHAR: D [ [ digit? not ] ] } + { CHAR: s [ [ blank? ] ] } + { CHAR: S [ [ blank? not ] ] } + { CHAR: \\ [ [ CHAR: \\ = ] ] } + [ "bad \\, use \\\\ to match a literal \\" throw ] } case ; : 'escaped-char' "\\" token any-char-parser &> [ escaped-char ] <@ ; +! Must escape to use as literals +! : meta-chars "[\\^$.|?*+()" ; + : 'ordinary-char' - [ "^*+?|(){}[]" member? not ] satisfy [ 1string token ] <@ ; + [ "\\^*+?|(){}[" member? not ] satisfy ; : 'char' 'escaped-char' 'ordinary-char' <|> ; -: 'string' 'char' <+> [ [ <&> ] reduce* ] <@ ; +: 'string' + 'char' <+> [ + [ dup quotation? [ satisfy ] [ 1token ] if ] [ <&> ] map-reduce + ] <@ ; : exactly-n ( parser n -- parser' ) swap and-parser construct-boa ; @@ -54,10 +61,41 @@ C: group-result 'regexp' [ [ ] <@ ] <@ ")" token <& &> ; +! 
Special cases: ]\\^- +: predicates>cond ( seq -- quot ) + #! Takes an array of quotation predicates/objects and makes a cond + #! Makes a predicate of each obj like so: [ dup obj = ] + #! Leaves quotations alone + #! The cond returns a boolean, t if one of the predicates matches + [ + dup callable? [ [ = ] curry ] unless + [ dup ] swap compose [ drop t ] 2array + ] map { [ t ] [ drop f ] } add [ cond ] curry ; + +: 'range' + any-char-parser "-" token <& any-char-parser <&> + [ first2 [ between? ] 2curry ] <@ ; + +: 'character-class-contents' + 'escaped-char' + 'range' <|> + [ "\\]" member? not ] satisfy <|> ; + +: 'character-class' + "[" token + "^" token 'character-class-contents' <+> <&:> + [ predicates>cond [ not ] compose satisfy ] <@ + "]" token [ first ] <@ 'character-class-contents' <*> <&:> + [ predicates>cond satisfy ] <@ <|> + 'character-class-contents' <+> [ predicates>cond satisfy ] <@ <|> + &> + "]" token <& ; + : 'term' 'any-char' 'string' <|> 'grouping' <|> + 'character-class' <|> <+> [ dup length 1 = [ first ] [ and-parser construct-boa ] if From 2a5b65a912e867f12d2562ba53befc09ba937043 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 26 Nov 2007 17:38:24 -0600 Subject: [PATCH 03/12] Add 1token to parser combinators --- extra/parser-combinators/parser-combinators.factor | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extra/parser-combinators/parser-combinators.factor b/extra/parser-combinators/parser-combinators.factor index d6c44659a5..80d25c1bb7 100755 --- a/extra/parser-combinators/parser-combinators.factor +++ b/extra/parser-combinators/parser-combinators.factor @@ -32,6 +32,8 @@ M: token-parser parse ( input parser -- list ) 2drop nil ] if ; +: 1token ( n -- parser ) 1string token ; + TUPLE: satisfy-parser quot ; C: satisfy satisfy-parser ( quot -- parser ) From ed359b66234c039f156427504e064df210520bd3 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 10:28:28 +1300 Subject: [PATCH 04/12] Syntax tree for ebnf --- 
extra/peg/ebnf/ebnf-tests.factor | 24 +++++++++++++++++++++--- extra/peg/ebnf/ebnf.factor | 28 ++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index f7af6f98d3..0eeab7c4dc 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -12,6 +12,24 @@ IN: temporary "\"55\"" 'terminal' parse parse-result-ast ] unit-test -! { } [ -! "digit = \"0\" | \"1\" | \"2\"" 'rule' parse parse-result-ast -! ] unit-test \ No newline at end of file +{ + T{ ebnf-rule f + "digit" + T{ ebnf-choice f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + } + } +} [ + "digit = \"1\" | \"2\"" 'rule' parse parse-result-ast +] unit-test + +{ + T{ ebnf-rule f + "digit" + T{ ebnf-sequence f + V{ T{ ebnf-terminal f "1" } T{ ebnf-terminal f "2" } } + } + } +} [ + "digit = \"1\" \"2\"" 'rule' parse parse-result-ast +] unit-test \ No newline at end of file diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index c41e9d31a4..61eb7382c4 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -6,10 +6,14 @@ IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-choice options ; +TUPLE: ebnf-sequence elements ; +TUPLE: ebnf-rule symbol elements ; C: ebnf-non-terminal C: ebnf-terminal C: ebnf-choice +C: ebnf-sequence +C: ebnf-rule GENERIC: ebnf-compile ( ast -- quot ) @@ -25,7 +29,17 @@ M: ebnf-choice ebnf-compile ( ast -- quot ) ebnf-compile , ] each ] { } make , - [ call ] , \ map , + [ call ] , \ map , \ choice , + ] [ ] make ; + +M: ebnf-sequence ebnf-compile ( ast -- quot ) + [ + [ + ebnf-sequence-elements [ + ebnf-compile , + ] each + ] { } make , + [ call ] , \ map , \ seq , ] [ ] make ; DEFER: 'rhs' @@ -40,7 +54,9 @@ DEFER: 'rhs' 'non-terminal' 'terminal' 2array choice ; : 'sequence' ( -- parser ) - 'element' sp repeat1 ; + 'element' sp + "|" token sp ensure-not 2array seq [ first ] 
action + repeat1 [ ] action ; : 'choice' ( -- parser ) 'element' sp "|" token sp list-of [ ] action ; @@ -53,13 +69,13 @@ DEFER: 'rhs' : 'rhs' ( -- parser ) 'repeat0' - 'choice' 'sequence' + 'choice' 'element' 4array choice ; : 'rule' ( -- parser ) - 'non-terminal' - "=" token sp + 'non-terminal' [ ebnf-non-terminal-symbol ] action + "=" token sp hide 'rhs' - 3array seq ; + 3array seq [ first2 ] action ; From 38806885e639e6ca01b3c51bf8c4b0e4a7dc7109 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 11:07:17 +1300 Subject: [PATCH 05/12] Compile ebnf->factor --- extra/peg/ebnf/ebnf.factor | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 61eb7382c4..bb0e85c45a 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel arrays strings math.parser sequences namespaces peg ; +USING: kernel parser words arrays strings math.parser sequences namespaces peg ; IN: peg.ebnf TUPLE: ebnf-non-terminal symbol ; @@ -22,6 +22,11 @@ M: ebnf-terminal ebnf-compile ( ast -- quot ) ebnf-terminal-symbol , \ token , ] [ ] make ; +M: ebnf-non-terminal ebnf-compile ( ast -- quot ) + [ + ebnf-non-terminal-symbol in get lookup , + ] [ ] make ; + M: ebnf-choice ebnf-compile ( ast -- quot ) [ [ @@ -42,6 +47,12 @@ M: ebnf-sequence ebnf-compile ( ast -- quot ) [ call ] , \ map , \ seq , ] [ ] make ; +M: ebnf-rule ebnf-compile ( ast -- quot ) + [ + dup ebnf-rule-symbol , \ in , \ get , \ create , + ebnf-rule-elements ebnf-compile , \ define-compound , + ] [ ] make ; + DEFER: 'rhs' : 'non-terminal' ( -- parser ) From 16a0cc9eb1943469799a58ba9e0fc406d85a010c Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 11:25:34 +1300 Subject: [PATCH 06/12] add ebnf>quot --- extra/peg/ebnf/ebnf.factor | 21 ++++++++++++++++++++- 1 file changed, 20 
insertions(+), 1 deletion(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index bb0e85c45a..a402a0fd73 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -8,12 +8,14 @@ TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; TUPLE: ebnf-rule symbol elements ; +TUPLE: ebnf rules ; C: ebnf-non-terminal C: ebnf-terminal C: ebnf-choice C: ebnf-sequence C: ebnf-rule +C: ebnf GENERIC: ebnf-compile ( ast -- quot ) @@ -24,7 +26,7 @@ M: ebnf-terminal ebnf-compile ( ast -- quot ) M: ebnf-non-terminal ebnf-compile ( ast -- quot ) [ - ebnf-non-terminal-symbol in get lookup , + ebnf-non-terminal-symbol , \ in , \ get , \ lookup , \ execute , ] [ ] make ; M: ebnf-choice ebnf-compile ( ast -- quot ) @@ -53,6 +55,13 @@ M: ebnf-rule ebnf-compile ( ast -- quot ) ebnf-rule-elements ebnf-compile , \ define-compound , ] [ ] make ; +M: ebnf ebnf-compile ( ast -- quot ) + [ + ebnf-rules [ + ebnf-compile % + ] each + ] [ ] make ; + DEFER: 'rhs' : 'non-terminal' ( -- parser ) @@ -90,3 +99,13 @@ DEFER: 'rhs' "=" token sp hide 'rhs' 3array seq [ first2 ] action ; + +: 'ebnf' ( -- parser ) + 'rule' sp ";" token sp hide list-of [ ] action ; + +: ebnf>quot ( string -- quot ) + 'ebnf' parse [ + parse-result-ast ebnf-compile + ] [ + f + ] if* ; \ No newline at end of file From 7a414869de3fad9315930ddcc6706632add45436 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 11:33:21 +1300 Subject: [PATCH 07/12] Support for repeat0 in ebnf --- extra/peg/ebnf/ebnf.factor | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index a402a0fd73..2f71ff961b 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -7,6 +7,7 @@ TUPLE: ebnf-non-terminal symbol ; TUPLE: ebnf-terminal symbol ; TUPLE: ebnf-choice options ; TUPLE: ebnf-sequence elements ; +TUPLE: ebnf-repeat0 group ; TUPLE: ebnf-rule symbol elements ; TUPLE: 
ebnf rules ; @@ -14,6 +15,7 @@ C: ebnf-non-terminal C: ebnf-terminal C: ebnf-choice C: ebnf-sequence +C: ebnf-repeat0 C: ebnf-rule C: ebnf @@ -49,6 +51,11 @@ M: ebnf-sequence ebnf-compile ( ast -- quot ) [ call ] , \ map , \ seq , ] [ ] make ; +M: ebnf-repeat0 ebnf-compile ( ast -- quot ) + [ + ebnf-repeat0-group ebnf-compile % \ repeat0 , + ] [ ] make ; + M: ebnf-rule ebnf-compile ( ast -- quot ) [ dup ebnf-rule-symbol , \ in , \ get , \ create , @@ -85,7 +92,7 @@ DEFER: 'rhs' "{" token sp hide [ 'rhs' sp ] delay "}" token sp hide - 3array seq ; + 3array seq [ first ] action ; : 'rhs' ( -- parser ) 'repeat0' From 88e93446b20b3d9a918bdf8e013fa6b5b641c02b Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 11:46:06 +1300 Subject: [PATCH 08/12] Add EBNF: word --- extra/peg/ebnf/ebnf.factor | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 2f71ff961b..5061e9ee3c 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -108,11 +108,13 @@ DEFER: 'rhs' 3array seq [ first2 ] action ; : 'ebnf' ( -- parser ) - 'rule' sp ";" token sp hide list-of [ ] action ; + 'rule' sp "." 
token sp hide list-of [ ] action ; : ebnf>quot ( string -- quot ) 'ebnf' parse [ parse-result-ast ebnf-compile ] [ f - ] if* ; \ No newline at end of file + ] if* ; + +: EBNF: ";" parse-tokens "" join ebnf>quot call ; parsing \ No newline at end of file From 4f0c40c05a191b03dc7d3df4bc0978ddebfd91bd Mon Sep 17 00:00:00 2001 From: Chris Double Date: Wed, 28 Nov 2007 11:52:17 +1300 Subject: [PATCH 09/12] Change EBNF: to --- extra/peg/ebnf/ebnf.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index 5061e9ee3c..8726581488 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -117,4 +117,4 @@ DEFER: 'rhs' f ] if* ; -: EBNF: ";" parse-tokens "" join ebnf>quot call ; parsing \ No newline at end of file +: " parse-tokens "" join ebnf>quot call ; parsing \ No newline at end of file From 937993720016e9cc95f11609d6b0ca83bae60ad5 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 28 Nov 2007 02:12:42 -0500 Subject: [PATCH 10/12] Globs --- extra/globs/authors.txt | 1 + extra/globs/globs-tests.factor | 18 ++++++++ extra/globs/globs.factor | 42 +++++++++++++++++++ extra/globs/summary.txt | 1 + .../parser-combinators.factor | 20 ++++++--- 5 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 extra/globs/authors.txt create mode 100644 extra/globs/globs-tests.factor create mode 100644 extra/globs/globs.factor create mode 100644 extra/globs/summary.txt diff --git a/extra/globs/authors.txt b/extra/globs/authors.txt new file mode 100644 index 0000000000..1901f27a24 --- /dev/null +++ b/extra/globs/authors.txt @@ -0,0 +1 @@ +Slava Pestov diff --git a/extra/globs/globs-tests.factor b/extra/globs/globs-tests.factor new file mode 100644 index 0000000000..8021128810 --- /dev/null +++ b/extra/globs/globs-tests.factor @@ -0,0 +1,18 @@ +IN: temporary +USING: tools.test globs ; + +[ f ] [ "abd" "fdf" glob-matches? ] unit-test +[ f ] [ "fdsafas" "?" glob-matches? 
] unit-test +[ t ] [ "fdsafas" "*as" glob-matches? ] unit-test +[ t ] [ "fdsafas" "*a*" glob-matches? ] unit-test +[ t ] [ "fdsafas" "*a?" glob-matches? ] unit-test +[ t ] [ "fdsafas" "*?" glob-matches? ] unit-test +[ f ] [ "fdsafas" "*s?" glob-matches? ] unit-test +[ t ] [ "a" "[abc]" glob-matches? ] unit-test +[ f ] [ "a" "[^abc]" glob-matches? ] unit-test +[ t ] [ "d" "[^abc]" glob-matches? ] unit-test +[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test +[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test +[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test +[ f ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test +[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test diff --git a/extra/globs/globs.factor b/extra/globs/globs.factor new file mode 100644 index 0000000000..bcc6b572fc --- /dev/null +++ b/extra/globs/globs.factor @@ -0,0 +1,42 @@ +! Copyright (C) 2007 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: parser-combinators regexp lazy-lists sequences kernel +promises ; +IN: globs + + [ token ] <@ ; + +: 'escaped-char' + "\\" token any-char-parser &> [ 1token ] <@ ; + +: 'escaped-string' + 'string' 'escaped-char' <|> ; + +DEFER: 'term' + +: 'glob' ( -- parser ) + 'term' <*> [ ] <@ ; + +: 'union' ( -- parser ) + 'glob' "," token nonempty-list-of "{" "}" surrounded-by + [ ] <@ ; + +LAZY: 'term' + 'union' + 'character-class' <|> + "?" token [ drop any-char-parser ] <@ <|> + "*" token [ drop any-char-parser <*> ] <@ <|> + 'escaped-string' <|> ; + +PRIVATE> + +: 'glob' just parse-1 just ; + +: glob-matches? ( input glob -- ? ) + parse nil? 
not ; diff --git a/extra/globs/summary.txt b/extra/globs/summary.txt new file mode 100644 index 0000000000..e97b9b28f7 --- /dev/null +++ b/extra/globs/summary.txt @@ -0,0 +1 @@ +Unix shell-style glob pattern matching diff --git a/extra/parser-combinators/parser-combinators.factor b/extra/parser-combinators/parser-combinators.factor index 80d25c1bb7..04032db19f 100755 --- a/extra/parser-combinators/parser-combinators.factor +++ b/extra/parser-combinators/parser-combinators.factor @@ -13,10 +13,10 @@ M: promise parse ( input parser -- list ) TUPLE: parse-result parsed unparsed ; : parse-1 ( input parser -- result ) - parse dup nil? [ - "Parse error" throw + dupd parse dup nil? [ + "Cannot parse " rot append throw ] [ - car parse-result-parsed + nip car parse-result-parsed ] if ; C: parse-result @@ -93,6 +93,9 @@ TUPLE: and-parser parsers ; 2array ] if and-parser construct-boa ; +: ( parsers -- parser ) + dup length 1 = [ first ] [ and-parser construct-boa ] if ; + : and-parser-parse ( list p1 -- list ) swap [ dup parse-result-unparsed rot parse @@ -111,15 +114,20 @@ M: and-parser parse ( input parser -- list ) and-parser-parsers unclip swapd parse [ [ and-parser-parse ] reduce ] 2curry promise ; -TUPLE: or-parser p1 p2 ; +TUPLE: or-parser parsers ; -C: <|> or-parser ( parser1 parser2 -- parser ) +: ( parsers -- parser ) + dup length 1 = [ first ] [ or-parser construct-boa ] if ; + +: <|> ( parser1 parser2 -- parser ) + 2array ; M: or-parser parse ( input parser1 -- list ) #! Return the combined list resulting from the parses #! of parser1 and parser2 being applied to the same #! input. This implements the choice parsing operator. - [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ; + or-parser-parsers 0 swap seq>list + [ parse ] lmap-with lconcat ; : left-trim-slice ( string -- string ) #! 
Return a new string without any leading whitespace From 2332fd746eba96e7221013e21f17713291969089 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 28 Nov 2007 02:13:02 -0500 Subject: [PATCH 11/12] Tweak :edit command --- extra/ui/tools/debugger/debugger.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/ui/tools/debugger/debugger.factor b/extra/ui/tools/debugger/debugger.factor index 0e7addb157..a7c173799a 100644 --- a/extra/ui/tools/debugger/debugger.factor +++ b/extra/ui/tools/debugger/debugger.factor @@ -52,7 +52,7 @@ debugger "gestures" f { \ :help H{ { +nullary+ t } { +listener+ t } } define-command -\ :edit H{ { +nullary+ t } } define-command +\ :edit H{ { +nullary+ t } { +listener+ t } } define-command debugger "toolbar" f { { T{ key-down f f "s" } com-traceback } From 022cce01c24c069db78ce25e0497cfb7f14ff4aa Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 28 Nov 2007 02:13:11 -0500 Subject: [PATCH 12/12] Changelog for 0.91 --- extra/help/handbook/Untitled-15 | 57 +++++++++++++++++++++++++++ extra/help/handbook/handbook.factor | 61 ++++++++++++++++++++++++++++- 2 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 extra/help/handbook/Untitled-15 diff --git a/extra/help/handbook/Untitled-15 b/extra/help/handbook/Untitled-15 new file mode 100644 index 0000000000..20a5c621aa --- /dev/null +++ b/extra/help/handbook/Untitled-15 @@ -0,0 +1,57 @@ +{ $subheading "Performance" } +{ $list + { "Continuations are now supported by the static stack effect system. This means that the " { $link infer } " word and the optimizing compiler now both support code which uses continuations." } + { "Many words which previously ran in the interpreter, such as error handling and I/O, are now compiled to optimized machine code." } + { "A non-optimizing, just-in-time compiler replaces the interpreter with no loss in functionality or introspective ability." 
} + { "The non-optimizing compiler compiles quotations the first time they are called, generating a series of stack pushes and subroutine calls." } + { "The optimizing compiler now performs some more representation inference. Alien pointers are unboxed where possible. This improves performance of the " { $vocab-link "ogg.player" } " Ogg Theora video player considerably." } + { "The queue of sleeping tasks is now a sorted priority queue. This improves performance considerably when there is a large number of sleeping threads (Doug Coleman)" } + { "Improved hash code algorithm for sequences" } + { "New, efficient implementations of " { $link bit? } " and " { $link log2 } " run in constant time for large bignums" } + { "New " { $link big-random } " word for generating large random numbers quickly" } + { "Improved profiler no longer has to be explicitly enabled and disabled with a recompile step; instead, the " { $link profile } " word can be used at any time, and it dynamically patches all words in the code heap to increment call counts. There is no overhead when the profiler is not in use."
} +} +{ $subheading "IO" } +{ $list + { "The " { $link "stream-protocol" } " has changed" } + { "New " { $link os-envs } " word to get the current set of environment variables" } + { "Redesigned " { $vocab-link "io.launcher" } " supports passing environment variables to the child process" } + { { $link } " implemented on Windows (Doug Coleman)" } + { "More robust Windows CE native I/O" } + { "Updated " { $vocab-link "io.mmap" } " for new module system, now supports Windows CE (Doug Coleman)" } + { { $vocab-link "io.sniffer" } " - packet sniffer library (Doug Coleman, Elie Chaftari)" } + { { $vocab-link "io.server" } " - improved logging support, logs to a file by default" } + { { $vocab-link "io.files" } " - several new file system manipulation words added" } + { { $vocab-link "tar" } " - tar file extraction in pure Factor (Doug Coleman)" } + { { $vocab-link "unix.linux" } ", " { $vocab-link "raptor" } " - ``Raptor Linux'', a set of alien bindings to low-level Linux features, such as network interface configuration, file system mounting/unmounting, etc, together with experimental boot scripts intended to entirely replace " { $snippet "/sbin/init" } ", " { $vocab-link "/etc/inittab" } " and " { $snippet "/etc/init.d/" } "." 
} +} +{ $subheading "Tools" } +{ $list + { "Graphical deploy tool added - see " { $link "ui.tools.deploy" } } + { "The deploy tool now supports Windows" } + { { $vocab-link "network-clipboard" } " - clipboard synchronization with a simple TCP/IP protocol" } +} +{ $subheading "UI" } +{ $list + { { $vocab-link "cairo" } " - updated for new module system, new features (Sampo Vuori)" } + { { $vocab-link "springies" } " - physics simulation UI demo (Eduardo Cavazos)" } + { { $vocab-link "ui.gadgets.buttons" } " - added check box and radio button gadgets" } + { "Double- and triple-click-drag now supported in the editor gadget to select words or lines at a time" } + { "Windows can be closed on request now using " { $link close-window } } + { "New icons (Elie Chaftari)" } +} +{ $subheading "Other" } +{ $list + { "The " { $snippet "queues" } " vocabulary has been removed because its functionality is a subset of " { $vocab-link "dlists" } } + { "The " { $vocab-link "http.server.responder.cgi" } " vocabulary implements CGI support for the Factor HTTP server." } + { "The optimizing compiler no longer depends on the number tower and it is possible to bootstrap a minimal image by just passing " { $snippet "-include=compiler" } " to stage 2 bootstrap." 
} + { { $vocab-link "benchmarks.knucleotide" } " - new benchmark (Eric Mertens)" } + { { $vocab-link "channels" } " - concurrent message passing over message channels" } + { { $vocab-link "destructors" } " - deterministic scope-based resource deallocation (Doug Coleman)" } + { { $vocab-link "dlists" } " - various updates (Doug Coleman)" } + { { $vocab-link "editors.notepadpp" } " - Notepad++ integration (Doug Coleman)" } + { { $vocab-link "heaps" } " - updated for new module system and cleaned up (Doug Coleman)" } + { { $vocab-link "peg" } " - Parser Expression Grammars, a new approach to parser construction, similar to parser combinators (Chris Double)" } + { { $vocab-link "regexp" } " - revived from " { $snippet "unmaintained/" } " and completely redesigned (Doug Coleman)" } + { { $vocab-link "tuple.lib" } " - some utility words for working with tuples (Doug Coleman)" } +} diff --git a/extra/help/handbook/handbook.factor b/extra/help/handbook/handbook.factor index d1b48d9955..ef25e91191 100755 --- a/extra/help/handbook/handbook.factor +++ b/extra/help/handbook/handbook.factor @@ -1,7 +1,7 @@ USING: help help.markup help.syntax help.topics namespaces words sequences classes assocs vocabs kernel arrays prettyprint.backend kernel.private io tools.browser -generic ; +generic math tools.profiler system ui ; IN: help.handbook ARTICLE: "conventions" "Conventions" @@ -222,6 +222,63 @@ ARTICLE: "handbook" "Factor documentation" USING: io.files io.sockets float-arrays inference ; ARTICLE: "changes" "Changes in the latest release" +{ $heading "Factor 0.91" } +{ $subheading "Performance" } +{ $list + { "Continuations are now supported by the static stack effect system. This means that the " { $link infer } " word and the optimizing compiler now both support code which uses continuations." } + { "Many words which previously ran in the interpreter, such as error handling and I/O, are now compiled to optimized machine code."
} + { "A non-optimizing, just-in-time compiler replaces the interpreter with no loss in functionality or introspective ability." } + { "The non-optimizing compiler compiles quotations the first time they are called, generating a series of stack pushes and subroutine calls. It offers a 33%-50% performance increase over the interpreter." } + { "The optimizing compiler now performs some more representation inference. Alien pointers are unboxed where possible. This improves performance of the " { $vocab-link "ogg.player" } " Ogg Theora video player." } + { "The queue of sleeping tasks is now a sorted priority queue. This reduces overhead for workloads involving large numbers of sleeping threads (Doug Coleman)" } + { "Improved hash code algorithm for sequences" } + { "New, efficient implementations of " { $link bit? } " and " { $link log2 } " run in constant time for large bignums" } + { "New " { $link big-random } " word for generating large random numbers quickly" } + { "Improved profiler no longer has to be explicitly enabled and disabled with a full recompile; instead, the " { $link profile } " word can be used at any time, and it dynamically patches words to increment call counts. There is no overhead when the profiler is not in use."
} +} +{ $subheading "IO" } +{ $list + { "More robust Windows CE native I/O" } + { "New " { $link os-envs } " word to get the current set of environment variables" } + { "Redesigned " { $vocab-link "io.launcher" } " supports passing environment variables to the child process" } + { { $link } " implemented on Windows (Doug Coleman)" } + { "Updated " { $vocab-link "io.mmap" } " for new module system, now supports Windows CE (Doug Coleman)" } + { { $vocab-link "io.sniffer" } " - packet sniffer library (Doug Coleman, Elie Chaftari)" } + { { $vocab-link "io.server" } " - improved logging support, logs to a file by default" } + { { $vocab-link "io.files" } " - several new file system manipulation words added" } + { { $vocab-link "tar" } " - tar file extraction in pure Factor (Doug Coleman)" } + { { $vocab-link "unix.linux" } ", " { $vocab-link "raptor" } " - ``Raptor Linux'', a set of alien bindings to low-level Linux features, such as network interface configuration, file system mounting/unmounting, etc, together with experimental boot scripts intended to entirely replace " { $snippet "/sbin/init" } ", " { $vocab-link "/etc/inittab" } " and " { $snippet "/etc/init.d/" } " (Eduardo Cavazos)." 
} +} +{ $subheading "Tools" } +{ $list + { "Graphical deploy tool added - see " { $link "ui.tools.deploy" } } + { "The deploy tool now supports Windows" } + { { $vocab-link "network-clipboard" } " - clipboard synchronization with a simple TCP/IP protocol" } +} +{ $subheading "UI" } +{ $list + { { $vocab-link "cairo" } " - updated for new module system, new features (Sampo Vuori)" } + { { $vocab-link "springies" } " - physics simulation UI demo (Eduardo Cavazos)" } + { { $vocab-link "ui.gadgets.buttons" } " - added check box and radio button gadgets" } + { "Double- and triple-click-drag now supported in the editor gadget to select words or lines at a time" } + { "Windows can be closed on request now using " { $link close-window } } + { "New icons (Elie Chaftari)" } +} +{ $subheading "Other" } +{ $list + { "The " { $snippet "queues" } " vocabulary has been removed because its functionality is a subset of " { $vocab-link "dlists" } } + { "The " { $vocab-link "webapps.cgi" } " vocabulary implements CGI support for the Factor HTTP server." } + { "The optimizing compiler no longer depends on the number tower and it is possible to bootstrap a minimal image by just passing " { $snippet "-include=compiler" } " to stage 2 bootstrap." 
} + { { $vocab-link "benchmark.knucleotide" } " - new benchmark (Eric Mertens)" } + { { $vocab-link "channels" } " - concurrent message passing over message channels" } + { { $vocab-link "destructors" } " - deterministic scope-based resource deallocation (Doug Coleman)" } + { { $vocab-link "dlists" } " - various updates (Doug Coleman)" } + { { $vocab-link "editors.notepadpp" } " - Notepad++ integration (Doug Coleman)" } + { { $vocab-link "heaps" } " - updated for new module system and cleaned up (Doug Coleman)" } + { { $vocab-link "peg" } " - Parser Expression Grammars, a new approach to parser construction, similar to parser combinators (Chris Double)" } + { { $vocab-link "regexp" } " - revived from " { $snippet "unmaintained/" } " and completely redesigned (Doug Coleman)" } + { { $vocab-link "tuple.lib" } " - some utility words for working with tuples (Doug Coleman)" } +} { $heading "Factor 0.90" } { $subheading "Core" } { $list @@ -249,7 +306,7 @@ ARTICLE: "changes" "Changes in the latest release" "Most existing libraries were improved when ported to the new module system; the most notable changes include:" { $list { { $vocab-link "asn1" } ": ASN1 parser and writer. (Elie Chaftari)" } - { { $vocab-link "benchmarks" } ": new set of benchmarks." } + { { $vocab-link "benchmark" } ": new set of benchmarks." } { { $vocab-link "cfdg" } ": Context-free design grammar implementation; see " { $url "http://www.chriscoyne.com/cfdg/" } ". (Eduardo Cavazos)" } { { $vocab-link "cryptlib" } ": Cryptlib library binding. (Elie Chaftari)" } { { $vocab-link "cryptlib.streams" } ": Streams which perform SSL encryption and decryption. (Matthew Willis)" }