From 811026ce4d7b61863e51144b284ec99e59c5e6cd Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 6 Nov 2008 13:16:33 -0600 Subject: [PATCH 1/3] removing some uses of at* in favor of at, simplifying code --- basis/regexp/regexp-tests.factor | 1 - basis/regexp/regexp.factor | 2 -- basis/regexp/traversal/traversal.factor | 11 +++++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index 46696c8c0f..2339628801 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -331,4 +331,3 @@ IN: regexp-tests [ { 0 3 } ] [ "abc" "(ab|a)(bc)?" first-match ] unit-test [ { 23 24 } ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" first-match ] unit-test - diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 73555fe953..083a48a470 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -92,7 +92,6 @@ IN: regexp reversed-regexp initial-option construct-regexp ; - : parsing-regexp ( accum end -- accum ) lexer get dup skip-blank [ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column @@ -112,7 +111,6 @@ IN: regexp : R{ CHAR: } parsing-regexp ; parsing : R| CHAR: | parsing-regexp ; parsing - : find-regexp-syntax ( string -- prefix suffix ) { { "R/ " "/" } diff --git a/basis/regexp/traversal/traversal.factor b/basis/regexp/traversal/traversal.factor index 346d77e918..91c7ce16dc 100644 --- a/basis/regexp/traversal/traversal.factor +++ b/basis/regexp/traversal/traversal.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs combinators kernel math math.ranges quotations sequences regexp.parser regexp.classes fry arrays -combinators.short-circuit regexp.utils prettyprint regexp.nfa ; +combinators.short-circuit regexp.utils prettyprint regexp.nfa +shuffle ; IN: regexp.traversal TUPLE: dfa-traverser @@ -23,8 +24,7 @@ TUPLE: dfa-traverser [ dfa-table>> ] [ dfa-traversal-flags>> ] bi dfa-traverser new swap >>traversal-flags - swap [ start-state>> >>current-state ] keep - >>dfa-table + swap [ start-state>> >>current-state ] [ >>dfa-table ] bi swap >>text t >>traverse-forward 0 >>start-index @@ -116,7 +116,7 @@ M: capture-group-off flag-action ( dfa-traverser flag -- ) V{ } clone >>matches ; : match-literal ( transition from-state table -- to-state/f ) - transitions>> at* [ at ] [ 2drop f ] if ; + transitions>> at at ; : match-class ( transition from-state table -- to-state/f ) transitions>> at* [ @@ -124,8 +124,7 @@ M: capture-group-off flag-action ( dfa-traverser flag -- ) ] [ drop ] if ; : match-default ( transition from-state table -- to-state/f ) - [ nip ] dip transitions>> at* - [ t swap at* [ ] [ drop f ] if ] [ drop f ] if ; + nipd transitions>> at t swap at ; : match-transition ( obj from-state dfa -- to-state/f ) { [ match-literal ] [ match-class ] [ match-default ] } 3|| ; From 73f6691f759357a63171fcd40584bcdd350027a4 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 6 Nov 2008 15:48:08 -0600 Subject: [PATCH 2/3] print out clickable pathnames in project euler --- extra/project-euler/project-euler.factor | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extra/project-euler/project-euler.factor b/extra/project-euler/project-euler.factor index 5192e23a27..d85e7e206d 100644 --- a/extra/project-euler/project-euler.factor +++ b/extra/project-euler/project-euler.factor @@ -1,7 +1,7 @@ ! Copyright (c) 2007, 2008 Aaron Schaefer, Samuel Tardieu. ! See http://factorcode.org/license.txt for BSD license. USING: definitions io io.files kernel math math.parser project-euler.ave-time - sequences vocabs vocabs.loader + sequences vocabs vocabs.loader prettyprint project-euler.001 project-euler.002 project-euler.003 project-euler.004 project-euler.005 project-euler.006 project-euler.007 project-euler.008 project-euler.009 project-euler.010 project-euler.011 project-euler.012 @@ -33,7 +33,7 @@ IN: project-euler : solution-path ( n -- str/f ) number>euler "project-euler." prepend - vocab where dup [ first ] when ; + vocab where dup [ first ] when ; PRIVATE> @@ -43,8 +43,8 @@ PRIVATE> : run-project-euler ( -- ) problem-prompt dup problem-solved? [ dup number>euler "project-euler." prepend run - "Answer: " swap dup number? [ number>string ] when append print - "Source: " swap solution-path append print + "Answer: " write dup number? [ number>string ] when print + "Source: " write solution-path . ] [ drop "That problem has not been solved yet..." print ] if ; From 46aa56730b4a5c79cc326813026862b5a6e69649 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 6 Nov 2008 16:53:00 -0600 Subject: [PATCH 3/3] better parsing for anchors --- basis/regexp/parser/parser.factor | 44 ++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index d2ed346bf2..d04016b93a 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -233,15 +233,22 @@ ERROR: invalid-range a b ; SINGLETON: beginning-of-input SINGLETON: end-of-input -! : beginning-of-input ( -- obj ) -: handle-front-anchor ( -- ) front-anchor push-stack ; -: end-of-line ( -- obj ) - end-of-input +: newlines ( -- obj1 obj2 obj3 ) CHAR: \r CHAR: \n - 2dup 2array 4array lookahead boa ; + 2dup 2array ; -: handle-back-anchor ( -- ) end-of-line push-stack ; +: beginning-of-line ( -- obj ) + beginning-of-input newlines 4array lookbehind boa ; + +: end-of-line ( -- obj ) + end-of-input newlines 4array lookahead boa ; + +: handle-front-anchor ( -- ) + get-multiline beginning-of-line beginning-of-input ? push-stack ; + +: handle-back-anchor ( -- ) + get-multiline end-of-line end-of-input ? push-stack ; ERROR: bad-character-class obj ; ERROR: expected-posix-class ; @@ -412,16 +419,11 @@ DEFER: handle-left-bracket [ [ push ] keep current-regexp get (>>stack) ] [ finish-regexp-parse push-stack ] bi* ; - : parse-regexp-token ( token -- ? ) { -! todo: only match these at beginning/end of regexp - { CHAR: ^ [ handle-front-anchor t ] } - { CHAR: $ [ handle-back-anchor t ] } - - { CHAR: . [ handle-dot t ] } - { CHAR: ( [ handle-left-parenthesis t ] } + { CHAR: ( [ handle-left-parenthesis t ] } ! handle (?..) at beginning? { CHAR: ) [ handle-right-parenthesis f ] } + { CHAR: . [ handle-dot t ] } { CHAR: | [ handle-pipe t ] } { CHAR: ? [ handle-question t ] } { CHAR: * [ handle-star t ] } @@ -429,16 +431,28 @@ DEFER: handle-left-bracket { CHAR: { [ handle-left-brace t ] } { CHAR: [ [ handle-left-bracket t ] } { CHAR: \ [ handle-escape t ] } - [ push-stack t ] + [ + dup CHAR: $ = peek1 f = and [ + drop + handle-back-anchor f + ] [ + push-stack t + ] if + ] } case ; : (parse-regexp) ( -- ) read1 [ parse-regexp-token [ (parse-regexp) ] when ] when* ; +: parse-regexp-beginning ( -- ) + peek1 CHAR: ^ = [ drop1 handle-front-anchor ] when ; + : parse-regexp ( regexp -- ) dup current-regexp [ raw>> [ - [ (parse-regexp) ] with-input-stream + [ + parse-regexp-beginning (parse-regexp) + ] with-input-stream ] unless-empty current-regexp get stack finish-regexp-parse