a bit of refactoring, preparing to take options out of the parsing stage

db4
Doug Coleman 2008-11-24 12:59:29 -06:00
parent 87bdc0acd3
commit f8a23c657b
5 changed files with 37 additions and 24 deletions

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors hashtables kernel math state-tables vectors ; USING: accessors hashtables kernel math vectors ;
IN: regexp.backend IN: regexp.backend
TUPLE: regexp TUPLE: regexp

View File

@ -1,9 +1,9 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs grouping kernel regexp.backend USING: accessors arrays assocs grouping kernel regexp.backend
locals math namespaces regexp.parser sequences state-tables fry locals math namespaces regexp.parser sequences fry quotations
quotations math.order math.ranges vectors unicode.categories math.order math.ranges vectors unicode.categories regexp.utils
regexp.utils regexp.transition-tables words sets ; regexp.transition-tables words sets ;
IN: regexp.nfa IN: regexp.nfa
SYMBOL: negation-mode SYMBOL: negation-mode
@ -22,6 +22,9 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
: add-global-flag ( flag -- )
current-regexp get nfa-table>> flags>> conjoin ;
: next-state ( regexp -- state ) : next-state ( regexp -- state )
[ state>> ] [ [ 1+ ] change-state drop ] bi ; [ state>> ] [ [ 1+ ] change-state drop ] bi ;
@ -138,21 +141,25 @@ M: non-capture-group nfa-node ( node -- )
M: reluctant-kleene-star nfa-node ( node -- ) M: reluctant-kleene-star nfa-node ( node -- )
term>> <kleene-star> nfa-node ; term>> <kleene-star> nfa-node ;
: add-epsilon-flag ( flag -- )
eps literal-transition add-simple-entry add-traversal-flag ;
M: beginning-of-line nfa-node ( node -- ) M: beginning-of-line nfa-node ( node -- )
drop beginning-of-line add-epsilon-flag ; drop
eps literal-transition add-simple-entry
beginning-of-line add-global-flag ;
M: end-of-line nfa-node ( node -- ) M: end-of-line nfa-node ( node -- )
drop end-of-line add-epsilon-flag ; drop
eps literal-transition add-simple-entry
end-of-line add-global-flag ;
M: beginning-of-input nfa-node ( node -- ) M: beginning-of-input nfa-node ( node -- )
drop beginning-of-input add-epsilon-flag ; drop
eps literal-transition add-simple-entry
beginning-of-input add-global-flag ;
M: end-of-input nfa-node ( node -- ) M: end-of-input nfa-node ( node -- )
drop end-of-input add-epsilon-flag ; drop
eps literal-transition add-simple-entry
end-of-input add-global-flag ;
M: negation nfa-node ( node -- ) M: negation nfa-node ( node -- )
negation-mode inc negation-mode inc

View File

@ -58,7 +58,7 @@ SINGLETONS: letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class control-character-class hex-digit-class java-blank-class c-identifier-class
terminator-class unmatchable-class word-boundary-class ; unmatchable-class terminator-class word-boundary-class ;
SINGLETONS: beginning-of-group end-of-group SINGLETONS: beginning-of-group end-of-group
beginning-of-character-class end-of-character-class beginning-of-character-class end-of-character-class
@ -87,8 +87,8 @@ left-parenthesis pipe caret dash ;
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ; : <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
: <constant> ( obj -- constant ) : <constant> ( obj -- constant )
dup Letter? get-case-insensitive and [ dup Letter? get-case-insensitive and [
[ ch>lower constant boa ] [ ch>lower ] [ ch>upper ] bi
[ ch>upper constant boa ] bi 2array <alternation> [ constant boa ] bi@ 2array <alternation>
] [ ] [
constant boa constant boa
] if ; ] if ;
@ -384,20 +384,22 @@ DEFER: handle-left-bracket
} case } case
[ (parse-character-class) ] when ; [ (parse-character-class) ] when ;
: push-constant ( ch -- ) <constant> push-stack ;
: parse-character-class-second ( -- ) : parse-character-class-second ( -- )
read1 { read1 {
{ CHAR: [ [ CHAR: [ <constant> push-stack ] } { CHAR: [ [ CHAR: [ push-constant ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] } { CHAR: ] [ CHAR: ] push-constant ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] } { CHAR: - [ CHAR: - push-constant ] }
[ push1 ] [ push1 ]
} case ; } case ;
: parse-character-class-first ( -- ) : parse-character-class-first ( -- )
read1 { read1 {
{ CHAR: ^ [ caret push-stack parse-character-class-second ] } { CHAR: ^ [ caret push-stack parse-character-class-second ] }
{ CHAR: [ [ CHAR: [ <constant> push-stack ] } { CHAR: [ [ CHAR: [ push-constant ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] } { CHAR: ] [ CHAR: ] push-constant ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] } { CHAR: - [ CHAR: - push-constant ] }
[ push1 ] [ push1 ]
} case ; } case ;
@ -431,7 +433,7 @@ DEFER: handle-left-bracket
drop drop
handle-back-anchor f handle-back-anchor f
] [ ] [
<constant> push-stack t push-constant t
] if ] if
] ]
} case ; } case ;

View File

@ -25,12 +25,13 @@ TUPLE: default ;
: <default-transition> ( from to -- transition ) : <default-transition> ( from to -- transition )
t default-transition make-transition ; t default-transition make-transition ;
TUPLE: transition-table transitions start-state final-states ; TUPLE: transition-table transitions start-state final-states flags ;
: <transition-table> ( -- transition-table ) : <transition-table> ( -- transition-table )
transition-table new transition-table new
H{ } clone >>transitions H{ } clone >>transitions
H{ } clone >>final-states ; H{ } clone >>final-states
H{ } clone >>flags ;
: maybe-initialize-key ( key hashtable -- ) : maybe-initialize-key ( key hashtable -- )
2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ; 2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;

View File

@ -82,6 +82,7 @@ M: end-of-input flag-action ( dfa-traverser flag -- )
drop drop
dup end-of-text? [ t >>match-failed? ] unless drop ; dup end-of-text? [ t >>match-failed? ] unless drop ;
M: beginning-of-line flag-action ( dfa-traverser flag -- ) M: beginning-of-line flag-action ( dfa-traverser flag -- )
drop drop
dup { dup {
@ -96,6 +97,7 @@ M: end-of-line flag-action ( dfa-traverser flag -- )
[ next-text-character terminator-class class-member? ] [ next-text-character terminator-class class-member? ]
} 1|| [ t >>match-failed? ] unless drop ; } 1|| [ t >>match-failed? ] unless drop ;
M: word-boundary flag-action ( dfa-traverser flag -- ) M: word-boundary flag-action ( dfa-traverser flag -- )
drop drop
dup { dup {
@ -103,6 +105,7 @@ M: word-boundary flag-action ( dfa-traverser flag -- )
[ current-text-character terminator-class class-member? ] [ current-text-character terminator-class class-member? ]
} 1|| [ t >>match-failed? ] unless drop ; } 1|| [ t >>match-failed? ] unless drop ;
M: lookahead-on flag-action ( dfa-traverser flag -- ) M: lookahead-on flag-action ( dfa-traverser flag -- )
drop drop
lookahead-counters>> 0 swap push ; lookahead-counters>> 0 swap push ;