a bit of refactoring, preparing to take options out of the parsing stage

db4
Doug Coleman 2008-11-24 12:59:29 -06:00
parent 87bdc0acd3
commit f8a23c657b
5 changed files with 37 additions and 24 deletions

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors hashtables kernel math state-tables vectors ;
USING: accessors hashtables kernel math vectors ;
IN: regexp.backend
TUPLE: regexp

View File

@ -1,9 +1,9 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs grouping kernel regexp.backend
locals math namespaces regexp.parser sequences state-tables fry
quotations math.order math.ranges vectors unicode.categories
regexp.utils regexp.transition-tables words sets ;
locals math namespaces regexp.parser sequences fry quotations
math.order math.ranges vectors unicode.categories regexp.utils
regexp.transition-tables words sets ;
IN: regexp.nfa
SYMBOL: negation-mode
@ -22,6 +22,9 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
: add-global-flag ( flag -- )
current-regexp get nfa-table>> flags>> conjoin ;
: next-state ( regexp -- state )
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
@ -138,21 +141,25 @@ M: non-capture-group nfa-node ( node -- )
M: reluctant-kleene-star nfa-node ( node -- )
term>> <kleene-star> nfa-node ;
: add-epsilon-flag ( flag -- )
eps literal-transition add-simple-entry add-traversal-flag ;
M: beginning-of-line nfa-node ( node -- )
drop beginning-of-line add-epsilon-flag ;
drop
eps literal-transition add-simple-entry
beginning-of-line add-global-flag ;
M: end-of-line nfa-node ( node -- )
drop end-of-line add-epsilon-flag ;
drop
eps literal-transition add-simple-entry
end-of-line add-global-flag ;
M: beginning-of-input nfa-node ( node -- )
drop beginning-of-input add-epsilon-flag ;
drop
eps literal-transition add-simple-entry
beginning-of-input add-global-flag ;
M: end-of-input nfa-node ( node -- )
drop end-of-input add-epsilon-flag ;
drop
eps literal-transition add-simple-entry
end-of-input add-global-flag ;
M: negation nfa-node ( node -- )
negation-mode inc

View File

@ -58,7 +58,7 @@ SINGLETONS: letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class
terminator-class unmatchable-class word-boundary-class ;
unmatchable-class terminator-class word-boundary-class ;
SINGLETONS: beginning-of-group end-of-group
beginning-of-character-class end-of-character-class
@ -87,8 +87,8 @@ left-parenthesis pipe caret dash ;
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
: <constant> ( obj -- constant )
dup Letter? get-case-insensitive and [
[ ch>lower constant boa ]
[ ch>upper constant boa ] bi 2array <alternation>
[ ch>lower ] [ ch>upper ] bi
[ constant boa ] bi@ 2array <alternation>
] [
constant boa
] if ;
@ -384,20 +384,22 @@ DEFER: handle-left-bracket
} case
[ (parse-character-class) ] when ;
: push-constant ( ch -- ) <constant> push-stack ;
: parse-character-class-second ( -- )
read1 {
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
{ CHAR: [ [ CHAR: [ push-constant ] }
{ CHAR: ] [ CHAR: ] push-constant ] }
{ CHAR: - [ CHAR: - push-constant ] }
[ push1 ]
} case ;
: parse-character-class-first ( -- )
read1 {
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
{ CHAR: [ [ CHAR: [ push-constant ] }
{ CHAR: ] [ CHAR: ] push-constant ] }
{ CHAR: - [ CHAR: - push-constant ] }
[ push1 ]
} case ;
@ -431,7 +433,7 @@ DEFER: handle-left-bracket
drop
handle-back-anchor f
] [
<constant> push-stack t
push-constant t
] if
]
} case ;

View File

@ -25,12 +25,13 @@ TUPLE: default ;
: <default-transition> ( from to -- transition )
t default-transition make-transition ;
TUPLE: transition-table transitions start-state final-states ;
TUPLE: transition-table transitions start-state final-states flags ;
: <transition-table> ( -- transition-table )
transition-table new
H{ } clone >>transitions
H{ } clone >>final-states ;
H{ } clone >>final-states
H{ } clone >>flags ;
: maybe-initialize-key ( key hashtable -- )
2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;

View File

@ -82,6 +82,7 @@ M: end-of-input flag-action ( dfa-traverser flag -- )
drop
dup end-of-text? [ t >>match-failed? ] unless drop ;
M: beginning-of-line flag-action ( dfa-traverser flag -- )
drop
dup {
@ -96,6 +97,7 @@ M: end-of-line flag-action ( dfa-traverser flag -- )
[ next-text-character terminator-class class-member? ]
} 1|| [ t >>match-failed? ] unless drop ;
M: word-boundary flag-action ( dfa-traverser flag -- )
drop
dup {
@ -103,6 +105,7 @@ M: word-boundary flag-action ( dfa-traverser flag -- )
[ current-text-character terminator-class class-member? ]
} 1|| [ t >>match-failed? ] unless drop ;
M: lookahead-on flag-action ( dfa-traverser flag -- )
drop
lookahead-counters>> 0 swap push ;