a bit of refactoring, preparing to take options out of the parsing stage
parent
87bdc0acd3
commit
f8a23c657b
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors hashtables kernel math state-tables vectors ;
|
||||
USING: accessors hashtables kernel math vectors ;
|
||||
IN: regexp.backend
|
||||
|
||||
TUPLE: regexp
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs grouping kernel regexp.backend
|
||||
locals math namespaces regexp.parser sequences state-tables fry
|
||||
quotations math.order math.ranges vectors unicode.categories
|
||||
regexp.utils regexp.transition-tables words sets ;
|
||||
locals math namespaces regexp.parser sequences fry quotations
|
||||
math.order math.ranges vectors unicode.categories regexp.utils
|
||||
regexp.transition-tables words sets ;
|
||||
IN: regexp.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
|
@ -22,6 +22,9 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
|
|||
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
|
||||
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
|
||||
|
||||
: add-global-flag ( flag -- )
|
||||
current-regexp get nfa-table>> flags>> conjoin ;
|
||||
|
||||
: next-state ( regexp -- state )
|
||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||
|
||||
|
@ -138,21 +141,25 @@ M: non-capture-group nfa-node ( node -- )
|
|||
M: reluctant-kleene-star nfa-node ( node -- )
|
||||
term>> <kleene-star> nfa-node ;
|
||||
|
||||
|
||||
: add-epsilon-flag ( flag -- )
|
||||
eps literal-transition add-simple-entry add-traversal-flag ;
|
||||
|
||||
M: beginning-of-line nfa-node ( node -- )
|
||||
drop beginning-of-line add-epsilon-flag ;
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
beginning-of-line add-global-flag ;
|
||||
|
||||
M: end-of-line nfa-node ( node -- )
|
||||
drop end-of-line add-epsilon-flag ;
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
end-of-line add-global-flag ;
|
||||
|
||||
M: beginning-of-input nfa-node ( node -- )
|
||||
drop beginning-of-input add-epsilon-flag ;
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
beginning-of-input add-global-flag ;
|
||||
|
||||
M: end-of-input nfa-node ( node -- )
|
||||
drop end-of-input add-epsilon-flag ;
|
||||
drop
|
||||
eps literal-transition add-simple-entry
|
||||
end-of-input add-global-flag ;
|
||||
|
||||
M: negation nfa-node ( node -- )
|
||||
negation-mode inc
|
||||
|
|
|
@ -58,7 +58,7 @@ SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
|||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
terminator-class unmatchable-class word-boundary-class ;
|
||||
unmatchable-class terminator-class word-boundary-class ;
|
||||
|
||||
SINGLETONS: beginning-of-group end-of-group
|
||||
beginning-of-character-class end-of-character-class
|
||||
|
@ -87,8 +87,8 @@ left-parenthesis pipe caret dash ;
|
|||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||
: <constant> ( obj -- constant )
|
||||
dup Letter? get-case-insensitive and [
|
||||
[ ch>lower constant boa ]
|
||||
[ ch>upper constant boa ] bi 2array <alternation>
|
||||
[ ch>lower ] [ ch>upper ] bi
|
||||
[ constant boa ] bi@ 2array <alternation>
|
||||
] [
|
||||
constant boa
|
||||
] if ;
|
||||
|
@ -384,20 +384,22 @@ DEFER: handle-left-bracket
|
|||
} case
|
||||
[ (parse-character-class) ] when ;
|
||||
|
||||
: push-constant ( ch -- ) <constant> push-stack ;
|
||||
|
||||
: parse-character-class-second ( -- )
|
||||
read1 {
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
{ CHAR: [ [ CHAR: [ push-constant ] }
|
||||
{ CHAR: ] [ CHAR: ] push-constant ] }
|
||||
{ CHAR: - [ CHAR: - push-constant ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class-first ( -- )
|
||||
read1 {
|
||||
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
{ CHAR: [ [ CHAR: [ push-constant ] }
|
||||
{ CHAR: ] [ CHAR: ] push-constant ] }
|
||||
{ CHAR: - [ CHAR: - push-constant ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
|
@ -431,7 +433,7 @@ DEFER: handle-left-bracket
|
|||
drop
|
||||
handle-back-anchor f
|
||||
] [
|
||||
<constant> push-stack t
|
||||
push-constant t
|
||||
] if
|
||||
]
|
||||
} case ;
|
||||
|
|
|
@ -25,12 +25,13 @@ TUPLE: default ;
|
|||
: <default-transition> ( from to -- transition )
|
||||
t default-transition make-transition ;
|
||||
|
||||
TUPLE: transition-table transitions start-state final-states ;
|
||||
TUPLE: transition-table transitions start-state final-states flags ;
|
||||
|
||||
: <transition-table> ( -- transition-table )
|
||||
transition-table new
|
||||
H{ } clone >>transitions
|
||||
H{ } clone >>final-states ;
|
||||
H{ } clone >>final-states
|
||||
H{ } clone >>flags ;
|
||||
|
||||
: maybe-initialize-key ( key hashtable -- )
|
||||
2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;
|
||||
|
|
|
@ -82,6 +82,7 @@ M: end-of-input flag-action ( dfa-traverser flag -- )
|
|||
drop
|
||||
dup end-of-text? [ t >>match-failed? ] unless drop ;
|
||||
|
||||
|
||||
M: beginning-of-line flag-action ( dfa-traverser flag -- )
|
||||
drop
|
||||
dup {
|
||||
|
@ -96,6 +97,7 @@ M: end-of-line flag-action ( dfa-traverser flag -- )
|
|||
[ next-text-character terminator-class class-member? ]
|
||||
} 1|| [ t >>match-failed? ] unless drop ;
|
||||
|
||||
|
||||
M: word-boundary flag-action ( dfa-traverser flag -- )
|
||||
drop
|
||||
dup {
|
||||
|
@ -103,6 +105,7 @@ M: word-boundary flag-action ( dfa-traverser flag -- )
|
|||
[ current-text-character terminator-class class-member? ]
|
||||
} 1|| [ t >>match-failed? ] unless drop ;
|
||||
|
||||
|
||||
M: lookahead-on flag-action ( dfa-traverser flag -- )
|
||||
drop
|
||||
lookahead-counters>> 0 swap push ;
|
||||
|
|
Loading…
Reference in New Issue