a bit of refactoring, preparing to take options out of the parsing stage
parent
87bdc0acd3
commit
f8a23c657b
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors hashtables kernel math state-tables vectors ;
|
USING: accessors hashtables kernel math vectors ;
|
||||||
IN: regexp.backend
|
IN: regexp.backend
|
||||||
|
|
||||||
TUPLE: regexp
|
TUPLE: regexp
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs grouping kernel regexp.backend
|
USING: accessors arrays assocs grouping kernel regexp.backend
|
||||||
locals math namespaces regexp.parser sequences state-tables fry
|
locals math namespaces regexp.parser sequences fry quotations
|
||||||
quotations math.order math.ranges vectors unicode.categories
|
math.order math.ranges vectors unicode.categories regexp.utils
|
||||||
regexp.utils regexp.transition-tables words sets ;
|
regexp.transition-tables words sets ;
|
||||||
IN: regexp.nfa
|
IN: regexp.nfa
|
||||||
|
|
||||||
SYMBOL: negation-mode
|
SYMBOL: negation-mode
|
||||||
|
@ -22,6 +22,9 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
|
||||||
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
|
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
|
||||||
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
|
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
|
||||||
|
|
||||||
|
: add-global-flag ( flag -- )
|
||||||
|
current-regexp get nfa-table>> flags>> conjoin ;
|
||||||
|
|
||||||
: next-state ( regexp -- state )
|
: next-state ( regexp -- state )
|
||||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||||
|
|
||||||
|
@ -138,21 +141,25 @@ M: non-capture-group nfa-node ( node -- )
|
||||||
M: reluctant-kleene-star nfa-node ( node -- )
|
M: reluctant-kleene-star nfa-node ( node -- )
|
||||||
term>> <kleene-star> nfa-node ;
|
term>> <kleene-star> nfa-node ;
|
||||||
|
|
||||||
|
|
||||||
: add-epsilon-flag ( flag -- )
|
|
||||||
eps literal-transition add-simple-entry add-traversal-flag ;
|
|
||||||
|
|
||||||
M: beginning-of-line nfa-node ( node -- )
|
M: beginning-of-line nfa-node ( node -- )
|
||||||
drop beginning-of-line add-epsilon-flag ;
|
drop
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
beginning-of-line add-global-flag ;
|
||||||
|
|
||||||
M: end-of-line nfa-node ( node -- )
|
M: end-of-line nfa-node ( node -- )
|
||||||
drop end-of-line add-epsilon-flag ;
|
drop
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
end-of-line add-global-flag ;
|
||||||
|
|
||||||
M: beginning-of-input nfa-node ( node -- )
|
M: beginning-of-input nfa-node ( node -- )
|
||||||
drop beginning-of-input add-epsilon-flag ;
|
drop
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
beginning-of-input add-global-flag ;
|
||||||
|
|
||||||
M: end-of-input nfa-node ( node -- )
|
M: end-of-input nfa-node ( node -- )
|
||||||
drop end-of-input add-epsilon-flag ;
|
drop
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
end-of-input add-global-flag ;
|
||||||
|
|
||||||
M: negation nfa-node ( node -- )
|
M: negation nfa-node ( node -- )
|
||||||
negation-mode inc
|
negation-mode inc
|
||||||
|
|
|
@ -58,7 +58,7 @@ SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||||
alpha-class non-newline-blank-class
|
alpha-class non-newline-blank-class
|
||||||
ascii-class punctuation-class java-printable-class blank-class
|
ascii-class punctuation-class java-printable-class blank-class
|
||||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||||
terminator-class unmatchable-class word-boundary-class ;
|
unmatchable-class terminator-class word-boundary-class ;
|
||||||
|
|
||||||
SINGLETONS: beginning-of-group end-of-group
|
SINGLETONS: beginning-of-group end-of-group
|
||||||
beginning-of-character-class end-of-character-class
|
beginning-of-character-class end-of-character-class
|
||||||
|
@ -87,8 +87,8 @@ left-parenthesis pipe caret dash ;
|
||||||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||||
: <constant> ( obj -- constant )
|
: <constant> ( obj -- constant )
|
||||||
dup Letter? get-case-insensitive and [
|
dup Letter? get-case-insensitive and [
|
||||||
[ ch>lower constant boa ]
|
[ ch>lower ] [ ch>upper ] bi
|
||||||
[ ch>upper constant boa ] bi 2array <alternation>
|
[ constant boa ] bi@ 2array <alternation>
|
||||||
] [
|
] [
|
||||||
constant boa
|
constant boa
|
||||||
] if ;
|
] if ;
|
||||||
|
@ -384,20 +384,22 @@ DEFER: handle-left-bracket
|
||||||
} case
|
} case
|
||||||
[ (parse-character-class) ] when ;
|
[ (parse-character-class) ] when ;
|
||||||
|
|
||||||
|
: push-constant ( ch -- ) <constant> push-stack ;
|
||||||
|
|
||||||
: parse-character-class-second ( -- )
|
: parse-character-class-second ( -- )
|
||||||
read1 {
|
read1 {
|
||||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
{ CHAR: [ [ CHAR: [ push-constant ] }
|
||||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
{ CHAR: ] [ CHAR: ] push-constant ] }
|
||||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
{ CHAR: - [ CHAR: - push-constant ] }
|
||||||
[ push1 ]
|
[ push1 ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: parse-character-class-first ( -- )
|
: parse-character-class-first ( -- )
|
||||||
read1 {
|
read1 {
|
||||||
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
|
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
|
||||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
{ CHAR: [ [ CHAR: [ push-constant ] }
|
||||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
{ CHAR: ] [ CHAR: ] push-constant ] }
|
||||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
{ CHAR: - [ CHAR: - push-constant ] }
|
||||||
[ push1 ]
|
[ push1 ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
@ -431,7 +433,7 @@ DEFER: handle-left-bracket
|
||||||
drop
|
drop
|
||||||
handle-back-anchor f
|
handle-back-anchor f
|
||||||
] [
|
] [
|
||||||
<constant> push-stack t
|
push-constant t
|
||||||
] if
|
] if
|
||||||
]
|
]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
|
@ -25,12 +25,13 @@ TUPLE: default ;
|
||||||
: <default-transition> ( from to -- transition )
|
: <default-transition> ( from to -- transition )
|
||||||
t default-transition make-transition ;
|
t default-transition make-transition ;
|
||||||
|
|
||||||
TUPLE: transition-table transitions start-state final-states ;
|
TUPLE: transition-table transitions start-state final-states flags ;
|
||||||
|
|
||||||
: <transition-table> ( -- transition-table )
|
: <transition-table> ( -- transition-table )
|
||||||
transition-table new
|
transition-table new
|
||||||
H{ } clone >>transitions
|
H{ } clone >>transitions
|
||||||
H{ } clone >>final-states ;
|
H{ } clone >>final-states
|
||||||
|
H{ } clone >>flags ;
|
||||||
|
|
||||||
: maybe-initialize-key ( key hashtable -- )
|
: maybe-initialize-key ( key hashtable -- )
|
||||||
2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;
|
2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;
|
||||||
|
|
|
@ -82,6 +82,7 @@ M: end-of-input flag-action ( dfa-traverser flag -- )
|
||||||
drop
|
drop
|
||||||
dup end-of-text? [ t >>match-failed? ] unless drop ;
|
dup end-of-text? [ t >>match-failed? ] unless drop ;
|
||||||
|
|
||||||
|
|
||||||
M: beginning-of-line flag-action ( dfa-traverser flag -- )
|
M: beginning-of-line flag-action ( dfa-traverser flag -- )
|
||||||
drop
|
drop
|
||||||
dup {
|
dup {
|
||||||
|
@ -96,6 +97,7 @@ M: end-of-line flag-action ( dfa-traverser flag -- )
|
||||||
[ next-text-character terminator-class class-member? ]
|
[ next-text-character terminator-class class-member? ]
|
||||||
} 1|| [ t >>match-failed? ] unless drop ;
|
} 1|| [ t >>match-failed? ] unless drop ;
|
||||||
|
|
||||||
|
|
||||||
M: word-boundary flag-action ( dfa-traverser flag -- )
|
M: word-boundary flag-action ( dfa-traverser flag -- )
|
||||||
drop
|
drop
|
||||||
dup {
|
dup {
|
||||||
|
@ -103,6 +105,7 @@ M: word-boundary flag-action ( dfa-traverser flag -- )
|
||||||
[ current-text-character terminator-class class-member? ]
|
[ current-text-character terminator-class class-member? ]
|
||||||
} 1|| [ t >>match-failed? ] unless drop ;
|
} 1|| [ t >>match-failed? ] unless drop ;
|
||||||
|
|
||||||
|
|
||||||
M: lookahead-on flag-action ( dfa-traverser flag -- )
|
M: lookahead-on flag-action ( dfa-traverser flag -- )
|
||||||
drop
|
drop
|
||||||
lookahead-counters>> 0 swap push ;
|
lookahead-counters>> 0 swap push ;
|
||||||
|
|
Loading…
Reference in New Issue