2008-08-26 21:24:14 -04:00
|
|
|
! Copyright (C) 2008 Doug Coleman.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2008-09-18 15:42:16 -04:00
|
|
|
USING: accessors arrays assocs grouping kernel regexp.backend
|
|
|
|
locals math namespaces regexp.parser sequences state-tables fry
|
2008-08-26 21:24:14 -04:00
|
|
|
quotations math.order math.ranges vectors unicode.categories
|
2008-09-18 15:42:16 -04:00
|
|
|
regexp.utils regexp.transition-tables words sets ;
|
|
|
|
IN: regexp.nfa
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
SYMBOL: negation-mode
|
|
|
|
: negated? ( -- ? ) negation-mode get 0 or odd? ;
|
|
|
|
|
|
|
|
SINGLETON: eps
|
|
|
|
|
2008-08-28 14:45:04 -04:00
|
|
|
MIXIN: traversal-flag
|
|
|
|
SINGLETON: lookahead-on INSTANCE: lookahead-on traversal-flag
|
|
|
|
SINGLETON: lookahead-off INSTANCE: lookahead-off traversal-flag
|
2008-09-22 14:37:27 -04:00
|
|
|
SINGLETON: lookbehind-on INSTANCE: lookbehind-on traversal-flag
|
|
|
|
SINGLETON: lookbehind-off INSTANCE: lookbehind-off traversal-flag
|
2008-08-28 14:45:04 -04:00
|
|
|
SINGLETON: capture-group-on INSTANCE: capture-group-on traversal-flag
|
|
|
|
SINGLETON: capture-group-off INSTANCE: capture-group-off traversal-flag
|
2008-11-24 01:18:27 -05:00
|
|
|
SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
|
|
|
|
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
|
|
|
|
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
|
2008-08-28 14:45:04 -04:00
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
: next-state ( regexp -- state )
|
|
|
|
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
|
|
|
|
|
|
|
: set-start-state ( regexp -- )
|
|
|
|
dup stack>> [
|
|
|
|
drop
|
|
|
|
] [
|
|
|
|
[ nfa-table>> ] [ pop first ] bi* >>start-state drop
|
|
|
|
] if-empty ;
|
|
|
|
|
|
|
|
GENERIC: nfa-node ( node -- )
|
|
|
|
|
|
|
|
:: add-simple-entry ( obj class -- )
|
|
|
|
[let* | regexp [ current-regexp get ]
|
|
|
|
s0 [ regexp next-state ]
|
|
|
|
s1 [ regexp next-state ]
|
|
|
|
stack [ regexp stack>> ]
|
|
|
|
table [ regexp nfa-table>> ] |
|
|
|
|
negated? [
|
2008-08-27 17:22:34 -04:00
|
|
|
s0 f obj class make-transition table add-transition
|
2008-08-26 21:24:14 -04:00
|
|
|
s0 s1 <default-transition> table add-transition
|
|
|
|
] [
|
2008-08-27 17:22:34 -04:00
|
|
|
s0 s1 obj class make-transition table add-transition
|
2008-08-26 21:24:14 -04:00
|
|
|
] if
|
|
|
|
s0 s1 2array stack push
|
|
|
|
t s1 table final-states>> set-at ] ;
|
|
|
|
|
2008-08-28 14:45:04 -04:00
|
|
|
: add-traversal-flag ( flag -- )
|
|
|
|
stack peek second
|
2008-08-28 23:08:54 -04:00
|
|
|
current-regexp get nfa-traversal-flags>> push-at ;
|
2008-08-28 14:45:04 -04:00
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
:: concatenate-nodes ( -- )
|
|
|
|
[let* | regexp [ current-regexp get ]
|
|
|
|
stack [ regexp stack>> ]
|
|
|
|
table [ regexp nfa-table>> ]
|
|
|
|
s2 [ stack peek first ]
|
|
|
|
s3 [ stack pop second ]
|
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ] |
|
|
|
|
s1 s2 eps <literal-transition> table add-transition
|
|
|
|
s1 table final-states>> delete-at
|
|
|
|
s0 s3 2array stack push ] ;
|
|
|
|
|
|
|
|
:: alternate-nodes ( -- )
|
|
|
|
[let* | regexp [ current-regexp get ]
|
|
|
|
stack [ regexp stack>> ]
|
|
|
|
table [ regexp nfa-table>> ]
|
|
|
|
s2 [ stack peek first ]
|
|
|
|
s3 [ stack pop second ]
|
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ]
|
|
|
|
s4 [ regexp next-state ]
|
|
|
|
s5 [ regexp next-state ] |
|
|
|
|
s4 s0 eps <literal-transition> table add-transition
|
|
|
|
s4 s2 eps <literal-transition> table add-transition
|
|
|
|
s1 s5 eps <literal-transition> table add-transition
|
|
|
|
s3 s5 eps <literal-transition> table add-transition
|
|
|
|
s1 table final-states>> delete-at
|
|
|
|
s3 table final-states>> delete-at
|
|
|
|
t s5 table final-states>> set-at
|
|
|
|
s4 s5 2array stack push ] ;
|
|
|
|
|
|
|
|
M: kleene-star nfa-node ( node -- )
|
|
|
|
term>> nfa-node
|
|
|
|
[let* | regexp [ current-regexp get ]
|
|
|
|
stack [ regexp stack>> ]
|
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ]
|
|
|
|
s2 [ regexp next-state ]
|
|
|
|
s3 [ regexp next-state ]
|
|
|
|
table [ regexp nfa-table>> ] |
|
|
|
|
s1 table final-states>> delete-at
|
|
|
|
t s3 table final-states>> set-at
|
|
|
|
s1 s0 eps <literal-transition> table add-transition
|
|
|
|
s2 s0 eps <literal-transition> table add-transition
|
|
|
|
s2 s3 eps <literal-transition> table add-transition
|
|
|
|
s1 s3 eps <literal-transition> table add-transition
|
|
|
|
s2 s3 2array stack push ] ;
|
|
|
|
|
|
|
|
M: concatenation nfa-node ( node -- )
|
|
|
|
seq>>
|
|
|
|
[ [ nfa-node ] each ]
|
|
|
|
[ length 1- [ concatenate-nodes ] times ] bi ;
|
|
|
|
|
|
|
|
M: alternation nfa-node ( node -- )
|
|
|
|
seq>>
|
|
|
|
[ [ nfa-node ] each ]
|
|
|
|
[ length 1- [ alternate-nodes ] times ] bi ;
|
|
|
|
|
|
|
|
M: constant nfa-node ( node -- )
|
|
|
|
char>> literal-transition add-simple-entry ;
|
|
|
|
|
|
|
|
M: epsilon nfa-node ( node -- )
|
|
|
|
drop eps literal-transition add-simple-entry ;
|
|
|
|
|
|
|
|
M: word nfa-node ( node -- )
|
|
|
|
class-transition add-simple-entry ;
|
|
|
|
|
|
|
|
M: character-class-range nfa-node ( node -- )
|
|
|
|
class-transition add-simple-entry ;
|
|
|
|
|
|
|
|
M: capture-group nfa-node ( node -- )
|
2008-09-22 21:09:42 -04:00
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
capture-group-on add-traversal-flag
|
|
|
|
term>> nfa-node
|
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
capture-group-off add-traversal-flag
|
|
|
|
2 [ concatenate-nodes ] times ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2008-09-18 15:42:16 -04:00
|
|
|
! xyzzy
|
|
|
|
M: non-capture-group nfa-node ( node -- )
|
|
|
|
term>> nfa-node ;
|
|
|
|
|
|
|
|
M: reluctant-kleene-star nfa-node ( node -- )
|
|
|
|
term>> <kleene-star> nfa-node ;
|
|
|
|
|
2008-11-24 01:18:27 -05:00
|
|
|
|
|
|
|
: add-epsilon-flag ( flag -- )
|
|
|
|
eps literal-transition add-simple-entry add-traversal-flag ;
|
|
|
|
|
|
|
|
M: beginning-of-line nfa-node ( node -- )
|
|
|
|
drop beginning-of-line add-epsilon-flag ;
|
|
|
|
|
|
|
|
M: end-of-line nfa-node ( node -- )
|
|
|
|
drop end-of-line add-epsilon-flag ;
|
|
|
|
|
|
|
|
M: beginning-of-input nfa-node ( node -- )
|
|
|
|
drop beginning-of-input add-epsilon-flag ;
|
|
|
|
|
|
|
|
M: end-of-input nfa-node ( node -- )
|
|
|
|
drop end-of-input add-epsilon-flag ;
|
2008-09-18 15:42:16 -04:00
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
M: negation nfa-node ( node -- )
|
|
|
|
negation-mode inc
|
|
|
|
term>> nfa-node
|
|
|
|
negation-mode dec ;
|
|
|
|
|
2008-08-28 14:45:04 -04:00
|
|
|
M: lookahead nfa-node ( node -- )
|
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
lookahead-on add-traversal-flag
|
|
|
|
term>> nfa-node
|
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
lookahead-off add-traversal-flag
|
|
|
|
2 [ concatenate-nodes ] times ;
|
|
|
|
|
2008-09-22 14:37:27 -04:00
|
|
|
M: lookbehind nfa-node ( node -- )
|
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
lookbehind-on add-traversal-flag
|
|
|
|
term>> nfa-node
|
|
|
|
eps literal-transition add-simple-entry
|
|
|
|
lookbehind-off add-traversal-flag
|
|
|
|
2 [ concatenate-nodes ] times ;
|
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
: construct-nfa ( regexp -- )
|
|
|
|
[
|
|
|
|
reset-regexp
|
|
|
|
negation-mode off
|
|
|
|
[ current-regexp set ]
|
|
|
|
[ parse-tree>> nfa-node ]
|
|
|
|
[ set-start-state ] tri
|
|
|
|
] with-scope ;
|