parent
32e87a03cd
commit
97599d707b
|
@ -11,6 +11,7 @@ TUPLE: regexp
|
|||
nfa-table
|
||||
dfa-table
|
||||
minimized-table
|
||||
{ traversal-flags hashtable }
|
||||
{ state integer }
|
||||
{ new-states vector }
|
||||
{ visited-states hashtable } ;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors arrays assocs grouping kernel regexp2.backend
|
||||
locals math namespaces regexp2.parser sequences state-tables fry
|
||||
quotations math.order math.ranges vectors unicode.categories
|
||||
regexp2.utils regexp2.transition-tables words sequences.lib ;
|
||||
regexp2.utils regexp2.transition-tables words sequences.lib sets ;
|
||||
IN: regexp2.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
|
@ -11,6 +11,12 @@ SYMBOL: negation-mode
|
|||
|
||||
SINGLETON: eps
|
||||
|
||||
MIXIN: traversal-flag
|
||||
SINGLETON: lookahead-on INSTANCE: lookahead-on traversal-flag
|
||||
SINGLETON: lookahead-off INSTANCE: lookahead-off traversal-flag
|
||||
SINGLETON: capture-group-on INSTANCE: capture-group-on traversal-flag
|
||||
SINGLETON: capture-group-off INSTANCE: capture-group-off traversal-flag
|
||||
|
||||
: next-state ( regexp -- state )
|
||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||
|
||||
|
@ -38,6 +44,10 @@ GENERIC: nfa-node ( node -- )
|
|||
s0 s1 2array stack push
|
||||
t s1 table final-states>> set-at ] ;
|
||||
|
||||
: add-traversal-flag ( flag -- )
|
||||
stack peek second
|
||||
current-regexp get traversal-flags>> push-at ;
|
||||
|
||||
:: concatenate-nodes ( -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
|
@ -116,6 +126,14 @@ M: negation nfa-node ( node -- )
|
|||
term>> nfa-node
|
||||
negation-mode dec ;
|
||||
|
||||
M: lookahead nfa-node ( node -- )
|
||||
eps literal-transition add-simple-entry
|
||||
lookahead-on add-traversal-flag
|
||||
term>> nfa-node
|
||||
eps literal-transition add-simple-entry
|
||||
lookahead-off add-traversal-flag
|
||||
2 [ concatenate-nodes ] times ;
|
||||
|
||||
: construct-nfa ( regexp -- )
|
||||
[
|
||||
reset-regexp
|
||||
|
|
|
@ -12,6 +12,7 @@ IN: regexp2
|
|||
<transition-table> >>nfa-table
|
||||
<transition-table> >>dfa-table
|
||||
<transition-table> >>minimized-table
|
||||
H{ } clone >>traversal-flags
|
||||
reset-regexp ;
|
||||
|
||||
: construct-regexp ( regexp -- regexp' )
|
||||
|
@ -26,7 +27,8 @@ IN: regexp2
|
|||
<dfa-traverser> do-match return-match ;
|
||||
|
||||
: matches? ( string regexp -- ? )
|
||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
dupd match
|
||||
[ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end ) match length>> 1- ;
|
||||
|
||||
|
|
|
@ -1,20 +1,10 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||
vectors ;
|
||||
vectors regexp2.utils ;
|
||||
IN: regexp2.transition-tables
|
||||
|
||||
: insert-at ( value key hash -- )
|
||||
2dup at* [
|
||||
2nip push
|
||||
] [
|
||||
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
|
||||
] if ;
|
||||
|
||||
: ?insert-at ( value key hash/f -- hash )
|
||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||
|
||||
TUPLE: transition from to obj flags ;
|
||||
TUPLE: transition from to obj ;
|
||||
TUPLE: literal-transition < transition ;
|
||||
TUPLE: class-transition < transition ;
|
||||
TUPLE: default-transition < transition ;
|
||||
|
@ -26,8 +16,8 @@ TUPLE: default ;
|
|||
new
|
||||
swap >>obj
|
||||
swap >>to
|
||||
swap >>from
|
||||
H{ } clone >>flags ;
|
||||
swap >>from ;
|
||||
|
||||
: <literal-transition> ( from to obj -- transition )
|
||||
literal-transition make-transition ;
|
||||
: <class-transition> ( from to obj -- transition )
|
||||
|
@ -35,9 +25,7 @@ TUPLE: default ;
|
|||
: <default-transition> ( from to -- transition )
|
||||
t default-transition make-transition ;
|
||||
|
||||
TUPLE: transition-table transitions
|
||||
literals classes defaults
|
||||
start-state final-states ;
|
||||
TUPLE: transition-table transitions start-state final-states ;
|
||||
|
||||
: <transition-table> ( -- transition-table )
|
||||
transition-table new
|
||||
|
@ -45,7 +33,7 @@ TUPLE: transition-table transitions
|
|||
H{ } clone >>final-states ;
|
||||
|
||||
: set-transition ( transition hash -- )
|
||||
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
|
||||
[ [ to>> ] [ obj>> ] [ from>> ] tri ] dip
|
||||
2dup at* [ 2nip insert-at ]
|
||||
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
|
||||
|
||||
|
|
|
@ -3,25 +3,31 @@
|
|||
USING: accessors assocs combinators combinators.lib kernel
|
||||
math math.ranges quotations sequences regexp2.parser
|
||||
regexp2.classes combinators.short-circuit assocs.lib
|
||||
sequences.lib ;
|
||||
sequences.lib regexp2.utils ;
|
||||
IN: regexp2.traversal
|
||||
|
||||
TUPLE: dfa-traverser
|
||||
dfa-table
|
||||
traversal-flags
|
||||
capture-groups
|
||||
{ capture-group-index integer }
|
||||
{ lookahead-counter integer }
|
||||
last-state current-state
|
||||
text
|
||||
start-index current-index
|
||||
matches ;
|
||||
|
||||
: <dfa-traverser> ( text regexp -- match )
|
||||
dfa-table>>
|
||||
[ dfa-table>> ] [ traversal-flags>> ] bi
|
||||
dfa-traverser new
|
||||
swap >>traversal-flags
|
||||
swap [ start-state>> >>current-state ] keep
|
||||
>>dfa-table
|
||||
swap >>text
|
||||
0 >>start-index
|
||||
0 >>current-index
|
||||
V{ } clone >>matches ;
|
||||
V{ } clone >>matches
|
||||
V{ } clone >>capture-groups ;
|
||||
|
||||
: final-state? ( dfa-traverser -- ? )
|
||||
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
||||
|
@ -49,9 +55,6 @@ TUPLE: dfa-traverser
|
|||
: match-literal ( transition from-state table -- to-state/f )
|
||||
transitions>> [ at ] [ 2drop f ] if-at ;
|
||||
|
||||
: assoc-with ( param assoc quot -- assoc curry )
|
||||
swapd [ [ -rot ] dip call ] 2curry ; inline
|
||||
|
||||
: match-class ( transition from-state table -- to-state/f )
|
||||
transitions>> at* [
|
||||
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
USING: accessors arrays assocs combinators.lib io kernel
|
||||
math math.order namespaces regexp2.backend sequences
|
||||
sequences.lib unicode.categories math.ranges fry
|
||||
combinators.short-circuit ;
|
||||
combinators.short-circuit vectors ;
|
||||
IN: regexp2.utils
|
||||
|
||||
: (while-changes) ( obj quot pred pred-ret -- obj )
|
||||
|
@ -15,6 +15,20 @@ IN: regexp2.utils
|
|||
: while-changes ( obj quot pred -- obj' )
|
||||
pick over call (while-changes) ; inline
|
||||
|
||||
: assoc-with ( param assoc quot -- assoc curry )
|
||||
swapd [ [ -rot ] dip call ] 2curry ; inline
|
||||
|
||||
: insert-at ( value key hash -- )
|
||||
2dup at* [
|
||||
2nip push
|
||||
] [
|
||||
drop
|
||||
[ dup vector? [ 1vector ] unless ] 2dip set-at
|
||||
] if ;
|
||||
|
||||
: ?insert-at ( value key hash/f -- hash )
|
||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||
|
||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||
|
|
Loading…
Reference in New Issue