parent
32e87a03cd
commit
97599d707b
|
@ -11,6 +11,7 @@ TUPLE: regexp
|
||||||
nfa-table
|
nfa-table
|
||||||
dfa-table
|
dfa-table
|
||||||
minimized-table
|
minimized-table
|
||||||
|
{ traversal-flags hashtable }
|
||||||
{ state integer }
|
{ state integer }
|
||||||
{ new-states vector }
|
{ new-states vector }
|
||||||
{ visited-states hashtable } ;
|
{ visited-states hashtable } ;
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
USING: accessors arrays assocs grouping kernel regexp2.backend
|
USING: accessors arrays assocs grouping kernel regexp2.backend
|
||||||
locals math namespaces regexp2.parser sequences state-tables fry
|
locals math namespaces regexp2.parser sequences state-tables fry
|
||||||
quotations math.order math.ranges vectors unicode.categories
|
quotations math.order math.ranges vectors unicode.categories
|
||||||
regexp2.utils regexp2.transition-tables words sequences.lib ;
|
regexp2.utils regexp2.transition-tables words sequences.lib sets ;
|
||||||
IN: regexp2.nfa
|
IN: regexp2.nfa
|
||||||
|
|
||||||
SYMBOL: negation-mode
|
SYMBOL: negation-mode
|
||||||
|
@ -11,6 +11,12 @@ SYMBOL: negation-mode
|
||||||
|
|
||||||
SINGLETON: eps
|
SINGLETON: eps
|
||||||
|
|
||||||
|
MIXIN: traversal-flag
|
||||||
|
SINGLETON: lookahead-on INSTANCE: lookahead-on traversal-flag
|
||||||
|
SINGLETON: lookahead-off INSTANCE: lookahead-off traversal-flag
|
||||||
|
SINGLETON: capture-group-on INSTANCE: capture-group-on traversal-flag
|
||||||
|
SINGLETON: capture-group-off INSTANCE: capture-group-off traversal-flag
|
||||||
|
|
||||||
: next-state ( regexp -- state )
|
: next-state ( regexp -- state )
|
||||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||||
|
|
||||||
|
@ -38,6 +44,10 @@ GENERIC: nfa-node ( node -- )
|
||||||
s0 s1 2array stack push
|
s0 s1 2array stack push
|
||||||
t s1 table final-states>> set-at ] ;
|
t s1 table final-states>> set-at ] ;
|
||||||
|
|
||||||
|
: add-traversal-flag ( flag -- )
|
||||||
|
stack peek second
|
||||||
|
current-regexp get traversal-flags>> push-at ;
|
||||||
|
|
||||||
:: concatenate-nodes ( -- )
|
:: concatenate-nodes ( -- )
|
||||||
[let* | regexp [ current-regexp get ]
|
[let* | regexp [ current-regexp get ]
|
||||||
stack [ regexp stack>> ]
|
stack [ regexp stack>> ]
|
||||||
|
@ -116,6 +126,14 @@ M: negation nfa-node ( node -- )
|
||||||
term>> nfa-node
|
term>> nfa-node
|
||||||
negation-mode dec ;
|
negation-mode dec ;
|
||||||
|
|
||||||
|
M: lookahead nfa-node ( node -- )
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
lookahead-on add-traversal-flag
|
||||||
|
term>> nfa-node
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
lookahead-off add-traversal-flag
|
||||||
|
2 [ concatenate-nodes ] times ;
|
||||||
|
|
||||||
: construct-nfa ( regexp -- )
|
: construct-nfa ( regexp -- )
|
||||||
[
|
[
|
||||||
reset-regexp
|
reset-regexp
|
||||||
|
|
|
@ -12,6 +12,7 @@ IN: regexp2
|
||||||
<transition-table> >>nfa-table
|
<transition-table> >>nfa-table
|
||||||
<transition-table> >>dfa-table
|
<transition-table> >>dfa-table
|
||||||
<transition-table> >>minimized-table
|
<transition-table> >>minimized-table
|
||||||
|
H{ } clone >>traversal-flags
|
||||||
reset-regexp ;
|
reset-regexp ;
|
||||||
|
|
||||||
: construct-regexp ( regexp -- regexp' )
|
: construct-regexp ( regexp -- regexp' )
|
||||||
|
@ -26,7 +27,8 @@ IN: regexp2
|
||||||
<dfa-traverser> do-match return-match ;
|
<dfa-traverser> do-match return-match ;
|
||||||
|
|
||||||
: matches? ( string regexp -- ? )
|
: matches? ( string regexp -- ? )
|
||||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
dupd match
|
||||||
|
[ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||||
|
|
||||||
: match-head ( string regexp -- end ) match length>> 1- ;
|
: match-head ( string regexp -- end ) match length>> 1- ;
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,10 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||||
vectors ;
|
vectors regexp2.utils ;
|
||||||
IN: regexp2.transition-tables
|
IN: regexp2.transition-tables
|
||||||
|
|
||||||
: insert-at ( value key hash -- )
|
TUPLE: transition from to obj ;
|
||||||
2dup at* [
|
|
||||||
2nip push
|
|
||||||
] [
|
|
||||||
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: ?insert-at ( value key hash/f -- hash )
|
|
||||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
|
||||||
|
|
||||||
TUPLE: transition from to obj flags ;
|
|
||||||
TUPLE: literal-transition < transition ;
|
TUPLE: literal-transition < transition ;
|
||||||
TUPLE: class-transition < transition ;
|
TUPLE: class-transition < transition ;
|
||||||
TUPLE: default-transition < transition ;
|
TUPLE: default-transition < transition ;
|
||||||
|
@ -26,8 +16,8 @@ TUPLE: default ;
|
||||||
new
|
new
|
||||||
swap >>obj
|
swap >>obj
|
||||||
swap >>to
|
swap >>to
|
||||||
swap >>from
|
swap >>from ;
|
||||||
H{ } clone >>flags ;
|
|
||||||
: <literal-transition> ( from to obj -- transition )
|
: <literal-transition> ( from to obj -- transition )
|
||||||
literal-transition make-transition ;
|
literal-transition make-transition ;
|
||||||
: <class-transition> ( from to obj -- transition )
|
: <class-transition> ( from to obj -- transition )
|
||||||
|
@ -35,9 +25,7 @@ TUPLE: default ;
|
||||||
: <default-transition> ( from to -- transition )
|
: <default-transition> ( from to -- transition )
|
||||||
t default-transition make-transition ;
|
t default-transition make-transition ;
|
||||||
|
|
||||||
TUPLE: transition-table transitions
|
TUPLE: transition-table transitions start-state final-states ;
|
||||||
literals classes defaults
|
|
||||||
start-state final-states ;
|
|
||||||
|
|
||||||
: <transition-table> ( -- transition-table )
|
: <transition-table> ( -- transition-table )
|
||||||
transition-table new
|
transition-table new
|
||||||
|
@ -45,7 +33,7 @@ TUPLE: transition-table transitions
|
||||||
H{ } clone >>final-states ;
|
H{ } clone >>final-states ;
|
||||||
|
|
||||||
: set-transition ( transition hash -- )
|
: set-transition ( transition hash -- )
|
||||||
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
|
[ [ to>> ] [ obj>> ] [ from>> ] tri ] dip
|
||||||
2dup at* [ 2nip insert-at ]
|
2dup at* [ 2nip insert-at ]
|
||||||
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
|
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
|
||||||
|
|
||||||
|
|
|
@ -3,25 +3,31 @@
|
||||||
USING: accessors assocs combinators combinators.lib kernel
|
USING: accessors assocs combinators combinators.lib kernel
|
||||||
math math.ranges quotations sequences regexp2.parser
|
math math.ranges quotations sequences regexp2.parser
|
||||||
regexp2.classes combinators.short-circuit assocs.lib
|
regexp2.classes combinators.short-circuit assocs.lib
|
||||||
sequences.lib ;
|
sequences.lib regexp2.utils ;
|
||||||
IN: regexp2.traversal
|
IN: regexp2.traversal
|
||||||
|
|
||||||
TUPLE: dfa-traverser
|
TUPLE: dfa-traverser
|
||||||
dfa-table
|
dfa-table
|
||||||
|
traversal-flags
|
||||||
|
capture-groups
|
||||||
|
{ capture-group-index integer }
|
||||||
|
{ lookahead-counter integer }
|
||||||
last-state current-state
|
last-state current-state
|
||||||
text
|
text
|
||||||
start-index current-index
|
start-index current-index
|
||||||
matches ;
|
matches ;
|
||||||
|
|
||||||
: <dfa-traverser> ( text regexp -- match )
|
: <dfa-traverser> ( text regexp -- match )
|
||||||
dfa-table>>
|
[ dfa-table>> ] [ traversal-flags>> ] bi
|
||||||
dfa-traverser new
|
dfa-traverser new
|
||||||
|
swap >>traversal-flags
|
||||||
swap [ start-state>> >>current-state ] keep
|
swap [ start-state>> >>current-state ] keep
|
||||||
>>dfa-table
|
>>dfa-table
|
||||||
swap >>text
|
swap >>text
|
||||||
0 >>start-index
|
0 >>start-index
|
||||||
0 >>current-index
|
0 >>current-index
|
||||||
V{ } clone >>matches ;
|
V{ } clone >>matches
|
||||||
|
V{ } clone >>capture-groups ;
|
||||||
|
|
||||||
: final-state? ( dfa-traverser -- ? )
|
: final-state? ( dfa-traverser -- ? )
|
||||||
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
||||||
|
@ -49,9 +55,6 @@ TUPLE: dfa-traverser
|
||||||
: match-literal ( transition from-state table -- to-state/f )
|
: match-literal ( transition from-state table -- to-state/f )
|
||||||
transitions>> [ at ] [ 2drop f ] if-at ;
|
transitions>> [ at ] [ 2drop f ] if-at ;
|
||||||
|
|
||||||
: assoc-with ( param assoc quot -- assoc curry )
|
|
||||||
swapd [ [ -rot ] dip call ] 2curry ; inline
|
|
||||||
|
|
||||||
: match-class ( transition from-state table -- to-state/f )
|
: match-class ( transition from-state table -- to-state/f )
|
||||||
transitions>> at* [
|
transitions>> at* [
|
||||||
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
USING: accessors arrays assocs combinators.lib io kernel
|
USING: accessors arrays assocs combinators.lib io kernel
|
||||||
math math.order namespaces regexp2.backend sequences
|
math math.order namespaces regexp2.backend sequences
|
||||||
sequences.lib unicode.categories math.ranges fry
|
sequences.lib unicode.categories math.ranges fry
|
||||||
combinators.short-circuit ;
|
combinators.short-circuit vectors ;
|
||||||
IN: regexp2.utils
|
IN: regexp2.utils
|
||||||
|
|
||||||
: (while-changes) ( obj quot pred pred-ret -- obj )
|
: (while-changes) ( obj quot pred pred-ret -- obj )
|
||||||
|
@ -15,6 +15,20 @@ IN: regexp2.utils
|
||||||
: while-changes ( obj quot pred -- obj' )
|
: while-changes ( obj quot pred -- obj' )
|
||||||
pick over call (while-changes) ; inline
|
pick over call (while-changes) ; inline
|
||||||
|
|
||||||
|
: assoc-with ( param assoc quot -- assoc curry )
|
||||||
|
swapd [ [ -rot ] dip call ] 2curry ; inline
|
||||||
|
|
||||||
|
: insert-at ( value key hash -- )
|
||||||
|
2dup at* [
|
||||||
|
2nip push
|
||||||
|
] [
|
||||||
|
drop
|
||||||
|
[ dup vector? [ 1vector ] unless ] 2dip set-at
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: ?insert-at ( value key hash/f -- hash )
|
||||||
|
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||||
|
|
||||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
||||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||||
|
|
Loading…
Reference in New Issue