moved a few util words around

added traversal-flags
db4
Doug Coleman 2008-08-28 13:45:04 -05:00
parent 32e87a03cd
commit 97599d707b
6 changed files with 53 additions and 27 deletions

View File

@ -11,6 +11,7 @@ TUPLE: regexp
nfa-table
dfa-table
minimized-table
{ traversal-flags hashtable }
{ state integer }
{ new-states vector }
{ visited-states hashtable } ;

View File

@ -3,7 +3,7 @@
USING: accessors arrays assocs grouping kernel regexp2.backend
locals math namespaces regexp2.parser sequences state-tables fry
quotations math.order math.ranges vectors unicode.categories
regexp2.utils regexp2.transition-tables words sequences.lib ;
regexp2.utils regexp2.transition-tables words sequences.lib sets ;
IN: regexp2.nfa
SYMBOL: negation-mode
@ -11,6 +11,12 @@ SYMBOL: negation-mode
SINGLETON: eps
MIXIN: traversal-flag
SINGLETON: lookahead-on INSTANCE: lookahead-on traversal-flag
SINGLETON: lookahead-off INSTANCE: lookahead-off traversal-flag
SINGLETON: capture-group-on INSTANCE: capture-group-on traversal-flag
SINGLETON: capture-group-off INSTANCE: capture-group-off traversal-flag
: next-state ( regexp -- state )
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
@ -38,6 +44,10 @@ GENERIC: nfa-node ( node -- )
s0 s1 2array stack push
t s1 table final-states>> set-at ] ;
: add-traversal-flag ( flag -- )
stack peek second
current-regexp get traversal-flags>> push-at ;
:: concatenate-nodes ( -- )
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
@ -116,6 +126,14 @@ M: negation nfa-node ( node -- )
term>> nfa-node
negation-mode dec ;
M: lookahead nfa-node ( node -- )
eps literal-transition add-simple-entry
lookahead-on add-traversal-flag
term>> nfa-node
eps literal-transition add-simple-entry
lookahead-off add-traversal-flag
2 [ concatenate-nodes ] times ;
: construct-nfa ( regexp -- )
[
reset-regexp

View File

@ -12,6 +12,7 @@ IN: regexp2
<transition-table> >>nfa-table
<transition-table> >>dfa-table
<transition-table> >>minimized-table
H{ } clone >>traversal-flags
reset-regexp ;
: construct-regexp ( regexp -- regexp' )
@ -26,7 +27,8 @@ IN: regexp2
<dfa-traverser> do-match return-match ;
: matches? ( string regexp -- ? )
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
dupd match
[ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
: match-head ( string regexp -- end ) match length>> 1- ;

View File

@ -1,20 +1,10 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs fry hashtables kernel sequences
vectors ;
vectors regexp2.utils ;
IN: regexp2.transition-tables
: insert-at ( value key hash -- )
2dup at* [
2nip push
] [
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
] if ;
: ?insert-at ( value key hash/f -- hash )
[ H{ } clone ] unless* [ insert-at ] keep ;
TUPLE: transition from to obj flags ;
TUPLE: transition from to obj ;
TUPLE: literal-transition < transition ;
TUPLE: class-transition < transition ;
TUPLE: default-transition < transition ;
@ -26,8 +16,8 @@ TUPLE: default ;
new
swap >>obj
swap >>to
swap >>from
H{ } clone >>flags ;
swap >>from ;
: <literal-transition> ( from to obj -- transition )
literal-transition make-transition ;
: <class-transition> ( from to obj -- transition )
@ -35,9 +25,7 @@ TUPLE: default ;
: <default-transition> ( from to -- transition )
t default-transition make-transition ;
TUPLE: transition-table transitions
literals classes defaults
start-state final-states ;
TUPLE: transition-table transitions start-state final-states ;
: <transition-table> ( -- transition-table )
transition-table new
@ -45,7 +33,7 @@ TUPLE: transition-table transitions
H{ } clone >>final-states ;
: set-transition ( transition hash -- )
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
[ [ to>> ] [ obj>> ] [ from>> ] tri ] dip
2dup at* [ 2nip insert-at ]
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;

View File

@ -3,25 +3,31 @@
USING: accessors assocs combinators combinators.lib kernel
math math.ranges quotations sequences regexp2.parser
regexp2.classes combinators.short-circuit assocs.lib
sequences.lib ;
sequences.lib regexp2.utils ;
IN: regexp2.traversal
TUPLE: dfa-traverser
dfa-table
traversal-flags
capture-groups
{ capture-group-index integer }
{ lookahead-counter integer }
last-state current-state
text
start-index current-index
matches ;
: <dfa-traverser> ( text regexp -- match )
dfa-table>>
[ dfa-table>> ] [ traversal-flags>> ] bi
dfa-traverser new
swap >>traversal-flags
swap [ start-state>> >>current-state ] keep
>>dfa-table
swap >>text
0 >>start-index
0 >>current-index
V{ } clone >>matches ;
V{ } clone >>matches
V{ } clone >>capture-groups ;
: final-state? ( dfa-traverser -- ? )
[ current-state>> ] [ dfa-table>> final-states>> ] bi
@ -49,9 +55,6 @@ TUPLE: dfa-traverser
: match-literal ( transition from-state table -- to-state/f )
transitions>> [ at ] [ 2drop f ] if-at ;
: assoc-with ( param assoc quot -- assoc curry )
swapd [ [ -rot ] dip call ] 2curry ; inline
: match-class ( transition from-state table -- to-state/f )
transitions>> at* [
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if

View File

@ -3,7 +3,7 @@
USING: accessors arrays assocs combinators.lib io kernel
math math.order namespaces regexp2.backend sequences
sequences.lib unicode.categories math.ranges fry
combinators.short-circuit ;
combinators.short-circuit vectors ;
IN: regexp2.utils
: (while-changes) ( obj quot pred pred-ret -- obj )
@ -15,6 +15,20 @@ IN: regexp2.utils
: while-changes ( obj quot pred -- obj' )
pick over call (while-changes) ; inline
: assoc-with ( param assoc quot -- assoc curry )
swapd [ [ -rot ] dip call ] 2curry ; inline
: insert-at ( value key hash -- )
2dup at* [
2nip push
] [
drop
[ dup vector? [ 1vector ] unless ] 2dip set-at
] if ;
: ?insert-at ( value key hash/f -- hash )
[ H{ } clone ] unless* [ insert-at ] keep ;
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
: push1 ( obj -- ) input-stream get stream>> push ;
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;