parent
							
								
									32e87a03cd
								
							
						
					
					
						commit
						97599d707b
					
				| 
						 | 
				
			
			@ -11,6 +11,7 @@ TUPLE: regexp
 | 
			
		|||
    nfa-table
 | 
			
		||||
    dfa-table
 | 
			
		||||
    minimized-table
 | 
			
		||||
    { traversal-flags hashtable }
 | 
			
		||||
    { state integer }
 | 
			
		||||
    { new-states vector }
 | 
			
		||||
    { visited-states hashtable } ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
USING: accessors arrays assocs grouping kernel regexp2.backend
 | 
			
		||||
locals math namespaces regexp2.parser sequences state-tables fry
 | 
			
		||||
quotations math.order math.ranges vectors unicode.categories
 | 
			
		||||
regexp2.utils regexp2.transition-tables words sequences.lib ;
 | 
			
		||||
regexp2.utils regexp2.transition-tables words sequences.lib sets ;
 | 
			
		||||
IN: regexp2.nfa
 | 
			
		||||
 | 
			
		||||
SYMBOL: negation-mode
 | 
			
		||||
| 
						 | 
				
			
			@ -11,6 +11,12 @@ SYMBOL: negation-mode
 | 
			
		|||
 | 
			
		||||
SINGLETON: eps
 | 
			
		||||
 | 
			
		||||
MIXIN: traversal-flag
 | 
			
		||||
SINGLETON: lookahead-on INSTANCE: lookahead-on traversal-flag
 | 
			
		||||
SINGLETON: lookahead-off INSTANCE: lookahead-off traversal-flag
 | 
			
		||||
SINGLETON: capture-group-on INSTANCE: capture-group-on traversal-flag
 | 
			
		||||
SINGLETON: capture-group-off INSTANCE: capture-group-off traversal-flag
 | 
			
		||||
 | 
			
		||||
: next-state ( regexp -- state )
 | 
			
		||||
    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -38,6 +44,10 @@ GENERIC: nfa-node ( node -- )
 | 
			
		|||
        s0 s1 2array stack push
 | 
			
		||||
        t s1 table final-states>> set-at ] ;
 | 
			
		||||
 | 
			
		||||
: add-traversal-flag ( flag -- )
 | 
			
		||||
    stack peek second
 | 
			
		||||
    current-regexp get traversal-flags>> push-at ;
 | 
			
		||||
 | 
			
		||||
:: concatenate-nodes ( -- )
 | 
			
		||||
    [let* | regexp [ current-regexp get ]
 | 
			
		||||
            stack [ regexp stack>> ]
 | 
			
		||||
| 
						 | 
				
			
			@ -116,6 +126,14 @@ M: negation nfa-node ( node -- )
 | 
			
		|||
    term>> nfa-node 
 | 
			
		||||
    negation-mode dec ;
 | 
			
		||||
 | 
			
		||||
M: lookahead nfa-node ( node -- )
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    lookahead-on add-traversal-flag
 | 
			
		||||
    term>> nfa-node
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    lookahead-off add-traversal-flag
 | 
			
		||||
    2 [ concatenate-nodes ] times ;
 | 
			
		||||
 | 
			
		||||
: construct-nfa ( regexp -- )
 | 
			
		||||
    [
 | 
			
		||||
        reset-regexp
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,6 +12,7 @@ IN: regexp2
 | 
			
		|||
        <transition-table> >>nfa-table
 | 
			
		||||
        <transition-table> >>dfa-table
 | 
			
		||||
        <transition-table> >>minimized-table
 | 
			
		||||
        H{ } clone >>traversal-flags
 | 
			
		||||
        reset-regexp ;
 | 
			
		||||
 | 
			
		||||
: construct-regexp ( regexp -- regexp' )
 | 
			
		||||
| 
						 | 
				
			
			@ -26,7 +27,8 @@ IN: regexp2
 | 
			
		|||
    <dfa-traverser> do-match return-match ;
 | 
			
		||||
 | 
			
		||||
: matches? ( string regexp -- ? )
 | 
			
		||||
    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 | 
			
		||||
    dupd match
 | 
			
		||||
    [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 | 
			
		||||
 | 
			
		||||
: match-head ( string regexp -- end ) match length>> 1- ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,20 +1,10 @@
 | 
			
		|||
! Copyright (C) 2008 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors arrays assocs fry hashtables kernel sequences
 | 
			
		||||
vectors ;
 | 
			
		||||
vectors regexp2.utils ;
 | 
			
		||||
IN: regexp2.transition-tables
 | 
			
		||||
 | 
			
		||||
: insert-at ( value key hash -- )
 | 
			
		||||
    2dup at* [
 | 
			
		||||
        2nip push
 | 
			
		||||
    ] [
 | 
			
		||||
        drop >r >r dup vector? [ 1vector ] unless r> r> set-at
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
: ?insert-at ( value key hash/f -- hash )
 | 
			
		||||
    [ H{ } clone ] unless* [ insert-at ] keep ;
 | 
			
		||||
 | 
			
		||||
TUPLE: transition from to obj flags ;
 | 
			
		||||
TUPLE: transition from to obj ;
 | 
			
		||||
TUPLE: literal-transition < transition ;
 | 
			
		||||
TUPLE: class-transition < transition ;
 | 
			
		||||
TUPLE: default-transition < transition ;
 | 
			
		||||
| 
						 | 
				
			
			@ -26,8 +16,8 @@ TUPLE: default ;
 | 
			
		|||
    new
 | 
			
		||||
        swap >>obj
 | 
			
		||||
        swap >>to
 | 
			
		||||
        swap >>from
 | 
			
		||||
        H{ } clone >>flags ;
 | 
			
		||||
        swap >>from ;
 | 
			
		||||
 | 
			
		||||
: <literal-transition> ( from to obj -- transition )
 | 
			
		||||
    literal-transition make-transition ;
 | 
			
		||||
: <class-transition> ( from to obj -- transition )
 | 
			
		||||
| 
						 | 
				
			
			@ -35,9 +25,7 @@ TUPLE: default ;
 | 
			
		|||
: <default-transition> ( from to -- transition )
 | 
			
		||||
    t default-transition make-transition ;
 | 
			
		||||
 | 
			
		||||
TUPLE: transition-table transitions
 | 
			
		||||
    literals classes defaults
 | 
			
		||||
    start-state final-states ;
 | 
			
		||||
TUPLE: transition-table transitions start-state final-states ;
 | 
			
		||||
 | 
			
		||||
: <transition-table> ( -- transition-table )
 | 
			
		||||
    transition-table new
 | 
			
		||||
| 
						 | 
				
			
			@ -45,7 +33,7 @@ TUPLE: transition-table transitions
 | 
			
		|||
        H{ } clone >>final-states ;
 | 
			
		||||
 | 
			
		||||
: set-transition ( transition hash -- )
 | 
			
		||||
    >r [ to>> ] [ obj>> ] [ from>> ] tri r>
 | 
			
		||||
    [ [ to>> ] [ obj>> ] [ from>> ] tri ] dip
 | 
			
		||||
    2dup at* [ 2nip insert-at ]
 | 
			
		||||
    [ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,25 +3,31 @@
 | 
			
		|||
USING: accessors assocs combinators combinators.lib kernel
 | 
			
		||||
math math.ranges quotations sequences regexp2.parser
 | 
			
		||||
regexp2.classes combinators.short-circuit assocs.lib
 | 
			
		||||
sequences.lib ;
 | 
			
		||||
sequences.lib regexp2.utils ;
 | 
			
		||||
IN: regexp2.traversal
 | 
			
		||||
 | 
			
		||||
TUPLE: dfa-traverser
 | 
			
		||||
    dfa-table
 | 
			
		||||
    traversal-flags
 | 
			
		||||
    capture-groups
 | 
			
		||||
    { capture-group-index integer }
 | 
			
		||||
    { lookahead-counter integer }
 | 
			
		||||
    last-state current-state
 | 
			
		||||
    text
 | 
			
		||||
    start-index current-index
 | 
			
		||||
    matches ;
 | 
			
		||||
 | 
			
		||||
: <dfa-traverser> ( text regexp -- match )
 | 
			
		||||
    dfa-table>>
 | 
			
		||||
    [ dfa-table>> ] [ traversal-flags>> ] bi
 | 
			
		||||
    dfa-traverser new
 | 
			
		||||
        swap >>traversal-flags
 | 
			
		||||
        swap [ start-state>> >>current-state ] keep
 | 
			
		||||
        >>dfa-table
 | 
			
		||||
        swap >>text
 | 
			
		||||
        0 >>start-index
 | 
			
		||||
        0 >>current-index
 | 
			
		||||
        V{ } clone >>matches ;
 | 
			
		||||
        V{ } clone >>matches
 | 
			
		||||
        V{ } clone >>capture-groups ;
 | 
			
		||||
 | 
			
		||||
: final-state? ( dfa-traverser -- ? )
 | 
			
		||||
    [ current-state>> ] [ dfa-table>> final-states>> ] bi
 | 
			
		||||
| 
						 | 
				
			
			@ -49,9 +55,6 @@ TUPLE: dfa-traverser
 | 
			
		|||
: match-literal ( transition from-state table -- to-state/f )
 | 
			
		||||
    transitions>> [ at ] [ 2drop f ] if-at ;
 | 
			
		||||
 | 
			
		||||
: assoc-with ( param assoc quot -- assoc curry )
 | 
			
		||||
    swapd [ [ -rot ] dip call ] 2curry ; inline
 | 
			
		||||
 | 
			
		||||
: match-class ( transition from-state table -- to-state/f )
 | 
			
		||||
    transitions>> at* [
 | 
			
		||||
        [ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@
 | 
			
		|||
USING: accessors arrays assocs combinators.lib io kernel
 | 
			
		||||
math math.order namespaces regexp2.backend sequences
 | 
			
		||||
sequences.lib unicode.categories math.ranges fry
 | 
			
		||||
combinators.short-circuit ;
 | 
			
		||||
combinators.short-circuit vectors ;
 | 
			
		||||
IN: regexp2.utils
 | 
			
		||||
 | 
			
		||||
: (while-changes) ( obj quot pred pred-ret -- obj )
 | 
			
		||||
| 
						 | 
				
			
			@ -15,6 +15,20 @@ IN: regexp2.utils
 | 
			
		|||
: while-changes ( obj quot pred -- obj' )
 | 
			
		||||
    pick over call (while-changes) ; inline
 | 
			
		||||
 | 
			
		||||
: assoc-with ( param assoc quot -- assoc curry )
 | 
			
		||||
    swapd [ [ -rot ] dip call ] 2curry ; inline
 | 
			
		||||
 | 
			
		||||
: insert-at ( value key hash -- )
 | 
			
		||||
    2dup at* [
 | 
			
		||||
        2nip push
 | 
			
		||||
    ] [
 | 
			
		||||
        drop
 | 
			
		||||
        [ dup vector? [ 1vector ] unless ] 2dip set-at
 | 
			
		||||
    ] if ;
 | 
			
		||||
 | 
			
		||||
: ?insert-at ( value key hash/f -- hash )
 | 
			
		||||
    [ H{ } clone ] unless* [ insert-at ] keep ;
 | 
			
		||||
 | 
			
		||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
 | 
			
		||||
: push1 ( obj -- ) input-stream get stream>> push ;
 | 
			
		||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue