a bit of refactoring, preparing to take options out of the parsing stage
							parent
							
								
									87bdc0acd3
								
							
						
					
					
						commit
						f8a23c657b
					
				| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
! Copyright (C) 2008 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors hashtables kernel math state-tables vectors ;
 | 
			
		||||
USING: accessors hashtables kernel math vectors ;
 | 
			
		||||
IN: regexp.backend
 | 
			
		||||
 | 
			
		||||
TUPLE: regexp
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,9 +1,9 @@
 | 
			
		|||
! Copyright (C) 2008 Doug Coleman.
 | 
			
		||||
! See http://factorcode.org/license.txt for BSD license.
 | 
			
		||||
USING: accessors arrays assocs grouping kernel regexp.backend
 | 
			
		||||
locals math namespaces regexp.parser sequences state-tables fry
 | 
			
		||||
quotations math.order math.ranges vectors unicode.categories
 | 
			
		||||
regexp.utils regexp.transition-tables words sets ;
 | 
			
		||||
locals math namespaces regexp.parser sequences fry quotations
 | 
			
		||||
math.order math.ranges vectors unicode.categories regexp.utils
 | 
			
		||||
regexp.transition-tables words sets ;
 | 
			
		||||
IN: regexp.nfa
 | 
			
		||||
 | 
			
		||||
SYMBOL: negation-mode
 | 
			
		||||
| 
						 | 
				
			
			@ -22,6 +22,9 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
 | 
			
		|||
SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
 | 
			
		||||
SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag
 | 
			
		||||
 | 
			
		||||
: add-global-flag ( flag -- )
 | 
			
		||||
    current-regexp get nfa-table>> flags>> conjoin ;
 | 
			
		||||
 | 
			
		||||
: next-state ( regexp -- state )
 | 
			
		||||
    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -138,21 +141,25 @@ M: non-capture-group nfa-node ( node -- )
 | 
			
		|||
M: reluctant-kleene-star nfa-node ( node -- )
 | 
			
		||||
    term>> <kleene-star> nfa-node ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
: add-epsilon-flag ( flag -- )
 | 
			
		||||
    eps literal-transition add-simple-entry add-traversal-flag ;
 | 
			
		||||
 | 
			
		||||
M: beginning-of-line nfa-node ( node -- )
 | 
			
		||||
    drop beginning-of-line add-epsilon-flag ;
 | 
			
		||||
    drop 
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    beginning-of-line add-global-flag ;
 | 
			
		||||
 | 
			
		||||
M: end-of-line nfa-node ( node -- )
 | 
			
		||||
    drop end-of-line add-epsilon-flag ;
 | 
			
		||||
    drop
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    end-of-line add-global-flag ;
 | 
			
		||||
 | 
			
		||||
M: beginning-of-input nfa-node ( node -- )
 | 
			
		||||
    drop beginning-of-input add-epsilon-flag ;
 | 
			
		||||
    drop
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    beginning-of-input add-global-flag ;
 | 
			
		||||
 | 
			
		||||
M: end-of-input nfa-node ( node -- )
 | 
			
		||||
    drop end-of-input add-epsilon-flag ;
 | 
			
		||||
    drop
 | 
			
		||||
    eps literal-transition add-simple-entry
 | 
			
		||||
    end-of-input add-global-flag ;
 | 
			
		||||
 | 
			
		||||
M: negation nfa-node ( node -- )
 | 
			
		||||
    negation-mode inc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -58,7 +58,7 @@ SINGLETONS: letter-class LETTER-class Letter-class digit-class
 | 
			
		|||
alpha-class non-newline-blank-class
 | 
			
		||||
ascii-class punctuation-class java-printable-class blank-class
 | 
			
		||||
control-character-class hex-digit-class java-blank-class c-identifier-class
 | 
			
		||||
terminator-class unmatchable-class word-boundary-class ;
 | 
			
		||||
unmatchable-class terminator-class word-boundary-class ;
 | 
			
		||||
 | 
			
		||||
SINGLETONS: beginning-of-group end-of-group
 | 
			
		||||
beginning-of-character-class end-of-character-class
 | 
			
		||||
| 
						 | 
				
			
			@ -87,8 +87,8 @@ left-parenthesis pipe caret dash ;
 | 
			
		|||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
 | 
			
		||||
: <constant> ( obj -- constant )
 | 
			
		||||
    dup Letter? get-case-insensitive and [
 | 
			
		||||
        [ ch>lower constant boa ]
 | 
			
		||||
        [ ch>upper constant boa ] bi 2array <alternation>
 | 
			
		||||
        [ ch>lower ] [ ch>upper ] bi
 | 
			
		||||
        [ constant boa ] bi@ 2array <alternation>
 | 
			
		||||
    ] [
 | 
			
		||||
        constant boa
 | 
			
		||||
    ] if ;
 | 
			
		||||
| 
						 | 
				
			
			@ -384,20 +384,22 @@ DEFER: handle-left-bracket
 | 
			
		|||
    } case
 | 
			
		||||
    [ (parse-character-class) ] when ;
 | 
			
		||||
 | 
			
		||||
: push-constant ( ch -- ) <constant> push-stack ;
 | 
			
		||||
 | 
			
		||||
: parse-character-class-second ( -- )
 | 
			
		||||
    read1 {
 | 
			
		||||
        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
 | 
			
		||||
        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
 | 
			
		||||
        { CHAR: - [ CHAR: - <constant> push-stack ] }
 | 
			
		||||
        { CHAR: [ [ CHAR: [ push-constant ] }
 | 
			
		||||
        { CHAR: ] [ CHAR: ] push-constant ] }
 | 
			
		||||
        { CHAR: - [ CHAR: - push-constant ] }
 | 
			
		||||
        [ push1 ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
: parse-character-class-first ( -- )
 | 
			
		||||
    read1 {
 | 
			
		||||
        { CHAR: ^ [ caret push-stack parse-character-class-second ] }
 | 
			
		||||
        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
 | 
			
		||||
        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
 | 
			
		||||
        { CHAR: - [ CHAR: - <constant> push-stack ] }
 | 
			
		||||
        { CHAR: [ [ CHAR: [ push-constant ] }
 | 
			
		||||
        { CHAR: ] [ CHAR: ] push-constant ] }
 | 
			
		||||
        { CHAR: - [ CHAR: - push-constant ] }
 | 
			
		||||
        [ push1 ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -431,7 +433,7 @@ DEFER: handle-left-bracket
 | 
			
		|||
                drop
 | 
			
		||||
                handle-back-anchor f
 | 
			
		||||
            ] [
 | 
			
		||||
                <constant> push-stack t
 | 
			
		||||
                push-constant t
 | 
			
		||||
            ] if
 | 
			
		||||
        ]
 | 
			
		||||
    } case ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,12 +25,13 @@ TUPLE: default ;
 | 
			
		|||
: <default-transition> ( from to -- transition )
 | 
			
		||||
    t default-transition make-transition ;
 | 
			
		||||
 | 
			
		||||
TUPLE: transition-table transitions start-state final-states ;
 | 
			
		||||
TUPLE: transition-table transitions start-state final-states flags ;
 | 
			
		||||
 | 
			
		||||
: <transition-table> ( -- transition-table )
 | 
			
		||||
    transition-table new
 | 
			
		||||
        H{ } clone >>transitions
 | 
			
		||||
        H{ } clone >>final-states ;
 | 
			
		||||
        H{ } clone >>final-states
 | 
			
		||||
        H{ } clone >>flags ;
 | 
			
		||||
 | 
			
		||||
: maybe-initialize-key ( key hashtable -- )
 | 
			
		||||
    2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -82,6 +82,7 @@ M: end-of-input flag-action ( dfa-traverser flag -- )
 | 
			
		|||
    drop
 | 
			
		||||
    dup end-of-text? [ t >>match-failed? ] unless drop ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
M: beginning-of-line flag-action ( dfa-traverser flag -- )
 | 
			
		||||
    drop
 | 
			
		||||
    dup {
 | 
			
		||||
| 
						 | 
				
			
			@ -96,6 +97,7 @@ M: end-of-line flag-action ( dfa-traverser flag -- )
 | 
			
		|||
        [ next-text-character terminator-class class-member? ]
 | 
			
		||||
    } 1|| [ t >>match-failed? ] unless drop ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
M: word-boundary flag-action ( dfa-traverser flag -- )
 | 
			
		||||
    drop
 | 
			
		||||
    dup {
 | 
			
		||||
| 
						 | 
				
			
			@ -103,6 +105,7 @@ M: word-boundary flag-action ( dfa-traverser flag -- )
 | 
			
		|||
        [ current-text-character terminator-class class-member? ]
 | 
			
		||||
    } 1|| [ t >>match-failed? ] unless drop ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
M: lookahead-on flag-action ( dfa-traverser flag -- )
 | 
			
		||||
    drop
 | 
			
		||||
    lookahead-counters>> 0 swap push ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue