2008-08-26 21:24:14 -04:00
|
|
|
! Copyright (C) 2008 Doug Coleman.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2008-11-22 18:31:40 -05:00
|
|
|
USING: accessors assocs combinators kernel math
|
2008-09-22 21:09:42 -04:00
|
|
|
quotations sequences regexp.parser regexp.classes fry arrays
|
2008-11-06 14:16:33 -05:00
|
|
|
combinators.short-circuit regexp.utils prettyprint regexp.nfa
|
|
|
|
shuffle ;
|
2008-09-18 15:42:16 -04:00
|
|
|
IN: regexp.traversal
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
TUPLE: dfa-traverser
|
|
|
|
dfa-table
|
2008-08-28 14:45:04 -04:00
|
|
|
traversal-flags
|
2008-09-22 14:37:27 -04:00
|
|
|
traverse-forward
|
2008-09-21 22:45:27 -04:00
|
|
|
lookahead-counters
|
2008-09-22 14:37:27 -04:00
|
|
|
lookbehind-counters
|
2008-09-22 21:09:42 -04:00
|
|
|
capture-counters
|
|
|
|
captured-groups
|
|
|
|
capture-group-index
|
2008-08-26 21:24:14 -04:00
|
|
|
last-state current-state
|
|
|
|
text
|
2008-11-24 01:18:27 -05:00
|
|
|
match-failed?
|
2008-08-26 21:24:14 -04:00
|
|
|
start-index current-index
|
|
|
|
matches ;
|
|
|
|
|
|
|
|
: <dfa-traverser> ( text regexp -- match )
|
2008-09-09 20:16:11 -04:00
|
|
|
[ dfa-table>> ] [ dfa-traversal-flags>> ] bi
|
2008-08-26 21:24:14 -04:00
|
|
|
dfa-traverser new
|
2008-08-28 14:45:04 -04:00
|
|
|
swap >>traversal-flags
|
2008-11-06 14:16:33 -05:00
|
|
|
swap [ start-state>> >>current-state ] [ >>dfa-table ] bi
|
2008-08-26 21:24:14 -04:00
|
|
|
swap >>text
|
2008-09-22 14:37:27 -04:00
|
|
|
t >>traverse-forward
|
2008-08-26 21:24:14 -04:00
|
|
|
0 >>start-index
|
|
|
|
0 >>current-index
|
2008-09-22 21:09:42 -04:00
|
|
|
0 >>capture-group-index
|
2008-08-28 14:45:04 -04:00
|
|
|
V{ } clone >>matches
|
2008-09-22 21:09:42 -04:00
|
|
|
V{ } clone >>capture-counters
|
2008-09-22 14:37:27 -04:00
|
|
|
V{ } clone >>lookbehind-counters
|
2008-09-22 21:09:42 -04:00
|
|
|
V{ } clone >>lookahead-counters
|
|
|
|
H{ } clone >>captured-groups ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: final-state? ( dfa-traverser -- ? )
|
2008-11-24 01:18:27 -05:00
|
|
|
[ current-state>> ]
|
|
|
|
[ dfa-table>> final-states>> ] bi key? ;
|
|
|
|
|
|
|
|
: beginning-of-text? ( dfa-traverser -- ? )
|
|
|
|
current-index>> 0 <= ; inline
|
|
|
|
|
|
|
|
: end-of-text? ( dfa-traverser -- ? )
|
|
|
|
[ current-index>> ] [ text>> length ] bi >= ; inline
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: text-finished? ( dfa-traverser -- ? )
|
2008-09-22 15:55:17 -04:00
|
|
|
{
|
|
|
|
[ current-state>> empty? ]
|
2008-11-24 01:18:27 -05:00
|
|
|
[ end-of-text? ]
|
|
|
|
[ match-failed?>> ]
|
2008-09-22 15:55:17 -04:00
|
|
|
} 1|| ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: save-final-state ( dfa-straverser -- )
|
|
|
|
[ current-index>> ] [ matches>> ] bi push ;
|
|
|
|
|
|
|
|
: match-done? ( dfa-traverser -- ? )
|
|
|
|
dup final-state? [
|
|
|
|
dup save-final-state
|
|
|
|
] when text-finished? ;
|
|
|
|
|
2008-11-24 01:18:27 -05:00
|
|
|
: previous-text-character ( dfa-traverser -- ch )
|
|
|
|
[ text>> ] [ current-index>> 1- ] bi nth ;
|
|
|
|
|
|
|
|
: current-text-character ( dfa-traverser -- ch )
|
|
|
|
[ text>> ] [ current-index>> ] bi nth ;
|
|
|
|
|
|
|
|
: next-text-character ( dfa-traverser -- ch )
|
|
|
|
[ text>> ] [ current-index>> 1+ ] bi nth ;
|
|
|
|
|
2008-09-21 22:45:27 -04:00
|
|
|
GENERIC: flag-action ( dfa-traverser flag -- )
|
|
|
|
|
2008-11-24 01:18:27 -05:00
|
|
|
|
|
|
|
M: beginning-of-input flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup beginning-of-text? [ t >>match-failed? ] unless drop ;
|
|
|
|
|
|
|
|
M: end-of-input flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup end-of-text? [ t >>match-failed? ] unless drop ;
|
|
|
|
|
2008-11-24 13:59:29 -05:00
|
|
|
|
2008-11-24 01:18:27 -05:00
|
|
|
M: beginning-of-line flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup {
|
|
|
|
[ beginning-of-text? ]
|
|
|
|
[ previous-text-character terminator-class class-member? ]
|
|
|
|
} 1|| [ t >>match-failed? ] unless drop ;
|
|
|
|
|
|
|
|
M: end-of-line flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup {
|
|
|
|
[ end-of-text? ]
|
|
|
|
[ next-text-character terminator-class class-member? ]
|
|
|
|
} 1|| [ t >>match-failed? ] unless drop ;
|
|
|
|
|
2008-11-24 13:59:29 -05:00
|
|
|
|
2008-11-24 01:18:27 -05:00
|
|
|
M: word-boundary flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup {
|
|
|
|
[ end-of-text? ]
|
|
|
|
[ current-text-character terminator-class class-member? ]
|
|
|
|
} 1|| [ t >>match-failed? ] unless drop ;
|
|
|
|
|
2008-11-24 13:59:29 -05:00
|
|
|
|
2008-09-21 22:45:27 -04:00
|
|
|
M: lookahead-on flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
lookahead-counters>> 0 swap push ;
|
|
|
|
|
|
|
|
M: lookahead-off flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
2008-09-22 14:37:27 -04:00
|
|
|
dup lookahead-counters>>
|
|
|
|
[ drop ] [ pop '[ _ - ] change-current-index drop ] if-empty ;
|
|
|
|
|
|
|
|
M: lookbehind-on flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
f >>traverse-forward
|
2008-09-22 15:55:17 -04:00
|
|
|
[ 2 - ] change-current-index
|
2008-09-22 14:37:27 -04:00
|
|
|
lookbehind-counters>> 0 swap push ;
|
|
|
|
|
|
|
|
M: lookbehind-off flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
t >>traverse-forward
|
|
|
|
dup lookbehind-counters>>
|
2008-09-22 15:55:17 -04:00
|
|
|
[ drop ] [ pop '[ _ + 2 + ] change-current-index drop ] if-empty ;
|
2008-09-21 22:45:27 -04:00
|
|
|
|
2008-09-22 21:09:42 -04:00
|
|
|
M: capture-group-on flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
[ current-index>> 0 2array ]
|
|
|
|
[ capture-counters>> ] bi push ;
|
|
|
|
|
|
|
|
M: capture-group-off flag-action ( dfa-traverser flag -- )
|
|
|
|
drop
|
|
|
|
dup capture-counters>> empty? [
|
|
|
|
drop
|
|
|
|
] [
|
|
|
|
{
|
|
|
|
[ capture-counters>> pop first2 dupd + ]
|
|
|
|
[ text>> <slice> ]
|
|
|
|
[ [ 1+ ] change-capture-group-index capture-group-index>> ]
|
|
|
|
[ captured-groups>> set-at ]
|
|
|
|
} cleave
|
|
|
|
] if ;
|
|
|
|
|
2008-09-21 22:45:27 -04:00
|
|
|
: process-flags ( dfa-traverser -- )
|
|
|
|
[ [ 1+ ] map ] change-lookahead-counters
|
2008-09-22 15:55:17 -04:00
|
|
|
[ [ 1+ ] map ] change-lookbehind-counters
|
2008-09-22 21:09:42 -04:00
|
|
|
[ [ first2 1+ 2array ] map ] change-capture-counters
|
2008-09-22 15:55:17 -04:00
|
|
|
! dup current-state>> .
|
2008-09-19 18:54:34 -04:00
|
|
|
dup [ current-state>> ] [ traversal-flags>> ] bi
|
2008-11-01 20:39:49 -04:00
|
|
|
at [ flag-action ] with each ;
|
2008-09-19 18:54:34 -04:00
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
: increment-state ( dfa-traverser state -- dfa-traverser )
|
2008-09-12 22:56:25 -04:00
|
|
|
[
|
2008-09-22 14:37:27 -04:00
|
|
|
dup traverse-forward>>
|
2008-11-17 22:42:59 -05:00
|
|
|
[ [ 1+ ] change-current-index ]
|
|
|
|
[ [ 1- ] change-current-index ] if
|
2008-09-22 14:37:27 -04:00
|
|
|
dup current-state>> >>last-state
|
2008-11-24 01:18:27 -05:00
|
|
|
] [ first ] bi* >>current-state ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: match-literal ( transition from-state table -- to-state/f )
|
2008-11-06 14:16:33 -05:00
|
|
|
transitions>> at at ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: match-class ( transition from-state table -- to-state/f )
|
|
|
|
transitions>> at* [
|
|
|
|
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
|
|
|
] [ drop ] if ;
|
|
|
|
|
|
|
|
: match-default ( transition from-state table -- to-state/f )
|
2008-11-06 14:16:33 -05:00
|
|
|
nipd transitions>> at t swap at ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: match-transition ( obj from-state dfa -- to-state/f )
|
|
|
|
{ [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
|
|
|
|
|
|
|
|
: setup-match ( match -- obj state dfa-table )
|
2008-11-24 01:18:27 -05:00
|
|
|
[ [ current-index>> ] [ text>> ] bi nth ]
|
|
|
|
[ current-state>> ]
|
|
|
|
[ dfa-table>> ] tri ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
: do-match ( dfa-traverser -- dfa-traverser )
|
2008-09-21 22:45:27 -04:00
|
|
|
dup process-flags
|
2008-08-26 21:24:14 -04:00
|
|
|
dup match-done? [
|
|
|
|
dup setup-match match-transition
|
|
|
|
[ increment-state do-match ] when*
|
|
|
|
] unless ;
|
|
|
|
|
2008-11-22 18:30:16 -05:00
|
|
|
: return-match ( dfa-traverser -- slice/f )
|
2008-08-26 21:24:14 -04:00
|
|
|
dup matches>>
|
|
|
|
[ drop f ]
|
2008-11-22 18:30:16 -05:00
|
|
|
[
|
|
|
|
[ [ text>> ] [ start-index>> ] bi ]
|
|
|
|
[ peek ] bi* rot <slice>
|
|
|
|
] if-empty ;
|