2008-08-26 21:24:14 -04:00
|
|
|
! Copyright (C) 2008 Doug Coleman.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2009-02-18 13:27:07 -05:00
|
|
|
USING: accessors arrays assocs grouping kernel
|
|
|
|
locals math namespaces sequences fry quotations
|
|
|
|
math.order math.ranges vectors unicode.categories
|
2009-02-20 18:54:48 -05:00
|
|
|
regexp.transition-tables words sets hashtables combinators.short-circuit
|
2009-02-18 13:27:07 -05:00
|
|
|
unicode.case.private regexp.ast regexp.classes ;
|
2009-02-20 18:54:48 -05:00
|
|
|
IN: regexp.nfa
|
|
|
|
|
2009-01-08 20:07:46 -05:00
|
|
|
! This uses unicode.case.private for ch>upper and ch>lower
|
|
|
|
! but case-insensitive matching should be done by case-folding everything
|
|
|
|
! before processing starts
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-20 18:54:48 -05:00
|
|
|
GENERIC: remove-lookahead ( syntax-tree -- syntax-tree' )
|
|
|
|
! This is unfinished and does nothing right now!
|
|
|
|
|
|
|
|
M: object remove-lookahead ;
|
|
|
|
|
|
|
|
M: with-options remove-lookahead
|
|
|
|
[ tree>> remove-lookahead ] [ options>> ] bi <with-options> ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-20 18:54:48 -05:00
|
|
|
M: alternation remove-lookahead
|
2009-02-21 13:09:41 -05:00
|
|
|
[ first>> ] [ second>> ] bi [ remove-lookahead ] bi@ alternation boa ;
|
2009-02-20 18:54:48 -05:00
|
|
|
|
|
|
|
M: concatenation remove-lookahead ;
|
2009-02-18 13:27:07 -05:00
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
SINGLETON: eps
|
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: option-stack
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: state
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: next-state ( -- state )
|
|
|
|
state [ get ] [ inc ] bi ;
|
2008-11-24 13:59:29 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: nfa-table
|
2009-02-19 17:48:46 -05:00
|
|
|
: table ( -- table ) nfa-table get ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: set-each ( keys value hashtable -- )
|
|
|
|
'[ _ swap _ set-at ] each ;
|
|
|
|
|
|
|
|
: options>hash ( options -- hashtable )
|
|
|
|
H{ } clone [
|
|
|
|
[ [ on>> t ] dip set-each ]
|
|
|
|
[ [ off>> f ] dip set-each ] 2bi
|
|
|
|
] keep ;
|
|
|
|
|
|
|
|
: using-options ( options quot -- )
|
|
|
|
[ options>hash option-stack [ ?push ] change ] dip
|
|
|
|
call option-stack get pop* ; inline
|
|
|
|
|
|
|
|
: option? ( obj -- ? )
|
|
|
|
option-stack get assoc-stack ;
|
|
|
|
|
2009-02-19 17:48:46 -05:00
|
|
|
GENERIC: nfa-node ( node -- start-state end-state )
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-20 18:54:48 -05:00
|
|
|
: add-simple-entry ( obj class -- start-state end-state )
|
|
|
|
[ next-state next-state 2dup ] 2dip
|
|
|
|
make-transition table add-transition ;
|
2009-02-19 17:48:46 -05:00
|
|
|
|
|
|
|
: epsilon-transition ( source target -- )
|
|
|
|
eps <literal-transition> table add-transition ;
|
|
|
|
|
|
|
|
M:: star nfa-node ( node -- start end )
|
|
|
|
node term>> nfa-node :> s1 :> s0
|
|
|
|
next-state :> s2
|
|
|
|
next-state :> s3
|
|
|
|
s1 s0 epsilon-transition
|
|
|
|
s2 s0 epsilon-transition
|
|
|
|
s2 s3 epsilon-transition
|
|
|
|
s1 s3 epsilon-transition
|
|
|
|
s2 s3 ;
|
|
|
|
|
|
|
|
M: epsilon nfa-node
|
|
|
|
drop eps literal-transition add-simple-entry ;
|
|
|
|
|
|
|
|
M: concatenation nfa-node ( node -- start end )
|
|
|
|
[ first>> ] [ second>> ] bi
|
|
|
|
reversed-regexp option? [ swap ] when
|
|
|
|
[ nfa-node ] bi@
|
|
|
|
[ epsilon-transition ] dip ;
|
|
|
|
|
|
|
|
:: alternate-nodes ( s0 s1 s2 s3 -- start end )
|
|
|
|
next-state :> s4
|
|
|
|
next-state :> s5
|
|
|
|
s4 s0 epsilon-transition
|
|
|
|
s4 s2 epsilon-transition
|
|
|
|
s1 s5 epsilon-transition
|
|
|
|
s3 s5 epsilon-transition
|
|
|
|
s4 s5 ;
|
|
|
|
|
|
|
|
M: alternation nfa-node ( node -- start end )
|
|
|
|
[ first>> ] [ second>> ] bi
|
|
|
|
[ nfa-node ] bi@
|
|
|
|
alternate-nodes ;
|
|
|
|
|
2009-02-20 18:54:48 -05:00
|
|
|
GENERIC: modify-class ( char-class -- char-class' )
|
|
|
|
|
|
|
|
M: object modify-class ;
|
|
|
|
|
|
|
|
M: integer modify-class
|
2008-11-24 23:17:47 -05:00
|
|
|
case-insensitive option? [
|
2009-02-20 18:54:48 -05:00
|
|
|
dup Letter? [
|
|
|
|
[ ch>lower ] [ ch>upper ] bi 2array <or-class>
|
|
|
|
] when
|
|
|
|
] when ;
|
|
|
|
|
|
|
|
M: integer nfa-node ( node -- start end )
|
|
|
|
modify-class dup class?
|
|
|
|
class-transition literal-transition ?
|
|
|
|
add-simple-entry ;
|
|
|
|
|
|
|
|
M: primitive-class modify-class
|
|
|
|
class>> modify-class <primitive-class> ;
|
|
|
|
|
|
|
|
M: or-class modify-class
|
|
|
|
seq>> [ modify-class ] map <or-class> ;
|
|
|
|
|
|
|
|
M: not-class modify-class
|
|
|
|
class>> modify-class <not-class> ;
|
|
|
|
|
|
|
|
M: any-char modify-class
|
|
|
|
[ dotall option? ] dip any-char-no-nl ? ;
|
|
|
|
|
|
|
|
: modify-letter-class ( class -- newclass )
|
|
|
|
case-insensitive option? [ drop Letter-class ] when ;
|
|
|
|
M: letter-class modify-class modify-letter-class ;
|
|
|
|
M: LETTER-class modify-class modify-letter-class ;
|
|
|
|
|
|
|
|
: cased-range? ( range -- ? )
|
|
|
|
[ from>> ] [ to>> ] bi {
|
|
|
|
[ [ letter? ] bi@ and ]
|
|
|
|
[ [ LETTER? ] bi@ and ]
|
|
|
|
} 2|| ;
|
|
|
|
|
|
|
|
M: range modify-class
|
2008-11-24 23:17:47 -05:00
|
|
|
case-insensitive option? [
|
2009-02-20 18:54:48 -05:00
|
|
|
dup cased-range? [
|
|
|
|
[ from>> ] [ to>> ] bi
|
2009-02-18 13:27:07 -05:00
|
|
|
[ [ ch>lower ] bi@ <range> ]
|
|
|
|
[ [ ch>upper ] bi@ <range> ] 2bi
|
2009-02-20 18:54:48 -05:00
|
|
|
2array <or-class>
|
|
|
|
] when
|
|
|
|
] when ;
|
|
|
|
|
|
|
|
M: class nfa-node
|
|
|
|
modify-class class-transition add-simple-entry ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-19 17:48:46 -05:00
|
|
|
M: with-options nfa-node ( node -- start end )
|
2009-02-18 13:27:07 -05:00
|
|
|
dup options>> [ tree>> nfa-node ] using-options ;
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: construct-nfa ( ast -- nfa-table )
|
2008-08-26 21:24:14 -04:00
|
|
|
[
|
2009-02-18 13:27:07 -05:00
|
|
|
0 state set
|
2009-02-20 18:54:48 -05:00
|
|
|
<transition-table> nfa-table set
|
|
|
|
remove-lookahead nfa-node
|
2009-02-19 17:48:46 -05:00
|
|
|
table
|
|
|
|
swap dup associate >>final-states
|
|
|
|
swap >>start-state
|
2008-08-26 21:24:14 -04:00
|
|
|
] with-scope ;
|