2008-08-26 21:24:14 -04:00
|
|
|
! Copyright (C) 2008 Doug Coleman.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2009-02-18 13:27:07 -05:00
|
|
|
USING: accessors arrays assocs grouping kernel
|
|
|
|
locals math namespaces sequences fry quotations
|
|
|
|
math.order math.ranges vectors unicode.categories
|
|
|
|
regexp.transition-tables words sets
|
|
|
|
unicode.case.private regexp.ast regexp.classes ;
|
2009-01-08 20:07:46 -05:00
|
|
|
! This uses unicode.case.private for ch>upper and ch>lower
|
|
|
|
! but case-insensitive matching should be done by case-folding everything
|
|
|
|
! before processing starts
|
2008-09-18 15:42:16 -04:00
|
|
|
IN: regexp.nfa
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-01-20 12:53:50 -05:00
|
|
|
ERROR: feature-is-broken feature ;
|
|
|
|
|
2009-02-16 21:23:00 -05:00
|
|
|
SYMBOL: negated?
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: negate ( -- )
|
|
|
|
negated? [ not ] change ;
|
|
|
|
|
2008-08-26 21:24:14 -04:00
|
|
|
SINGLETON: eps
|
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: option-stack
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: combine-stack
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: state
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: next-state ( -- state )
|
|
|
|
state [ get ] [ inc ] bi ;
|
2008-11-24 13:59:29 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
SYMBOL: nfa-table
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: set-each ( keys value hashtable -- )
|
|
|
|
'[ _ swap _ set-at ] each ;
|
|
|
|
|
|
|
|
: options>hash ( options -- hashtable )
|
|
|
|
H{ } clone [
|
|
|
|
[ [ on>> t ] dip set-each ]
|
|
|
|
[ [ off>> f ] dip set-each ] 2bi
|
|
|
|
] keep ;
|
|
|
|
|
|
|
|
: using-options ( options quot -- )
|
|
|
|
[ options>hash option-stack [ ?push ] change ] dip
|
|
|
|
call option-stack get pop* ; inline
|
|
|
|
|
|
|
|
: option? ( obj -- ? )
|
|
|
|
option-stack get assoc-stack ;
|
|
|
|
|
|
|
|
: set-start-state ( -- nfa-table )
|
|
|
|
nfa-table get
|
|
|
|
combine-stack get pop first >>start-state ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
GENERIC: nfa-node ( node -- )
|
|
|
|
|
|
|
|
:: add-simple-entry ( obj class -- )
|
2009-02-18 13:27:07 -05:00
|
|
|
[let* | s0 [ next-state ]
|
|
|
|
s1 [ next-state ]
|
|
|
|
stack [ combine-stack get ]
|
|
|
|
table [ nfa-table get ] |
|
2009-02-16 21:23:00 -05:00
|
|
|
negated? get [
|
2008-08-27 17:22:34 -04:00
|
|
|
s0 f obj class make-transition table add-transition
|
2008-08-26 21:24:14 -04:00
|
|
|
s0 s1 <default-transition> table add-transition
|
|
|
|
] [
|
2008-08-27 17:22:34 -04:00
|
|
|
s0 s1 obj class make-transition table add-transition
|
2008-08-26 21:24:14 -04:00
|
|
|
] if
|
|
|
|
s0 s1 2array stack push
|
|
|
|
t s1 table final-states>> set-at ] ;
|
|
|
|
|
|
|
|
:: concatenate-nodes ( -- )
|
2009-02-18 13:27:07 -05:00
|
|
|
[let* | stack [ combine-stack get ]
|
|
|
|
table [ nfa-table get ]
|
2008-08-26 21:24:14 -04:00
|
|
|
s2 [ stack peek first ]
|
|
|
|
s3 [ stack pop second ]
|
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ] |
|
|
|
|
s1 s2 eps <literal-transition> table add-transition
|
|
|
|
s1 table final-states>> delete-at
|
|
|
|
s0 s3 2array stack push ] ;
|
|
|
|
|
|
|
|
:: alternate-nodes ( -- )
|
2009-02-18 13:27:07 -05:00
|
|
|
[let* | stack [ combine-stack get ]
|
|
|
|
table [ nfa-table get ]
|
2008-08-26 21:24:14 -04:00
|
|
|
s2 [ stack peek first ]
|
|
|
|
s3 [ stack pop second ]
|
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ]
|
2009-02-18 13:27:07 -05:00
|
|
|
s4 [ next-state ]
|
|
|
|
s5 [ next-state ] |
|
2008-08-26 21:24:14 -04:00
|
|
|
s4 s0 eps <literal-transition> table add-transition
|
|
|
|
s4 s2 eps <literal-transition> table add-transition
|
|
|
|
s1 s5 eps <literal-transition> table add-transition
|
|
|
|
s3 s5 eps <literal-transition> table add-transition
|
|
|
|
s1 table final-states>> delete-at
|
|
|
|
s3 table final-states>> delete-at
|
|
|
|
t s5 table final-states>> set-at
|
|
|
|
s4 s5 2array stack push ] ;
|
|
|
|
|
2009-02-16 21:23:00 -05:00
|
|
|
M: star nfa-node ( node -- )
|
2008-08-26 21:24:14 -04:00
|
|
|
term>> nfa-node
|
2009-02-18 13:27:07 -05:00
|
|
|
[let* | stack [ combine-stack get ]
|
2008-08-26 21:24:14 -04:00
|
|
|
s0 [ stack peek first ]
|
|
|
|
s1 [ stack pop second ]
|
2009-02-18 13:27:07 -05:00
|
|
|
s2 [ next-state ]
|
|
|
|
s3 [ next-state ]
|
|
|
|
table [ nfa-table get ] |
|
2008-08-26 21:24:14 -04:00
|
|
|
s1 table final-states>> delete-at
|
|
|
|
t s3 table final-states>> set-at
|
|
|
|
s1 s0 eps <literal-transition> table add-transition
|
|
|
|
s2 s0 eps <literal-transition> table add-transition
|
|
|
|
s2 s3 eps <literal-transition> table add-transition
|
|
|
|
s1 s3 eps <literal-transition> table add-transition
|
|
|
|
s2 s3 2array stack push ] ;
|
|
|
|
|
|
|
|
M: concatenation nfa-node ( node -- )
|
2009-02-18 13:27:07 -05:00
|
|
|
seq>> [ eps literal-transition add-simple-entry ] [
|
|
|
|
reversed-regexp option? [ <reversed> ] when
|
|
|
|
[ [ nfa-node ] each ]
|
|
|
|
[ length 1- [ concatenate-nodes ] times ] bi
|
|
|
|
] if-empty ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
|
|
|
M: alternation nfa-node ( node -- )
|
|
|
|
seq>>
|
|
|
|
[ [ nfa-node ] each ]
|
|
|
|
[ length 1- [ alternate-nodes ] times ] bi ;
|
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
M: integer nfa-node ( node -- )
|
2008-11-24 23:17:47 -05:00
|
|
|
case-insensitive option? [
|
2009-02-18 13:27:07 -05:00
|
|
|
dup [ ch>lower ] [ ch>upper ] bi
|
2008-11-24 23:17:47 -05:00
|
|
|
2dup = [
|
|
|
|
2drop
|
2009-02-18 13:27:07 -05:00
|
|
|
literal-transition add-simple-entry
|
2008-11-24 23:17:47 -05:00
|
|
|
] [
|
|
|
|
[ literal-transition add-simple-entry ] bi@
|
|
|
|
alternate-nodes drop
|
|
|
|
] if
|
|
|
|
] [
|
2009-02-18 13:27:07 -05:00
|
|
|
literal-transition add-simple-entry
|
2008-11-24 23:17:47 -05:00
|
|
|
] if ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
M: primitive-class nfa-node ( node -- )
|
|
|
|
class>> dup
|
|
|
|
{ letter-class LETTER-class } member? case-insensitive option? and
|
|
|
|
[ drop Letter-class ] when
|
|
|
|
class-transition add-simple-entry ;
|
2008-11-24 23:17:47 -05:00
|
|
|
|
|
|
|
M: any-char nfa-node ( node -- )
|
|
|
|
[ dotall option? ] dip any-char-no-nl ?
|
2008-08-26 21:24:14 -04:00
|
|
|
class-transition add-simple-entry ;
|
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
M: negation nfa-node ( node -- )
|
|
|
|
negate term>> nfa-node negate ;
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
M: range nfa-node ( node -- )
|
2008-11-24 23:17:47 -05:00
|
|
|
case-insensitive option? [
|
2009-01-08 20:07:46 -05:00
|
|
|
! This should be implemented for Unicode by case-folding
|
|
|
|
! the input and all strings in the regexp.
|
2008-11-24 23:17:47 -05:00
|
|
|
dup [ from>> ] [ to>> ] bi
|
|
|
|
2dup [ Letter? ] bi@ and [
|
|
|
|
rot drop
|
2009-02-18 13:27:07 -05:00
|
|
|
[ [ ch>lower ] bi@ <range> ]
|
|
|
|
[ [ ch>upper ] bi@ <range> ] 2bi
|
2008-11-24 23:17:47 -05:00
|
|
|
[ class-transition add-simple-entry ] bi@
|
|
|
|
alternate-nodes
|
|
|
|
] [
|
|
|
|
2drop
|
|
|
|
class-transition add-simple-entry
|
|
|
|
] if
|
|
|
|
] [
|
|
|
|
class-transition add-simple-entry
|
|
|
|
] if ;
|
2008-08-26 21:24:14 -04:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
M: with-options nfa-node ( node -- )
|
|
|
|
dup options>> [ tree>> nfa-node ] using-options ;
|
2008-11-24 23:17:47 -05:00
|
|
|
|
2009-02-18 13:27:07 -05:00
|
|
|
: construct-nfa ( ast -- nfa-table )
|
2008-08-26 21:24:14 -04:00
|
|
|
[
|
2009-02-18 13:27:07 -05:00
|
|
|
negated? off
|
|
|
|
V{ } clone combine-stack set
|
|
|
|
0 state set
|
|
|
|
<transition-table> clone nfa-table set
|
|
|
|
nfa-node
|
|
|
|
set-start-state
|
2008-08-26 21:24:14 -04:00
|
|
|
] with-scope ;
|