2009-02-26 15:19:02 -05:00
|
|
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2009-02-26 19:06:57 -05:00
|
|
|
USING: regexp.classes kernel sequences regexp.negation
|
2009-02-26 15:19:02 -05:00
|
|
|
quotations regexp.minimize assocs fry math locals combinators
|
2009-02-26 19:06:57 -05:00
|
|
|
accessors words compiler.units kernel.private strings
|
|
|
|
sequences.private arrays regexp.matchers call ;
|
2009-02-26 15:19:02 -05:00
|
|
|
IN: regexp.compiler
|
|
|
|
|
|
|
|
: literals>cases ( literal-transitions -- case-body )
|
|
|
|
[ 1quotation ] assoc-map ;
|
|
|
|
|
2009-03-04 14:22:22 -05:00
|
|
|
: condition>quot ( condition -- quot )
|
|
|
|
dup condition? [
|
|
|
|
[ question>> ] [ yes>> ] [ no>> ] tri
|
|
|
|
[ condition>quot ] bi@
|
|
|
|
'[ dup _ class-member? _ _ if ]
|
|
|
|
] [
|
|
|
|
[ [ 3drop ] ] [ '[ drop _ execute ] ] if-empty
|
|
|
|
] if ;
|
|
|
|
|
2009-02-26 15:19:02 -05:00
|
|
|
: non-literals>dispatch ( non-literal-transitions -- quot )
|
2009-03-04 14:22:22 -05:00
|
|
|
table>condition condition>quot ;
|
2009-02-26 19:06:57 -05:00
|
|
|
|
|
|
|
: expand-one-or ( or-class transition -- alist )
|
|
|
|
[ seq>> ] dip '[ _ 2array ] map ;
|
|
|
|
|
|
|
|
: expand-or ( alist -- new-alist )
|
|
|
|
[
|
|
|
|
first2 over or-class?
|
|
|
|
[ expand-one-or ] [ 2array 1array ] if
|
|
|
|
] map concat ;
|
2009-02-26 15:19:02 -05:00
|
|
|
|
|
|
|
: split-literals ( transitions -- case default )
|
2009-02-26 19:06:57 -05:00
|
|
|
>alist expand-or [ first integer? ] partition
|
2009-02-26 15:19:02 -05:00
|
|
|
[ literals>cases ] [ non-literals>dispatch ] bi* ;
|
|
|
|
|
2009-02-26 19:06:57 -05:00
|
|
|
:: step ( last-match index str case-body final? -- last-index/f )
|
|
|
|
final? index last-match ?
|
2009-02-26 15:19:02 -05:00
|
|
|
index str bounds-check? [
|
|
|
|
index 1+ str
|
|
|
|
index str nth-unsafe
|
|
|
|
case-body case
|
2009-02-26 19:06:57 -05:00
|
|
|
] when ; inline
|
2009-02-26 15:19:02 -05:00
|
|
|
|
|
|
|
: transitions>quot ( transitions final-state? -- quot )
|
|
|
|
[ split-literals suffix ] dip
|
2009-03-04 14:22:22 -05:00
|
|
|
'[ { array-capacity sequence } declare _ _ step ] ;
|
2009-02-26 15:19:02 -05:00
|
|
|
|
|
|
|
: word>quot ( word dfa -- quot )
|
|
|
|
[ transitions>> at ]
|
|
|
|
[ final-states>> key? ] 2bi
|
|
|
|
transitions>quot ;
|
|
|
|
|
|
|
|
: states>code ( words dfa -- )
|
|
|
|
'[
|
|
|
|
[
|
|
|
|
dup _ word>quot
|
2009-02-26 19:06:57 -05:00
|
|
|
(( last-match index string -- ? ))
|
|
|
|
define-declared
|
2009-02-26 15:19:02 -05:00
|
|
|
] each
|
|
|
|
] with-compilation-unit ;
|
|
|
|
|
|
|
|
: transitions-at ( transitions assoc -- new-transitions )
|
|
|
|
dup '[
|
|
|
|
[ _ at ]
|
|
|
|
[ [ _ at ] assoc-map ] bi*
|
|
|
|
] assoc-map ;
|
|
|
|
|
|
|
|
: states>words ( dfa -- words dfa )
|
|
|
|
dup transitions>> keys [ gensym ] H{ } map>assoc
|
|
|
|
[ [ transitions-at ] rewrite-transitions ]
|
|
|
|
[ values ]
|
|
|
|
bi swap ;
|
|
|
|
|
|
|
|
: dfa>word ( dfa -- word )
|
|
|
|
states>words [ states>code ] keep start-state>> ;
|
|
|
|
|
2009-03-04 14:22:22 -05:00
|
|
|
: check-sequence ( string -- string )
|
|
|
|
! Make this configurable
|
|
|
|
dup sequence? [ "String required" throw ] unless ;
|
2009-02-26 23:14:41 -05:00
|
|
|
|
|
|
|
: run-regexp ( start-index string word -- ? )
|
2009-03-04 14:22:22 -05:00
|
|
|
{ [ f ] [ >fixnum ] [ check-sequence ] [ execute ] } spread ; inline
|
2009-02-26 19:06:57 -05:00
|
|
|
|
|
|
|
: dfa>quotation ( dfa -- quot )
|
|
|
|
dfa>word '[ _ run-regexp ] ;
|
|
|
|
|
|
|
|
TUPLE: quot-matcher quot ;
|
|
|
|
C: <quot-matcher> quot-matcher
|
2009-02-26 15:19:02 -05:00
|
|
|
|
2009-02-26 23:14:41 -05:00
|
|
|
M: quot-matcher match-index-from
|
|
|
|
quot>> call( index string -- i/f ) ;
|