factor/basis/regexp/compiler/compiler.factor

82 lines
2.3 KiB
Factor
Raw Normal View History

! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
2009-02-26 19:06:57 -05:00
USING: regexp.classes kernel sequences regexp.negation
quotations regexp.minimize assocs fry math locals combinators
2009-02-26 19:06:57 -05:00
accessors words compiler.units kernel.private strings
sequences.private arrays regexp.matchers call ;
IN: regexp.compiler
: literals>cases ( literal-transitions -- case-body )
[ 1quotation ] assoc-map ;
: non-literals>dispatch ( non-literal-transitions -- quot )
2009-02-26 19:06:57 -05:00
[ [ '[ dup _ class-member? ] ] [ '[ drop _ execute ] ] bi* ] assoc-map
[ 3drop ] suffix '[ _ cond ] ;
: expand-one-or ( or-class transition -- alist )
[ seq>> ] dip '[ _ 2array ] map ;
: expand-or ( alist -- new-alist )
[
first2 over or-class?
[ expand-one-or ] [ 2array 1array ] if
] map concat ;
: split-literals ( transitions -- case default )
2009-02-26 19:06:57 -05:00
>alist expand-or [ first integer? ] partition
[ literals>cases ] [ non-literals>dispatch ] bi* ;
2009-02-26 19:06:57 -05:00
:: step ( last-match index str case-body final? -- last-index/f )
final? index last-match ?
index str bounds-check? [
index 1+ str
index str nth-unsafe
case-body case
2009-02-26 19:06:57 -05:00
] when ; inline
: transitions>quot ( transitions final-state? -- quot )
[ split-literals suffix ] dip
2009-02-26 19:06:57 -05:00
'[ _ _ step ] ;
! '[ { array-capacity string } declare _ _ step ] ;
: word>quot ( word dfa -- quot )
[ transitions>> at ]
[ final-states>> key? ] 2bi
transitions>quot ;
: states>code ( words dfa -- )
'[
[
dup _ word>quot
2009-02-26 19:06:57 -05:00
(( last-match index string -- ? ))
define-declared
] each
] with-compilation-unit ;
: transitions-at ( transitions assoc -- new-transitions )
dup '[
[ _ at ]
[ [ _ at ] assoc-map ] bi*
] assoc-map ;
: states>words ( dfa -- words dfa )
dup transitions>> keys [ gensym ] H{ } map>assoc
[ [ transitions-at ] rewrite-transitions ]
[ values ]
bi swap ;
: dfa>word ( dfa -- word )
states>words [ states>code ] keep start-state>> ;
: run-regexp ( string word -- ? )
2009-02-26 19:06:57 -05:00
[ f 0 ] 2dip execute ; inline
: dfa>quotation ( dfa -- quot )
dfa>word '[ _ run-regexp ] ;
TUPLE: quot-matcher quot ;
C: <quot-matcher> quot-matcher
2009-02-26 19:06:57 -05:00
M: quot-matcher match-index
quot>> call( string -- i/f ) ;