factor/basis/regexp/classes/classes.factor

296 lines
7.5 KiB
Factor
Raw Normal View History

! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
2009-02-21 18:13:11 -05:00
USING: accessors kernel math math.order words combinators locals
ascii unicode.categories combinators.short-circuit sequences
fry macros arrays assocs sets classes ;
IN: regexp.classes
! Singleton words that name the built-in character classes. Most of
! them receive a class-member? method further down in this file.
SINGLETONS: any-char any-char-no-nl
letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class
unmatchable-class terminator-class word-boundary-class ;
2009-03-11 16:51:54 -04:00
! Singleton marker words. In this file only ^ and $ get class-member?
! methods (both always answer f); how the others are consumed is not
! visible here -- presumably they are positional assertions, TODO confirm.
SINGLETONS: beginning-of-input ^ end-of-input $ end-of-file word-break ;
2009-02-18 13:27:07 -05:00
! A range of values with endpoints from and to; its class-member?
! method tests membership with between?.
TUPLE: range from to ;
! <range> ( from to -- range ) builds a range from its two slots in order.
C: <range> range
! class-member? answers whether obj belongs to the character class
! `class`. Dispatch happens on the class, which is on top of the stack.
GENERIC: class-member? ( obj class -- ? )
! The class t contains every object.
M: t class-member? ( obj class -- ? ) 2drop t ;
! An integer used as a class contains exactly the object equal to it.
M: integer class-member? ( obj class -- ? ) = ;
2009-02-18 13:27:07 -05:00
! obj is in a range when it lies between the from and to slots
! (between? includes both endpoints).
M:: range class-member? ( obj class -- ? )
    obj class from>> class to>> between? ;
! any-char accepts every object.
M: any-char class-member? ( obj class -- ? )
    2drop t ;

! any-char-no-nl accepts everything except the line feed character.
M:: any-char-no-nl class-member? ( obj class -- ? )
    obj CHAR: \n = not ;
! Each of these classes delegates to the character predicate of the
! same name; the class object itself carries no data and is ignored.
M:: letter-class class-member? ( obj class -- ? )
    obj letter? ;

M:: LETTER-class class-member? ( obj class -- ? )
    obj LETTER? ;

M:: Letter-class class-member? ( obj class -- ? )
    obj Letter? ;

M:: ascii-class class-member? ( obj class -- ? )
    obj ascii? ;

M:: digit-class class-member? ( obj class -- ? )
    obj digit? ;
2009-02-20 18:54:48 -05:00
! True when ch satisfies alpha? or is the underscore character.
: c-identifier-char? ( ch -- ? )
    dup alpha? [ drop t ] [ CHAR: _ = ] if ;
2008-11-18 16:10:24 -05:00
! Membership is decided by c-identifier-char?; the class object is ignored.
M:: c-identifier-class class-member? ( obj class -- ? )
    obj c-identifier-char? ;
2008-11-18 16:10:24 -05:00
! Membership is decided by alpha?; the class object is ignored.
M:: alpha-class class-member? ( obj class -- ? )
    obj alpha? ;
2009-02-20 18:54:48 -05:00
! True when ch is one of the punctuation characters listed in the
! literal string below.
:: punct? ( ch -- ? )
    ch "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
! Membership is decided by punct?; the class object is ignored.
M:: punctuation-class class-member? ( obj class -- ? )
    obj punct? ;
2009-02-20 18:54:48 -05:00
! True when ch satisfies alpha? or punct?. alpha? is tried first and
! punct? is only consulted when it fails.
: java-printable? ( ch -- ? )
    dup alpha? [ drop t ] [ punct? ] if ;
! Membership is decided by java-printable?; the class object is ignored.
M:: java-printable-class class-member? ( obj class -- ? )
    obj java-printable? ;
! A member must satisfy blank? and must not be the line feed character.
! The newline comparison is only made once blank? has succeeded.
M:: non-newline-blank-class class-member? ( obj class -- ? )
    obj blank? [ obj CHAR: \n = not ] [ f ] if ;
! Membership is decided by control?; the class object is ignored.
M:: control-character-class class-member? ( obj class -- ? )
    obj control? ;
2009-02-20 18:54:48 -05:00
! True when ch falls in A-F, a-f or 0-9. The three ranges are checked
! in that order and checking stops at the first match.
:: hex-digit? ( ch -- ? )
    ch CHAR: A CHAR: F between? [ t ] [
        ch CHAR: a CHAR: f between? [ t ] [
            ch CHAR: 0 CHAR: 9 between?
        ] if
    ] if ;
! Membership is decided by hex-digit?; the class object is ignored.
M:: hex-digit-class class-member? ( obj class -- ? )
    obj hex-digit? ;
2009-02-20 18:54:48 -05:00
! True when ch is one of the six characters matched by Java's \s
! class: space, tab, line feed, vertical tab (0x0B), form feed (0x0C)
! and carriage return.
! The list used to contain 0x07 (BEL) where form feed belongs, so form
! feed was not recognised as blank while BEL was.
: java-blank? ( ch -- ? )
    {
        CHAR: \s CHAR: \t CHAR: \n
        HEX: b HEX: c CHAR: \r
    } member? ;
! Membership is decided by java-blank?; the class object is ignored.
M:: java-blank-class class-member? ( obj class -- ? )
    obj java-blank? ;
! unmatchable-class contains nothing: both inputs are discarded and f
! is returned.
M: unmatchable-class class-member? ( obj class -- ? )
2drop f ;
! A member is any of the characters in the literal string below:
! carriage return, line feed, U+0085, U+2029 and U+2028.
M:: terminator-class class-member? ( obj class -- ? )
    obj "\r\n\u000085\u002029\u002028" member? ;
! The ^ and $ markers never contain any object: both inputs are
! discarded and f is returned.
M: ^ class-member? ( obj class -- ? )
2drop f ;
M: $ class-member? ( obj class -- ? )
2drop f ;
2009-02-21 18:13:11 -05:00
! The class f is empty: nothing is a member of it.
M: f class-member? 2drop f ;
! Wrapper holding another class in its class slot; its class-member?
! method forwards to the wrapped class.
TUPLE: primitive-class class ;
! <primitive-class> ( class -- primitive-class )
C: <primitive-class> primitive-class
! Compound classes. or-class and and-class hold their operands in seq
! (membership uses any? and all? respectively); not-class holds the
! single class being negated.
TUPLE: or-class seq ;
TUPLE: not-class class ;
TUPLE: and-class seq ;
2009-02-20 18:54:48 -05:00
! Tries to merge two operands of a conjunction into one class.
! Dispatches on class2 (top of stack). The flag is true when `combined`
! may stand in for both operands; callers such as try-combine only use
! `combined` when the flag is true.
GENERIC: combine-and ( class1 class2 -- combined ? )
2009-02-21 18:13:11 -05:00
! Default merge rule shared by combine-and and combine-or: keep the
! first object and report whether the second one equals it.
:: replace-if-= ( a b -- object ? )
    a b a = ;
! Fallback: two operands merge only when they are equal.
M: object combine-and replace-if-= ;
2009-02-21 18:13:11 -05:00
! X and t is X: the t operand is discarded and class1 is kept.
M:: t combine-and ( class1 class2 -- combined ? )
    class1 t ;
2009-02-21 18:13:11 -05:00
! X and f is f: class1 is discarded and the f operand is the result.
M:: f combine-and ( class1 class2 -- combined ? )
    class2 t ;
! Merges class1 with a negated class:
!   * class1 equals the negated class  -> f (X and not X), merged
!   * negated class is an integer that is not a member of class1
!                                      -> class1 alone, merged
!   * anything else                    -> no merge (f f)
M:: not-class combine-and ( class1 class2 -- combined ? )
    class2 class>> :> negated
    class1 negated = [ f t ] [
        negated integer? [
            negated class1 class-member?
            [ f f ] [ class1 t ] if
        ] [ f f ] if
    ] if ;
! Merges class1 with a single integer: if the integer is a member of
! class1 the conjunction is just that integer, otherwise it is f.
! A merge is reported either way.
M:: integer combine-and ( class1 class2 -- combined ? )
    class2 class1 class-member? [ class2 t ] [ f t ] if ;
2009-02-21 18:13:11 -05:00
! Tries to merge two operands of a disjunction into one class.
! Dispatches on class2 (top of stack). The flag is true when `combined`
! may stand in for both operands.
GENERIC: combine-or ( class1 class2 -- combined ? )
2009-02-21 18:13:11 -05:00
! Fallback: two operands merge only when they are equal.
M: object combine-or replace-if-= ;
2009-02-21 18:13:11 -05:00
! X or t is t: class1 is discarded and the t operand is the result.
M:: t combine-or ( class1 class2 -- combined ? )
    class2 t ;
2009-02-21 18:13:11 -05:00
! X or f is X: the f operand is discarded and class1 is kept.
M:: f combine-or ( class1 class2 -- combined ? )
    class1 t ;
! X or not X is t. Any other pairing with a negated class is left
! unmerged (f f).
M:: not-class combine-or ( class1 class2 -- combined ? )
    class1 class2 class>> = [ t t ] [ f f ] if ;
! Merges class1 with a single integer: if the integer is already a
! member of class1 the disjunction is class1; otherwise no merge (f f).
M:: integer combine-or ( class1 class2 -- combined ? )
    class2 class1 class-member? [ class1 t ] [ f f ] if ;
2009-02-21 18:13:11 -05:00
! Splices nested compounds into their parent: every element of seq
! that is an instance of `class` contributes the contents of its seq
! slot, every other element contributes itself.
:: flatten ( seq class -- newseq )
    seq [
        dup class instance? [ seq>> ] [ 1array ] if
    ] map concat ; inline
2009-02-21 18:13:11 -05:00
! Calls quot on ( elt1 elt2 ). If it reports success the result is
! returned with t; otherwise the first result is thrown away and quot
! gets a second chance with the arguments swapped, whose outcome is
! returned as-is.
:: try-combine ( elt1 elt2 quot -- combined/f ? )
    elt1 elt2 quot call
    [ t ] [ drop elt2 elt1 quot call ] if ; inline
2009-03-09 16:44:11 -04:00
DEFER: answer

! Tries to simplify a pair by substituting `empty` for one element
! inside the other (via answer). The attempt succeeds when the
! substituted class differs from the class it started from.
!
! try-combine has the effect ( elt1 elt2 quot -- combined/f ? ), so
! both elements are passed to it explicitly; previously only the
! quotation was pushed. The quotation compares the result against its
! own first argument so that the swapped retry performed by
! try-combine is judged against the element actually being rewritten.
:: try-cancel ( elt1 elt2 empty -- combined/f ? )
    elt1 elt2 [| a b | a b empty answer dup a = not ] try-combine ;
2009-02-21 18:13:11 -05:00
! Adds elt to seq, merging where possible. quot is called as
! ( item elt ) for successive items of seq until it reports success.
! On success the matching item is removed and the combined value is
! prefixed in its place; if no item merges, elt itself is prefixed.
:: prefix-combining ( seq elt quot: ( elt1 elt2 -- combined/f ? ) -- newseq )
    f :> combined!
    ! The find predicate stores each attempt's result as a side effect;
    ! after a hit, `combined` holds the merge that succeeded.
    seq [ elt quot call swap combined! ] find drop :> index
    index
    [ index seq remove-nth combined prefix ]
    [ seq elt prefix ] if ; inline
2009-03-09 16:44:11 -04:00
! Folds seq into a new sequence, starting from { } and inserting each
! element with prefix-combining so that mergeable elements collapse.
:: combine-by ( seq quot -- new-seq )
    seq { } [ quot prefix-combining ] reduce ; inline
! Turns a list of operands into a class value:
!   no operands   -> empty (the identity supplied by the caller)
!   one operand   -> that operand itself
!   more          -> a new instance of `class` with seq stored in it
:: seq>instance ( seq empty class -- instance )
    seq empty? [ empty ] [
        seq length 1 =
        [ seq first ]
        [ class new seq >>seq ] if
    ] if ; inline
2009-02-21 18:13:11 -05:00
! Shared body of the compound-class constructors: flatten nested
! instances of `class`, merge operands pairwise with quot, then wrap
! what is left with seq>instance.
:: combine ( seq quot: ( elt1 elt2 -- combined/f ? ) empty class -- newseq )
    seq class flatten :> parts
    parts [ quot try-combine ] combine-by
    ! [ empty try-cancel ] combine-by ! This makes the algorithm O(n^4)
    empty class seq>instance ; inline
2009-02-19 19:28:54 -05:00
! Builds the conjunction of the classes in seq, merging operands with
! combine-and. t is the value used when no operands remain.
:: <and-class> ( seq -- class )
    seq [ combine-and ] t and-class combine ;
! obj is in a conjunction when it is a member of every operand.
M:: and-class class-member? ( obj class -- ? )
    class seq>> [ obj swap class-member? ] all? ;
! Builds the disjunction of the classes in seq, merging operands with
! combine-or. f is the value used when no operands remain.
:: <or-class> ( seq -- class )
    seq [ combine-or ] f or-class combine ;
! obj is in a disjunction when it is a member of at least one operand.
M:: or-class class-member? ( obj class -- ? )
    class seq>> [ obj swap class-member? ] any? ;
! Smart constructor for the complement of a class.
GENERIC: <not-class> ( class -- inverse )
! Default: wrap the class in a not-class tuple.
M: object <not-class>
not-class boa ;
! Double negation: unwrap instead of nesting.
M: not-class <not-class>
class>> ;
! De Morgan: not (a and b) is (not a) or (not b).
M: and-class <not-class>
seq>> [ <not-class> ] map <or-class> ;
! De Morgan: not (a or b) is (not a) and (not b).
M: or-class <not-class>
seq>> [ <not-class> ] map <and-class> ;
! The constants t and f are each other's complement.
M: t <not-class> drop f ;
M: f <not-class> drop t ;
! obj is in a negated class exactly when it is not in the wrapped class.
M:: not-class class-member? ( obj class -- ? )
    obj class class>> class-member? not ;
2009-02-19 19:28:54 -05:00
! Forwards the membership test to the wrapped class.
M:: primitive-class class-member? ( obj class -- ? )
    obj class class>> class-member? ;
2009-02-20 18:54:48 -05:00
! Union of the tuple types defined in this file that represent
! character classes. Note that it is named `class` inside this
! vocabulary, the same spelling many words here use for a stack input.
UNION: class primitive-class not-class or-class and-class range ;
! Node of a decision tree: `question` is tested, `yes` and `no` are
! the subtrees (or leaves) for the two outcomes.
TUPLE: condition question yes no ;
! <condition> ( question yes no -- condition )
C: <condition> condition
2009-03-09 16:44:11 -04:00
! Rewrites `class` by substituting `to` for occurrences of `from`.
! GENERIC# ... 2 dispatches on the item two below the top of the stack,
! i.e. on `class`.
GENERIC# answer 2 ( class from to -- new-class )
2009-03-09 16:44:11 -04:00
! Leaf case: a class equal to `from` becomes `to`; anything else is
! returned unchanged.
M:: object answer ( class from to -- new-class )
    class from = [ to ] [ class ] if ;
! Applies answer with the same from/to pair to every operand stored in
! the seq slot of a compound class and returns the rewritten operands.
:: replace-compound ( class from to -- seq )
    class seq>> [ from to answer ] map ;
2009-03-09 16:44:11 -04:00
! Rewrites each operand, then rebuilds the conjunction so it is
! simplified again.
M:: and-class answer ( class from to -- new-class )
    class from to replace-compound <and-class> ;
2009-03-09 16:44:11 -04:00
! Rewrites each operand, then rebuilds the disjunction so it is
! simplified again.
M:: or-class answer ( class from to -- new-class )
    class from to replace-compound <or-class> ;
2009-03-09 16:44:11 -04:00
! Rewrites the wrapped class and negates the result again.
M:: not-class answer ( class from to -- new-class )
    class class>> from to answer <not-class> ;
2009-03-09 16:44:11 -04:00
! Substitutes `answer` for `question` in every value of table, then
! drops the entries whose value became f.
: assoc-answer ( table question answer -- new-table )
    [ answer ] 2curry assoc-map
    [ nip ] assoc-filter ;
2009-03-09 16:44:11 -04:00
! Applies assoc-answer once per question, threading the table through
! and using the same answer each time.
: assoc-answers ( table questions answer -- new-table )
    [ assoc-answer ] curry each ;
2009-03-07 17:31:46 -05:00
DEFER: make-condition

! Builds the decision node for one question: the table is specialised
! once with the question answered t and once answered f, and each
! variant is expanded over the remaining questions. When both subtrees
! come out equal the question is irrelevant and that subtree is
! returned directly instead of a condition node.
:: (make-condition) ( table questions question -- condition )
    table question t assoc-answer questions make-condition :> yes
    table question f assoc-answer questions make-condition :> no
    yes no = [ no ] [ question yes no <condition> ] if ;
! With no questions left the result is the table's keys; otherwise the
! first question is split off and handled by (make-condition).
:: make-condition ( table questions -- condition )
    questions empty? [ table keys ] [
        table questions unclip (make-condition)
    ] if ;
! Collects the leaf classes ("questions") that a class is built from.
GENERIC: class>questions ( class -- questions )
! Questions of a compound: those of every operand, gathered together.
: compound-questions ( class -- questions ) seq>> [ class>questions ] gather ;
M: or-class class>questions compound-questions ;
M: and-class class>questions compound-questions ;
! Negation asks the same questions as the class it wraps.
M: not-class class>questions class>> class>questions ;
! Any other object is itself the single question.
M: object class>questions 1array ;
! Gathers the questions of every value in table into an array and
! removes t from it.
:: table>questions ( table -- questions )
    t table values [ class>questions ] gather >array remove ;
! Converts a table into a decision tree over its questions.
: table>condition ( table -- condition )
    ! input table is state => class
    >alist [ ] [ table>questions ] bi make-condition ;
! Applies quot to every leaf of a decision tree. A condition node is
! rebuilt with the same question and with both branches mapped
! recursively; anything that is not a condition is treated as a leaf
! and passed to quot.
: condition-map ( condition quot: ( obj -- obj' ) -- new-condition )
over condition? [
[ [ question>> ] [ yes>> ] [ no>> ] tri ] dip
'[ _ condition-map ] bi@ <condition>
] [ call ] if ; inline recursive
! Lists the distinct leaves of a decision tree: leaves of the yes
! branch first, then those of the no branch, with duplicates pruned.
! A value that is not a condition is a leaf and yields a one-element
! array.
: condition-states ( condition -- states )
    dup condition? [
        [ yes>> condition-states ]
        [ no>> condition-states ] bi
        append prune
    ] [ 1array ] if ;
! Replaces every leaf of a decision tree by the value it maps to in
! assoc.
: condition-at ( condition assoc -- new-condition )
    [ at ] curry condition-map ;