factor/basis/regexp/classes/classes.factor

131 lines
3.3 KiB
Factor
Raw Normal View History

! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
2009-02-18 13:27:07 -05:00
USING: accessors kernel math math.order words
ascii unicode.categories combinators.short-circuit sequences ;
IN: regexp.classes
SINGLETONS: any-char any-char-no-nl
letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class
unmatchable-class terminator-class word-boundary-class ;
SINGLETONS: beginning-of-input beginning-of-line
end-of-input end-of-line ;
2009-02-18 13:27:07 -05:00
TUPLE: range from to ;
C: <range> range
GENERIC: class-member? ( obj class -- ? )
! When does t get put in?
M: t class-member? ( obj class -- ? ) 2drop f ;
M: integer class-member? ( obj class -- ? ) = ;
2009-02-18 13:27:07 -05:00
M: range class-member? ( obj class -- ? )
[ from>> ] [ to>> ] bi between? ;
M: any-char class-member? ( obj class -- ? )
2drop t ;
M: any-char-no-nl class-member? ( obj class -- ? )
drop CHAR: \n = not ;
M: letter-class class-member? ( obj class -- ? )
drop letter? ;
M: LETTER-class class-member? ( obj class -- ? )
drop LETTER? ;
M: Letter-class class-member? ( obj class -- ? )
drop Letter? ;
M: ascii-class class-member? ( obj class -- ? )
drop ascii? ;
M: digit-class class-member? ( obj class -- ? )
drop digit? ;
2009-02-20 18:54:48 -05:00
: c-identifier-char? ( ch -- ? )
{ [ alpha? ] [ CHAR: _ = ] } 1|| ;
2008-11-18 16:10:24 -05:00
M: c-identifier-class class-member? ( obj class -- ? )
2009-02-20 18:54:48 -05:00
drop c-identifier-char? ;
2008-11-18 16:10:24 -05:00
M: alpha-class class-member? ( obj class -- ? )
drop alpha? ;
2009-02-20 18:54:48 -05:00
: punct? ( ch -- ? )
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
M: punctuation-class class-member? ( obj class -- ? )
drop punct? ;
2009-02-20 18:54:48 -05:00
: java-printable? ( ch -- ? )
{ [ alpha? ] [ punct? ] } 1|| ;
M: java-printable-class class-member? ( obj class -- ? )
drop java-printable? ;
M: non-newline-blank-class class-member? ( obj class -- ? )
drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
M: control-character-class class-member? ( obj class -- ? )
2009-02-15 15:28:22 -05:00
drop control? ;
2009-02-20 18:54:48 -05:00
: hex-digit? ( ch -- ? )
{
[ CHAR: A CHAR: F between? ]
[ CHAR: a CHAR: f between? ]
[ CHAR: 0 CHAR: 9 between? ]
} 1|| ;
M: hex-digit-class class-member? ( obj class -- ? )
drop hex-digit? ;
2009-02-20 18:54:48 -05:00
: java-blank? ( ch -- ? )
{
CHAR: \s CHAR: \t CHAR: \n
HEX: b HEX: 7 CHAR: \r
} member? ;
M: java-blank-class class-member? ( obj class -- ? )
drop java-blank? ;
M: unmatchable-class class-member? ( obj class -- ? )
2drop f ;
M: terminator-class class-member? ( obj class -- ? )
2009-02-20 18:54:48 -05:00
drop "\r\n\u000085\u002029\u002028" member? ;
M: beginning-of-line class-member? ( obj class -- ? )
2drop f ;
M: end-of-line class-member? ( obj class -- ? )
2drop f ;
TUPLE: or-class seq ;
C: <or-class> or-class
TUPLE: not-class class ;
C: <not-class> not-class
2009-02-20 18:54:48 -05:00
: <and-class> ( classes -- class )
[ <not-class> ] map <or-class> <not-class> ;
2009-02-19 19:28:54 -05:00
TUPLE: primitive-class class ;
C: <primitive-class> primitive-class
M: or-class class-member?
seq>> [ class-member? ] with any? ;
M: not-class class-member?
class>> class-member? not ;
2009-02-19 19:28:54 -05:00
M: primitive-class class-member?
class>> class-member? ;
2009-02-20 18:54:48 -05:00
UNION: class primitive-class not-class or-class range ;