factor/basis/pcre/pcre.factor

112 lines
3.1 KiB
Factor
Raw Normal View History

2013-09-18 16:45:17 -04:00
USING:
accessors
alien.c-types alien.data alien.strings
arrays
assocs
fry
grouping
io.encodings.utf8 io.encodings.string
kernel
math
mirrors
pcre.ffi pcre.info
sequences sequences.generalizations
sets.private
strings ;
QUALIFIED: splitting
2013-09-18 16:45:17 -04:00
IN: pcre
ERROR: malformed-regexp expr error ;
ERROR: pcre-error value ;
2013-09-18 16:45:17 -04:00
TUPLE: compiled-pcre pcre extra nametable ;
! Gen. utility
: replace-all ( seq subseqs new -- seq )
swapd '[ _ splitting:replace ] reduce ;
2013-09-18 16:45:17 -04:00
: default-opts ( -- opts )
PCRE_UTF8 PCRE_UCP bitor ;
2013-09-18 16:45:17 -04:00
: (pcre) ( expr -- pcre err-message err-offset )
default-opts { c-string int } [ f pcre_compile ] with-out-parameters ;
2013-09-18 16:45:17 -04:00
: <pcre> ( expr -- pcre )
dup (pcre) 2array swap [ 2nip ] [ malformed-regexp ] if* ;
: exec ( pcre extra subject ofs opts -- count match-data )
[ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
: <pcre-extra> ( pcre -- pcre-extra )
0 { c-string } [ pcre_study ] with-out-parameters drop ;
: config ( what -- alien )
{ int } [ pcre_config ] with-out-parameters ;
! Finding stuff
TUPLE: matcher pcre extra subject ofs exec-opts match ;
: <matcher> ( subject compiled-pcre -- matcher )
[ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
: exec-result>match ( count match-data -- match/f )
over dup -1 <
[ pcre-error ] [ dup -1 = [ 3drop f ] [ drop 2array ] if ] if ;
! This handling of zero-length matches is taken from pcredemo.c
: empty-match-opts ( -- opts )
PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
: findnext ( matcher -- matcher'/f )
clone dup <mirror> values 6 firstn drop exec exec-result>match
[
[ >>match ]
[
second
[ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
[ second >>ofs ] bi
] bi
]
[
dup exec-opts>> 0 =
[ drop f ]
[
dup [ ofs>> 1 + ] [ subject>> length ] bi over <
[ 2drop f ]
[
[ >>ofs ] [ drop 0 >>exec-opts ] bi
] if
] if
] if* ;
! Result parsing
: substring-list ( subject match-array count -- alien )
{ void* } [ pcre_get_substring_list drop ] with-out-parameters ;
: parse-match ( subject nametable match-data -- match )
swapd first2 swap [ substring-list ] keep void* <c-direct-array>
[ alien>native-string ] { } map-as [ of swap 2array ] with map-index ;
! High-level
: <compiled-pcre> ( expr -- compiled-pcre )
<pcre> dup <pcre-extra> 2dup name-table-entries compiled-pcre boa ;
: has-option? ( compiled-pcre option -- ? )
[ pcre>> options ] dip bitand 0 > ;
GENERIC: findall ( subject obj -- matches )
2013-09-18 16:45:17 -04:00
M: compiled-pcre findall
[ <matcher> [ findnext ] follow [ match>> ] map pruned harvest ]
[ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
2013-09-18 16:45:17 -04:00
M: string findall
<compiled-pcre> findall ;
2013-09-18 16:45:17 -04:00
: matches? ( subject obj -- ? )
dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ;
: split ( subject obj -- strings )
dupd findall [ first second ] map
dup first [ replace-all ] keep splitting:split harvest ;