pcre: stop using mirror and cloning matchers.
parent
a634f8c0ff
commit
ebf3b58ad3
|
@ -12,8 +12,6 @@ IN: pcre.tests
|
||||||
<compiled-pcre> nametable>>
|
<compiled-pcre> nametable>>
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
[ { 100 110 120 130 } ] [ 100 10 4 gen-array-addrs ] unit-test
|
|
||||||
|
|
||||||
CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
|
CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
|
||||||
|
|
||||||
! On windows the erroffset appears to be set to 0 despite there being
|
! On windows the erroffset appears to be set to 0 despite there being
|
||||||
|
@ -29,16 +27,16 @@ CONSTANT: iso-date "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
|
||||||
|
|
||||||
[ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
|
[ t ] [ "foo" <compiled-pcre> PCRE_UTF8 has-option? ] unit-test
|
||||||
|
|
||||||
os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
|
os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE pcre-config ] unit-test ] when
|
||||||
|
|
||||||
! In this day and age, not supporting utf-8 is broken.
|
! In this day and age, not supporting utf-8 is broken.
|
||||||
[ 1 ] [ PCRE_CONFIG_UTF8 config ] unit-test
|
[ 1 ] [ PCRE_CONFIG_UTF8 pcre-config ] unit-test
|
||||||
|
|
||||||
[ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES config ] unit-test
|
[ 1 ] [ PCRE_CONFIG_UNICODE_PROPERTIES pcre-config ] unit-test
|
||||||
|
|
||||||
! libpcre must not support 16 or 32 bit code points.
|
! libpcre must not support 16 or 32 bit code points.
|
||||||
[ 0 ] [ PCRE_CONFIG_UTF16 config ] unit-test
|
[ 0 ] [ PCRE_CONFIG_UTF16 pcre-config ] unit-test
|
||||||
[ 0 ] [ PCRE_CONFIG_UTF32 config ] unit-test
|
[ 0 ] [ PCRE_CONFIG_UTF32 pcre-config ] unit-test
|
||||||
|
|
||||||
! Tests for findall
|
! Tests for findall
|
||||||
[
|
[
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
|
! Copyright (C) 2013 Björn Lindqvist
|
||||||
|
! See http://factorcode.org/license.txt for BSD license
|
||||||
|
|
||||||
USING: accessors alien alien.accessors alien.c-types alien.data
|
USING: accessors alien alien.accessors alien.c-types alien.data
|
||||||
alien.enums alien.strings arrays assocs fry io.encodings.string
|
alien.enums alien.strings arrays assocs combinators fry
|
||||||
io.encodings.utf8 kernel literals math math.bitwise mirrors
|
io.encodings.string io.encodings.utf8 kernel literals math
|
||||||
pcre.ffi sequences sequences.generalizations splitting strings ;
|
math.bitwise pcre.ffi sequences splitting strings ;
|
||||||
QUALIFIED: regexp
|
QUALIFIED: regexp
|
||||||
IN: pcre
|
IN: pcre
|
||||||
|
|
||||||
|
@ -20,42 +23,43 @@ ERROR: pcre-error value ;
|
||||||
: 2with ( param1 param2 obj quot -- obj curry )
|
: 2with ( param1 param2 obj quot -- obj curry )
|
||||||
[ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
|
[ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
|
||||||
|
|
||||||
: gen-array-addrs ( base size n -- addrs )
|
|
||||||
iota [ * + ] 2with map ;
|
|
||||||
|
|
||||||
: utf8-start-byte? ( byte -- ? )
|
: utf8-start-byte? ( byte -- ? )
|
||||||
0xc0 bitand 0x80 = not ;
|
0xc0 bitand 0x80 = not ;
|
||||||
|
|
||||||
: next-utf8-char ( byte-array pos -- pos' )
|
: next-utf8-char ( byte-array pos -- pos' )
|
||||||
1 + 2dup swap ?nth
|
1 + 2dup swap ?nth [
|
||||||
[ utf8-start-byte? [ nip ] [ next-utf8-char ] if ] [ 2drop f ] if* ;
|
utf8-start-byte? [ nip ] [ next-utf8-char ] if
|
||||||
|
] [ 2drop f ] if* ;
|
||||||
|
|
||||||
: config ( what -- alien )
|
: pcre-config ( what -- alien )
|
||||||
{ int } [ pcre_config ] with-out-parameters ;
|
{ int } [ pcre_config ] with-out-parameters ;
|
||||||
|
|
||||||
: fullinfo ( pcre extra what -- obj )
|
: pcre-fullinfo ( pcre extra what -- obj )
|
||||||
{ int } [ pcre_fullinfo ] with-out-parameters nip ;
|
{ int } [ pcre_fullinfo ] with-out-parameters nip ;
|
||||||
|
|
||||||
|
: pcre-substring-list ( subject match-array count -- alien )
|
||||||
|
{ void* } [ pcre_get_substring_list drop ] with-out-parameters ;
|
||||||
|
|
||||||
: name-count ( pcre extra -- n )
|
: name-count ( pcre extra -- n )
|
||||||
PCRE_INFO_NAMECOUNT fullinfo ;
|
PCRE_INFO_NAMECOUNT pcre-fullinfo ;
|
||||||
|
|
||||||
: name-table ( pcre extra -- addr )
|
: name-table ( pcre extra -- addr )
|
||||||
[ drop alien-address 32 on-bits unmask ]
|
[ drop alien-address 32 on-bits unmask ]
|
||||||
[ PCRE_INFO_NAMETABLE fullinfo ] 2bi + ;
|
[ PCRE_INFO_NAMETABLE pcre-fullinfo ] 2bi + ;
|
||||||
|
|
||||||
: name-entry-size ( pcre extra -- size )
|
: name-entry-size ( pcre extra -- size )
|
||||||
PCRE_INFO_NAMEENTRYSIZE fullinfo ;
|
PCRE_INFO_NAMEENTRYSIZE pcre-fullinfo ;
|
||||||
|
|
||||||
: name-table-entry ( addr -- group-index group-name )
|
: name-table-entry ( addr -- group-index group-name )
|
||||||
[ <alien> 1 alien-unsigned-1 ]
|
[ <alien> 1 alien-unsigned-1 ]
|
||||||
[ 2 + <alien> utf8 alien>string ] bi ;
|
[ 2 + <alien> utf8 alien>string ] bi ;
|
||||||
|
|
||||||
: options ( pcre -- opts )
|
|
||||||
f PCRE_INFO_OPTIONS fullinfo ;
|
|
||||||
|
|
||||||
: name-table-entries ( pcre extra -- addrs )
|
: name-table-entries ( pcre extra -- addrs )
|
||||||
[ name-table ] [ name-entry-size ] [ name-count ] 2tri
|
[ name-table ] [ name-entry-size ] [ name-count ] 2tri
|
||||||
gen-array-addrs [ name-table-entry 2array ] map ;
|
iota [ * + name-table-entry 2array ] 2with map ;
|
||||||
|
|
||||||
|
: options ( pcre -- opts )
|
||||||
|
f PCRE_INFO_OPTIONS pcre-fullinfo ;
|
||||||
|
|
||||||
CONSTANT: default-opts flags{ PCRE_UTF8 PCRE_UCP }
|
CONSTANT: default-opts flags{ PCRE_UTF8 PCRE_UCP }
|
||||||
|
|
||||||
|
@ -71,39 +75,43 @@ CONSTANT: default-opts flags{ PCRE_UTF8 PCRE_UCP }
|
||||||
: exec ( pcre extra subject ofs opts -- count match-data )
|
: exec ( pcre extra subject ofs opts -- count match-data )
|
||||||
[ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
|
[ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
|
||||||
|
|
||||||
TUPLE: matcher pcre extra subject ofs exec-opts match ;
|
TUPLE: matcher pcre extra subject ofs exec-opts ;
|
||||||
|
|
||||||
: <matcher> ( subject compiled-pcre -- matcher )
|
: <matcher> ( subject compiled-pcre -- matcher )
|
||||||
[ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
|
[ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 matcher boa ;
|
||||||
|
|
||||||
CONSTANT: empty-match-opts flags{ PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED }
|
CONSTANT: empty-match-opts flags{ PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED }
|
||||||
|
|
||||||
: findnext ( matcher -- matcher'/f )
|
: findnext ( matcher -- matcher match/f )
|
||||||
clone dup <mirror> values 6 firstn drop exec
|
dup {
|
||||||
over dup -1 < [ PCRE_ERRORS number>enum pcre-error ] when
|
[ pcre>> ]
|
||||||
-1 =
|
[ extra>> ]
|
||||||
[
|
[ subject>> ]
|
||||||
2drop dup exec-opts>> 0 =
|
[ ofs>> ]
|
||||||
[ drop f ]
|
[ exec-opts>> ]
|
||||||
[
|
} cleave exec over dup -1 < [
|
||||||
dup [ subject>> ] [ ofs>> ] bi next-utf8-char
|
PCRE_ERRORS number>enum pcre-error
|
||||||
[ >>ofs 0 >>exec-opts findnext ] [ drop f ] if*
|
] [
|
||||||
|
-1 = [
|
||||||
|
2drop dup exec-opts>> 0 =
|
||||||
|
[ f ]
|
||||||
|
[
|
||||||
|
dup [ subject>> ] [ ofs>> ] bi next-utf8-char
|
||||||
|
[ >>ofs 0 >>exec-opts findnext ] [ f ] if*
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
[
|
||||||
|
nip
|
||||||
|
[ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
|
||||||
|
[ second >>ofs ] bi
|
||||||
|
] [
|
||||||
|
2array
|
||||||
|
] 2bi
|
||||||
] if
|
] if
|
||||||
]
|
|
||||||
[
|
|
||||||
[ 2array >>match ]
|
|
||||||
[
|
|
||||||
nip
|
|
||||||
[ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
|
|
||||||
[ second >>ofs ] bi
|
|
||||||
] 2bi
|
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: substring-list ( subject match-array count -- alien )
|
|
||||||
{ void* } [ pcre_get_substring_list drop ] with-out-parameters ;
|
|
||||||
|
|
||||||
: parse-match ( subject nametable match-data -- match )
|
: parse-match ( subject nametable match-data -- match )
|
||||||
swapd first2 swap [ substring-list ] keep void* <c-direct-array>
|
swapd first2 swap [ pcre-substring-list ] keep void* <c-direct-array>
|
||||||
[ utf8 alien>string ] { } map-as [ of swap 2array ] with map-index ;
|
[ utf8 alien>string ] { } map-as [ of swap 2array ] with map-index ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
@ -119,7 +127,7 @@ TUPLE: compiled-pcre pcre extra nametable ;
|
||||||
GENERIC: findall ( subject obj -- matches )
|
GENERIC: findall ( subject obj -- matches )
|
||||||
|
|
||||||
M: compiled-pcre findall
|
M: compiled-pcre findall
|
||||||
[ <matcher> [ findnext ] follow [ match>> ] map harvest ]
|
[ <matcher> [ findnext dup ] [ ] produce 2nip ]
|
||||||
[ nametable>> rot [ parse-match ] 2with { } map-as ] 2bi ;
|
[ nametable>> rot [ parse-match ] 2with { } map-as ] 2bi ;
|
||||||
|
|
||||||
M: string findall
|
M: string findall
|
||||||
|
|
Loading…
Reference in New Issue