pcre: new module pcre.utils for the utility functions which aren't really regexp-related
parent
80c32e2bc6
commit
ba8810dac3
|
@ -44,6 +44,34 @@ CONSTANT: PCRE_PARTIAL_HARD 0x08000000
|
|||
CONSTANT: PCRE_NOTEMPTY_ATSTART 0x10000000
|
||||
CONSTANT: PCRE_UCP 0x20000000
|
||||
|
||||
ENUM: PCRE_ERRORS
|
||||
{ PCRE_ERROR_NOMATCH -1 }
|
||||
{ PCRE_ERROR_NULL -2 }
|
||||
{ PCRE_ERROR_BADOPTION -3 }
|
||||
{ PCRE_ERROR_BADMAGIC -4 }
|
||||
{ PCRE_ERROR_UNKNOWN_OPCODE -5 }
|
||||
{ PCRE_ERROR_UNKNOWN_NODE -5 }
|
||||
{ PCRE_ERROR_NOMEMORY -6 }
|
||||
{ PCRE_ERROR_NOSUBSTRING -7 }
|
||||
{ PCRE_ERROR_MATCHLIMIT -8 }
|
||||
{ PCRE_ERROR_CALLOUT -9 }
|
||||
{ PCRE_ERROR_BADUTF8 -10 }
|
||||
{ PCRE_ERROR_BADUTF8_OFFSET -11 }
|
||||
{ PCRE_ERROR_PARTIAL -12 }
|
||||
{ PCRE_ERROR_BADPARTIAL -13 }
|
||||
{ PCRE_ERROR_INTERNAL -14 }
|
||||
{ PCRE_ERROR_BADCOUNT -15 }
|
||||
{ PCRE_ERROR_DFA_UITEM -16 }
|
||||
{ PCRE_ERROR_DFA_UCOND -17 }
|
||||
{ PCRE_ERROR_DFA_UMLIMIT -18 }
|
||||
{ PCRE_ERROR_DFA_WSSIZE -19 }
|
||||
{ PCRE_ERROR_DFA_RECURSE -20 }
|
||||
{ PCRE_ERROR_RECURSIONLIMIT -21 }
|
||||
{ PCRE_ERROR_NULLWSLIMIT -22 }
|
||||
{ PCRE_ERROR_BADNEWLINE -23 }
|
||||
{ PCRE_ERROR_BADOFFSET -24 }
|
||||
{ PCRE_ERROR_SHORTUTF8 -25 } ;
|
||||
|
||||
CONSTANT: PCRE_ERROR_NOMATCH -1
|
||||
CONSTANT: PCRE_ERROR_NULL -2
|
||||
CONSTANT: PCRE_ERROR_BADOPTION -3
|
||||
|
|
|
@ -4,17 +4,10 @@ USING:
|
|||
arrays
|
||||
kernel
|
||||
math
|
||||
pcre.ffi
|
||||
pcre.ffi pcre.utils
|
||||
sequences ;
|
||||
IN: pcre.info
|
||||
|
||||
! Gen. utility
|
||||
: 2with ( param1 param2 obj quot -- obj curry )
|
||||
[ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
|
||||
|
||||
: gen-array-addrs ( base size n -- addrs )
|
||||
iota [ * + ] 2with map ;
|
||||
|
||||
! Mostly internal
|
||||
: fullinfo ( pcre extra what -- obj )
|
||||
{ int } [ pcre_fullinfo ] with-out-parameters nip ;
|
||||
|
|
|
@ -51,6 +51,10 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
|
|||
|
||||
[ 3 ] [ "foobar" "foo(?=bar)" findall first first second length ] unit-test
|
||||
|
||||
[ { { { f ", " } } { { f ", " } } { { f "." } } } ] [
|
||||
"Words, words, word." "\\W+" findall
|
||||
] unit-test
|
||||
|
||||
[ { ", " ", " "." } ] [
|
||||
"Words, words, word." "\\W+" findall [ first second ] map
|
||||
] unit-test
|
||||
|
@ -65,11 +69,12 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
|
|||
[ { { { f "foo" } } { { f "" } } } ]
|
||||
[ "foo" ".*" findall ] unit-test
|
||||
|
||||
[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
|
||||
[ { { { f "" } } { { f "" } } { { f "" } } } ]
|
||||
[ "foo" "B*" findall ] unit-test
|
||||
|
||||
! Empty matches in strings with multi-byte characters are tricky.
|
||||
[ ] [ "ööööö" "x*" findall ] unit-test
|
||||
[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
|
||||
[ "öööö" "x*" findall ] unit-test
|
||||
|
||||
! Tests for matches?
|
||||
[ t ] [ "örjan" "örjan" matches? ] unit-test
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
USING:
|
||||
accessors
|
||||
alien.c-types alien.data alien.strings
|
||||
alien.c-types alien.data alien.enums alien.strings
|
||||
arrays
|
||||
assocs
|
||||
fry
|
||||
io.encodings.utf8 io.encodings.string
|
||||
kernel
|
||||
math
|
||||
mirrors
|
||||
pcre.ffi pcre.info
|
||||
pcre.ffi pcre.info pcre.utils
|
||||
sequences sequences.generalizations
|
||||
strings ;
|
||||
QUALIFIED: splitting
|
||||
IN: pcre
|
||||
|
||||
ERROR: malformed-regexp expr error ;
|
||||
|
@ -19,10 +17,6 @@ ERROR: pcre-error value ;
|
|||
|
||||
TUPLE: compiled-pcre pcre extra nametable ;
|
||||
|
||||
! Gen. utility
|
||||
: replace-all ( seq subseqs new -- seq )
|
||||
swapd '[ _ splitting:replace ] reduce ;
|
||||
|
||||
: default-opts ( -- opts )
|
||||
PCRE_UTF8 PCRE_UCP bitor ;
|
||||
|
||||
|
@ -53,17 +47,14 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ;
|
|||
|
||||
: findnext ( matcher -- matcher'/f )
|
||||
clone dup <mirror> values 6 firstn drop exec
|
||||
over dup -1 < [ pcre-error ] when
|
||||
over dup -1 < [ PCRE_ERRORS number>enum pcre-error ] when
|
||||
-1 =
|
||||
[
|
||||
2drop dup exec-opts>> 0 =
|
||||
[ drop f ]
|
||||
[
|
||||
! dup [ ofs>> 1 + dup ] [ subject>> ] bi bounds-check?
|
||||
! [ >>ofs 0 >>exec-opts findnext ] [ 2drop f ] if
|
||||
|
||||
dup [ ofs>> 1 + ] [ subject>> length ] bi over <
|
||||
[ 2drop f ] [ >>ofs 0 >>exec-opts findnext ] if
|
||||
dup [ subject>> ] [ ofs>> ] bi next-utf8-char
|
||||
[ >>ofs 0 >>exec-opts findnext ] [ drop f ] if*
|
||||
] if
|
||||
]
|
||||
[
|
||||
|
@ -103,5 +94,4 @@ M: string findall
|
|||
dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ;
|
||||
|
||||
: split ( subject obj -- strings )
|
||||
dupd findall [ first second ] map
|
||||
dup first [ replace-all ] keep splitting:split harvest ;
|
||||
dupd findall [ first second ] map split-subseqs ;
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
USING: pcre.utils tools.test ;
|
||||
IN: pcre.utils.tests
|
||||
|
||||
[ { "Bords" "words" "word" } ] [
|
||||
"Bords, words, word." { ", " ", " "." } split-subseqs
|
||||
] unit-test
|
|
@ -0,0 +1,21 @@
|
|||
USING: assocs fry kernel math mirrors sequences splitting strings ;
|
||||
IN: pcre.utils
|
||||
|
||||
: replace-all ( seq subseqs new -- seq )
|
||||
swapd '[ _ replace ] reduce ;
|
||||
|
||||
: split-subseqs ( seq subseqs -- seqs )
|
||||
dup first [ replace-all ] keep split-subseq [ >string ] map harvest ;
|
||||
|
||||
: 2with ( param1 param2 obj quot -- obj curry )
|
||||
[ -rot ] dip [ [ rot ] dip call ] 3curry ; inline
|
||||
|
||||
: gen-array-addrs ( base size n -- addrs )
|
||||
iota [ * + ] 2with map ;
|
||||
|
||||
: utf8-start-byte? ( byte -- ? )
|
||||
0xc0 bitand 0x80 = not ;
|
||||
|
||||
: next-utf8-char ( byte-array pos -- pos' )
|
||||
1 + 2dup swap ?nth
|
||||
[ utf8-start-byte? [ nip ] [ next-utf8-char ] if ] [ 2drop f ] if* ;
|
Loading…
Reference in New Issue