diff --git a/extra/pcre/pcre.factor b/extra/pcre/pcre.factor index e66e1ad769..08e0a84540 100644 --- a/extra/pcre/pcre.factor +++ b/extra/pcre/pcre.factor @@ -1,12 +1,22 @@ USING: accessors alien alien.accessors alien.c-types alien.data alien.enums alien.strings arrays assocs fry io.encodings.string -io.encodings.utf8 kernel math math.bitwise mirrors pcre.ffi -sequences sequences.generalizations splitting strings ; +io.encodings.utf8 kernel literals math math.bitwise mirrors +pcre.ffi sequences sequences.generalizations splitting strings ; QUALIFIED: regexp IN: pcre +ERROR: malformed-regexp expr error ; + +ERROR: pcre-error value ; + string ] map harvest ; + : 2with ( param1 param2 obj quot -- obj curry ) [ -rot ] dip [ [ rot ] dip call ] 3curry ; inline @@ -20,6 +30,9 @@ IN: pcre 1 + 2dup swap ?nth [ utf8-start-byte? [ nip ] [ next-utf8-char ] if ] [ 2drop f ] if* ; +: config ( what -- alien ) + { int } [ pcre_config ] with-out-parameters ; + : fullinfo ( pcre extra what -- obj ) { int } [ pcre_fullinfo ] with-out-parameters nip ; @@ -37,22 +50,14 @@ IN: pcre [ 1 alien-unsigned-1 ] [ 2 + utf8 alien>string ] bi ; -: options ( pcre -- opts ) +: options ( pcre -- opts ) f PCRE_INFO_OPTIONS fullinfo ; : name-table-entries ( pcre extra -- addrs ) [ name-table ] [ name-entry-size ] [ name-count ] 2tri gen-array-addrs [ name-table-entry 2array ] map ; -PRIVATE> - -ERROR: malformed-regexp expr error ; -ERROR: pcre-error value ; - -TUPLE: compiled-pcre pcre extra nametable ; - -: default-opts ( -- opts ) - PCRE_UTF8 PCRE_UCP bitor ; +CONSTANT: default-opts flags{ PCRE_UTF8 PCRE_UCP } : (pcre) ( expr -- pcre err-message err-offset ) default-opts { c-string int } [ f pcre_compile ] with-out-parameters ; @@ -60,24 +65,18 @@ TUPLE: compiled-pcre pcre extra nametable ; : ( expr -- pcre ) dup (pcre) 2array swap [ 2nip ] [ malformed-regexp ] if* ; -: exec ( pcre extra subject ofs opts -- count match-data ) - [ dup length ] 2dip 30 int 30 [ pcre_exec ] 2keep drop ; - : ( pcre -- pcre-extra ) 0 { c-string } [ pcre_study ] with-out-parameters drop ; -: config ( what -- alien ) - { int } [ pcre_config ] with-out-parameters ; +: exec ( pcre extra subject ofs opts -- count match-data ) + [ dup length ] 2dip 30 int 30 [ pcre_exec ] 2keep drop ; -! Finding stuff TUPLE: matcher pcre extra subject ofs exec-opts match ; : ( subject compiled-pcre -- matcher ) [ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ; -! This handling of zero-length matches is taken from pcredemo.c -: empty-match-opts ( -- opts ) - PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ; +CONSTANT: empty-match-opts flags{ PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED } : findnext ( matcher -- matcher'/f ) clone dup values 6 firstn drop exec @@ -100,7 +99,6 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ; ] 2bi ] if ; -! Result parsing : substring-list ( subject match-array count -- alien ) { void* } [ pcre_get_substring_list drop ] with-out-parameters ; @@ -108,7 +106,10 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ; swapd first2 swap [ substring-list ] keep void* [ utf8 alien>string ] { } map-as [ of swap 2array ] with map-index ; -! High-level +PRIVATE> + +TUPLE: compiled-pcre pcre extra nametable ; + : ( expr -- compiled-pcre ) dup 2dup name-table-entries compiled-pcre boa ; @@ -130,15 +131,5 @@ M: regexp:regexp findall : matches? ( subject obj -- ? ) dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ; -string ] map harvest ; - -PRIVATE> - : split ( subject obj -- strings ) dupd findall [ first second ] map split-subseqs ;