| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | USING: | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  |     accessors | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     alien.c-types alien.data alien.strings | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  |     arrays | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     assocs | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     fry | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     io.encodings.utf8 io.encodings.string | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  |     kernel | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     math | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     mirrors | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     pcre.ffi pcre.info | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     sequences sequences.generalizations | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     strings ;
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | QUALIFIED: splitting | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | IN: pcre | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | ERROR: malformed-regexp expr error ;
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | ERROR: pcre-error value ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | TUPLE: compiled-pcre pcre extra nametable ;
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! Gen. utility | 
					
						
							|  |  |  | : replace-all ( seq subseqs new -- seq )
 | 
					
						
							|  |  |  |     swapd '[ _ splitting:replace ] reduce ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | : default-opts ( -- opts )
 | 
					
						
							|  |  |  |     PCRE_UTF8 PCRE_UCP bitor ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | : (pcre) ( expr -- pcre err-message err-offset )
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     default-opts { c-string int } [ f pcre_compile ] with-out-parameters ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | : <pcre> ( expr -- pcre )
 | 
					
						
							|  |  |  |     dup (pcre) 2array swap [ 2nip ] [ malformed-regexp ] if* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : exec ( pcre extra subject ofs opts -- count match-data )
 | 
					
						
							|  |  |  |     [ dup length ] 2dip 30 int <c-array> 30 [ pcre_exec ] 2keep drop ;
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | : <pcre-extra> ( pcre -- pcre-extra )
 | 
					
						
							|  |  |  |     0 { c-string } [ pcre_study ] with-out-parameters drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | : config ( what -- alien )
 | 
					
						
							|  |  |  |     { int } [ pcre_config ] with-out-parameters ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! Finding stuff | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | TUPLE: matcher pcre extra subject ofs exec-opts match ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : <matcher> ( subject compiled-pcre -- matcher )
 | 
					
						
							|  |  |  |     [ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! This handling of zero-length matches is taken from pcredemo.c | 
					
						
							|  |  |  | : empty-match-opts ( -- opts )
 | 
					
						
							|  |  |  |     PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : findnext ( matcher -- matcher'/f )
 | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |     clone dup <mirror> values 6 firstn drop exec | 
					
						
							|  |  |  |     over dup -1 < [ pcre-error ] when
 | 
					
						
							|  |  |  |     -1 =
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     [ | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |         2drop dup exec-opts>> 0 =
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |         [ drop f ] | 
					
						
							|  |  |  |         [ | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |             ! dup [ ofs>> 1 + dup ] [ subject>> ] bi bounds-check? | 
					
						
							|  |  |  |             ! [ >>ofs 0 >>exec-opts findnext ] [ 2drop f ] if | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |             dup [ ofs>> 1 + ] [ subject>> length ] bi over <
 | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |             [ 2drop f ] [ >>ofs 0 >>exec-opts findnext ] if
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |         ] if
 | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |     ] | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         [ 2array >>match ] | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             nip
 | 
					
						
							|  |  |  |             [ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ] | 
					
						
							|  |  |  |             [ second >>ofs ] bi
 | 
					
						
							|  |  |  |         ] 2bi
 | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | ! Result parsing | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : substring-list ( subject match-array count -- alien )
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     { void* } [ pcre_get_substring_list drop ] with-out-parameters ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : parse-match ( subject nametable match-data -- match )
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     swapd first2 swap [ substring-list ] keep void* <c-direct-array> | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     [ alien>native-string ] { } map-as [ of swap 2array ] with map-index ;
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! High-level | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | : <compiled-pcre> ( expr -- compiled-pcre )
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     <pcre> dup <pcre-extra> 2dup name-table-entries compiled-pcre boa ;
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : has-option? ( compiled-pcre option -- ? )
 | 
					
						
							|  |  |  |     [ pcre>> options ] dip bitand 0 > ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | GENERIC: findall ( subject obj -- matches )
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | M: compiled-pcre findall | 
					
						
							| 
									
										
										
										
											2013-09-23 08:51:36 -04:00
										 |  |  |     [ <matcher> [ findnext ] follow [ match>> ] map harvest ] | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  |     [ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  | M: string findall | 
					
						
							|  |  |  |     <compiled-pcre> findall ;
 | 
					
						
							| 
									
										
										
										
											2013-09-18 16:45:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : matches? ( subject obj -- ? )
 | 
					
						
							| 
									
										
										
										
											2013-09-21 20:52:39 -04:00
										 |  |  |     dupd findall [ nip length 1 = ] [ ?first ?first ?last = ] 2bi and ;
 | 
					
						
							| 
									
										
										
										
											2013-09-19 17:41:11 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-22 15:48:16 -04:00
										 |  |  | : split ( subject obj -- strings )
 | 
					
						
							|  |  |  |     dupd findall [ first second ] map
 | 
					
						
							|  |  |  |     dup first [ replace-all ] keep splitting:split harvest ;
 |