| 
									
										
										
										
											2007-11-26 13:59:04 -05:00
										 |  |  | USING: arrays combinators kernel lazy-lists math math.parser | 
					
						
							| 
									
										
										
										
											2007-11-25 04:56:04 -05:00
										 |  |  | namespaces parser parser-combinators parser-combinators.simple | 
					
						
							| 
									
										
										
										
											2007-12-08 17:47:09 -05:00
										 |  |  | promises quotations sequences combinators.lib strings | 
					
						
							| 
									
										
										
										
											2008-02-01 19:26:32 -05:00
										 |  |  | assocs prettyprint.backend memoize unicode.case unicode.categories ;
 | 
					
						
							| 
									
										
										
										
											2007-12-03 22:46:56 -05:00
										 |  |  | USE: io | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | IN: regexp | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: ignore-case? | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : char=-quot ( ch -- quot )
 | 
					
						
							|  |  |  |     ignore-case? get
 | 
					
						
							|  |  |  |     [ ch>upper [ swap ch>upper = ] ] [ [ = ] ] if
 | 
					
						
							|  |  |  |     curry ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : char-between?-quot ( ch1 ch2 -- quot )
 | 
					
						
							|  |  |  |     ignore-case? get
 | 
					
						
							|  |  |  |     [ [ ch>upper ] 2apply [ >r >r ch>upper r> r> between? ] ] | 
					
						
							|  |  |  |     [ [ between? ] ] | 
					
						
							|  |  |  |     if 2curry ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : or-predicates ( quots -- quot )
 | 
					
						
							|  |  |  |     [ \ dup add* ] map [ [ t ] ] f short-circuit \ nip add ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | : <@literal [ nip ] curry <@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : <@delay [ curry ] curry <@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : ascii? ( n -- ? )  | 
					
						
							|  |  |  |     0 HEX: 7f between? ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : octal-digit? ( n -- ? )
 | 
					
						
							|  |  |  |     CHAR: 0 CHAR: 7 between? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : decimal-digit? ( n -- ? )
 | 
					
						
							|  |  |  |     CHAR: 0 CHAR: 9 between? ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : hex-digit? ( n -- ? )
 | 
					
						
							|  |  |  |     dup decimal-digit? | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     over CHAR: a CHAR: f between? or
 | 
					
						
							|  |  |  |     swap CHAR: A CHAR: F between? or ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : control-char? ( n -- ? )
 | 
					
						
							|  |  |  |     dup 0 HEX: 1f between? | 
					
						
							|  |  |  |     swap HEX: 7f = or ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : punct? ( n -- ? )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 17:47:09 -05:00
										 |  |  |     "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : c-identifier-char? ( ch -- ? )
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     dup alpha? swap CHAR: _ = or ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : java-blank? ( n -- ? )
 | 
					
						
							| 
									
										
										
										
											2007-11-26 13:59:04 -05:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |         CHAR: \s
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  |         CHAR: \t CHAR: \n CHAR: \r
 | 
					
						
							|  |  |  |         HEX: c HEX: 7 HEX: 1b | 
					
						
							| 
									
										
										
										
											2007-12-08 17:47:09 -05:00
										 |  |  |     } member? ;
 | 
					
						
							| 
									
										
										
										
											2007-11-26 13:59:04 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | : java-printable? ( n -- ? )
 | 
					
						
							|  |  |  |     dup alpha? swap punct? or ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | : 'ordinary-char' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 17:47:09 -05:00
										 |  |  |     [ "\\^*+?|(){}[$" member? not ] satisfy | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ char=-quot ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:17:12 -05:00
										 |  |  | : 'octal-digit' ( -- parser ) [ octal-digit? ] satisfy ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | : 'octal' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:17:12 -05:00
										 |  |  |     "0" token 'octal-digit' 1 3 from-m-to-n &> | 
					
						
							|  |  |  |     [ oct> ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'hex-digit' ( -- parser ) [ hex-digit? ] satisfy ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'hex' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     "x" token 'hex-digit' 2 exactly-n &> | 
					
						
							| 
									
										
										
										
											2008-02-06 20:23:39 -05:00
										 |  |  |     "u" token 'hex-digit' 6 exactly-n &> <|> | 
					
						
							| 
									
										
										
										
											2007-12-02 07:17:12 -05:00
										 |  |  |     [ hex> ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : satisfy-tokens ( assoc -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ >r token r> <@literal ] { } assoc>map <or-parser> ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'simple-escape-char' ( -- parser )
 | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |         { "\\" CHAR: \\ } | 
					
						
							|  |  |  |         { "t"  CHAR: \t } | 
					
						
							|  |  |  |         { "n"  CHAR: \n } | 
					
						
							|  |  |  |         { "r"  CHAR: \r } | 
					
						
							|  |  |  |         { "f"  HEX: c   } | 
					
						
							|  |  |  |         { "a"  HEX: 7   } | 
					
						
							|  |  |  |         { "e"  HEX: 1b  } | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     } [ char=-quot ] assoc-map satisfy-tokens ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'predefined-char-class' ( -- parser )
 | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |         { "d" [ digit? ] } | 
					
						
							|  |  |  |         { "D" [ digit? not ] } | 
					
						
							|  |  |  |         { "s" [ java-blank? ] } | 
					
						
							|  |  |  |         { "S" [ java-blank? not ] } | 
					
						
							|  |  |  |         { "w" [ c-identifier-char? ] } | 
					
						
							|  |  |  |         { "W" [ c-identifier-char? not ] } | 
					
						
							|  |  |  |     } satisfy-tokens ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'posix-character-class' ( -- parser )
 | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |         { "Lower" [ letter? ] } | 
					
						
							|  |  |  |         { "Upper" [ LETTER? ] } | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |         { "ASCII" [ ascii? ] } | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |         { "Alpha" [ Letter? ] } | 
					
						
							|  |  |  |         { "Digit" [ digit? ] } | 
					
						
							|  |  |  |         { "Alnum" [ alpha? ] } | 
					
						
							|  |  |  |         { "Punct" [ punct? ] } | 
					
						
							|  |  |  |         { "Graph" [ java-printable? ] } | 
					
						
							|  |  |  |         { "Print" [ java-printable? ] } | 
					
						
							|  |  |  |         { "Blank" [ " \t" member? ] } | 
					
						
							|  |  |  |         { "Cntrl" [ control-char? ] } | 
					
						
							|  |  |  |         { "XDigit" [ hex-digit? ] } | 
					
						
							|  |  |  |         { "Space" [ java-blank? ] } | 
					
						
							|  |  |  |     } satisfy-tokens "p{" "}" surrounded-by ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : 'simple-escape' ( -- parser )
 | 
					
						
							|  |  |  |     'octal' | 
					
						
							|  |  |  |     'hex' <|> | 
					
						
							|  |  |  |     "c" token [ LETTER? ] satisfy &> <|> | 
					
						
							|  |  |  |     any-char-parser <|> | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ char=-quot ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : 'escape' ( -- parser )
 | 
					
						
							|  |  |  |     "\\" token | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  |     'simple-escape-char' | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  |     'predefined-char-class' <|> | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  |     'posix-character-class' <|> | 
					
						
							|  |  |  |     'simple-escape' <|> &> ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : 'any-char' | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     "." token [ drop t ] <@literal ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : 'char' | 
					
						
							|  |  |  |     'any-char' 'escape' 'ordinary-char' <|> <|> [ satisfy ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | DEFER: 'regexp' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | TUPLE: group-result str ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | C: <group-result> group-result | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | : 'non-capturing-group' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  |     "?:" token 'regexp' &> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'positive-lookahead-group' ( -- parser )
 | 
					
						
							|  |  |  |     "?=" token 'regexp' &> [ ensure ] <@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'negative-lookahead-group' ( -- parser )
 | 
					
						
							|  |  |  |     "?!" token 'regexp' &> [ ensure-not ] <@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'simple-group' ( -- parser )
 | 
					
						
							|  |  |  |     'regexp' [ [ <group-result> ] <@ ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'group' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  |     'non-capturing-group' | 
					
						
							|  |  |  |     'positive-lookahead-group' | 
					
						
							|  |  |  |     'negative-lookahead-group' | 
					
						
							|  |  |  |     'simple-group' <|> <|> <|> | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     "(" ")" surrounded-by ;
 | 
					
						
							| 
									
										
										
										
											2007-11-26 18:19:29 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | : 'range' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2008-03-11 04:35:59 -04:00
										 |  |  |     [ CHAR: ] = not ] satisfy "-" token <& | 
					
						
							|  |  |  |     [ CHAR: ] = not ] satisfy <&> | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ first2 char-between?-quot ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : 'character-class-term' ( -- parser )
 | 
					
						
							|  |  |  |     'range' | 
					
						
							|  |  |  |     'escape' <|> | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ "\\]" member? not ] satisfy [ char=-quot ] <@ <|> ;
 | 
					
						
							| 
									
										
										
										
											2007-11-26 18:19:29 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : 'positive-character-class' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     "]" token [ CHAR: ] = ] <@literal 'character-class-term' <*> <&:> | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     'character-class-term' <+> <|> | 
					
						
							|  |  |  |     [ or-predicates ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  | : 'negative-character-class' ( -- parser )
 | 
					
						
							|  |  |  |     "^" token 'positive-character-class' &> | 
					
						
							|  |  |  |     [ [ not ] append ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-26 18:19:29 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-30 20:20:02 -05:00
										 |  |  | : 'character-class' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     'negative-character-class' 'positive-character-class' <|> | 
					
						
							|  |  |  |     "[" "]" surrounded-by [ satisfy ] <@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'escaped-seq' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     any-char-parser <*> | 
					
						
							|  |  |  |     [ ignore-case? get <token-parser> ] <@ | 
					
						
							|  |  |  |     "\\Q" "\\E" surrounded-by ;
 | 
					
						
							| 
									
										
										
										
											2007-11-26 18:19:29 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  | : 'break' ( quot -- parser )
 | 
					
						
							|  |  |  |     satisfy ensure epsilon just <|> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'break-escape' ( -- parser )
 | 
					
						
							|  |  |  |     "$" token [ "\r\n" member? ] 'break' <@literal | 
					
						
							|  |  |  |     "\\b" token [ blank? ] 'break' <@literal <|> | 
					
						
							|  |  |  |     "\\B" token [ blank? not ] 'break' <@literal <|> | 
					
						
							|  |  |  |     "\\z" token epsilon just <@literal <|> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : 'simple' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:07:32 -05:00
										 |  |  |     'escaped-seq' | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  |     'break-escape' <|> | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     'group' <|> | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  |     'character-class' <|> | 
					
						
							|  |  |  |     'char' <|> ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | : 'exactly-n' ( -- parser )
 | 
					
						
							|  |  |  |     'integer' [ exactly-n ] <@delay ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'at-least-n' ( -- parser )
 | 
					
						
							|  |  |  |     'integer' "," token <& [ at-least-n ] <@delay ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'at-most-n' ( -- parser )
 | 
					
						
							|  |  |  |     "," token 'integer' &> [ at-most-n ] <@delay ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'from-m-to-n' ( -- parser )
 | 
					
						
							|  |  |  |     'integer' "," token <& 'integer' <&> [ first2 from-m-to-n ] <@delay ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 22:46:56 -05:00
										 |  |  | : 'greedy-interval' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     'exactly-n' 'at-least-n' <|> 'at-most-n' <|> 'from-m-to-n' <|> ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 22:46:56 -05:00
										 |  |  | : 'interval' ( -- parser )
 | 
					
						
							|  |  |  |     'greedy-interval' | 
					
						
							|  |  |  |     'greedy-interval' "?" token <& [ "reluctant {}" print ] <@ <|> | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     'greedy-interval' "+" token <& [ "possessive {}" print ] <@ <|> | 
					
						
							|  |  |  |     "{" "}" surrounded-by ;
 | 
					
						
							| 
									
										
										
										
											2007-12-02 07:17:12 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 22:46:56 -05:00
										 |  |  | : 'repetition' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     ! Posessive | 
					
						
							|  |  |  |     "*+" token [ <!*> ] <@literal | 
					
						
							|  |  |  |     "++" token [ <!+> ] <@literal <|> | 
					
						
							|  |  |  |     "?+" token [ <!?> ] <@literal <|> | 
					
						
							|  |  |  |     ! Reluctant | 
					
						
							|  |  |  |     "*?" token [ <(*)> ] <@literal <|> | 
					
						
							|  |  |  |     "+?" token [ <(+)> ] <@literal <|> | 
					
						
							|  |  |  |     "??" token [ <(?)> ] <@literal <|> | 
					
						
							|  |  |  |     ! Greedy | 
					
						
							|  |  |  |     "*" token [ <*> ] <@literal <|> | 
					
						
							|  |  |  |     "+" token [ <+> ] <@literal <|> | 
					
						
							|  |  |  |     "?" token [ <?> ] <@literal <|> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : 'dummy' ( -- parser )
 | 
					
						
							|  |  |  |     epsilon [ ] <@literal ;
 | 
					
						
							| 
									
										
										
										
											2007-12-03 22:46:56 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-10 02:20:30 -05:00
										 |  |  | MEMO: 'term' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     'simple' | 
					
						
							|  |  |  |     'repetition' 'interval' 'dummy' <|> <|> <&> [ first2 call ] <@ | 
					
						
							|  |  |  |     <!+> [ <and-parser> ] <@ ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | LAZY: 'regexp' ( -- parser )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     'term' "|" token nonempty-list-of [ <or-parser> ] <@ ;
 | 
					
						
							|  |  |  | !    "^" token 'term' "|" token nonempty-list-of [ <or-parser> ] <@ | 
					
						
							|  |  |  | !        &> [ "caret" print ] <@ <|> | 
					
						
							|  |  |  | !    'term' "|" token nonempty-list-of [ <or-parser> ] <@ | 
					
						
							|  |  |  | !        "$" token <& [ "dollar" print ] <@ <|> | 
					
						
							|  |  |  | !    "^" token 'term' "|" token nonempty-list-of [ <or-parser> ] <@ &> | 
					
						
							|  |  |  | !        "$" token [ "caret dollar" print ] <@ <& <|> ; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | TUPLE: regexp source parser ignore-case? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : <regexp> ( string ignore-case? -- regexp )
 | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         ignore-case? [ | 
					
						
							|  |  |  |             dup 'regexp' just parse-1 | 
					
						
							|  |  |  |         ] with-variable
 | 
					
						
							|  |  |  |     ] keep regexp construct-boa ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : do-ignore-case ( string regexp -- string regexp )
 | 
					
						
							|  |  |  |     dup regexp-ignore-case? [ >r >upper r> ] when ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : matches? ( string regexp -- ? )
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     do-ignore-case regexp-parser just parse nil? not ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : match-head ( string regexp -- end )
 | 
					
						
							|  |  |  |     do-ignore-case regexp-parser parse dup nil? | 
					
						
							|  |  |  |     [ drop f ] [ car parse-result-unparsed slice-from ] if ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | ! Literal syntax for regexps | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  | : parse-options ( string -- ? )
 | 
					
						
							|  |  |  |     #! Lame | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         { "" [ f ] } | 
					
						
							|  |  |  |         { "i" [ t ] } | 
					
						
							|  |  |  |     } case ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | : parse-regexp ( accum end -- accum )
 | 
					
						
							|  |  |  |     lexer get dup skip-blank [ | 
					
						
							|  |  |  |         [ index* dup 1+ swap ] 2keep swapd subseq swap
 | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     ] change-column | 
					
						
							|  |  |  |     lexer get (parse-token) parse-options <regexp> parsed ;
 | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : R! CHAR: ! parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | : R" CHAR: " parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : R# CHAR: # parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | : R' CHAR: ' parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : R( CHAR: ) parse-regexp ; parsing | 
					
						
							|  |  |  | : R/ CHAR: / parse-regexp ; parsing | 
					
						
							|  |  |  | : R@ CHAR: @ parse-regexp ; parsing | 
					
						
							|  |  |  | : R[ CHAR: ] parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-11-25 04:51:30 -05:00
										 |  |  | : R` CHAR: ` parse-regexp ; parsing | 
					
						
							| 
									
										
										
										
											2007-12-03 19:20:47 -05:00
										 |  |  | : R{ CHAR: } parse-regexp ; parsing | 
					
						
							|  |  |  | : R| CHAR: | parse-regexp ; parsing | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : find-regexp-syntax ( string -- prefix suffix )
 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         { "R/ "  "/"  } | 
					
						
							|  |  |  |         { "R! "  "!"  } | 
					
						
							|  |  |  |         { "R\" " "\"" } | 
					
						
							|  |  |  |         { "R# "  "#"  } | 
					
						
							|  |  |  |         { "R' "  "'"  } | 
					
						
							|  |  |  |         { "R( "  ")"  } | 
					
						
							|  |  |  |         { "R@ "  "@"  } | 
					
						
							|  |  |  |         { "R[ "  "]"  } | 
					
						
							|  |  |  |         { "R` "  "`"  } | 
					
						
							|  |  |  |         { "R{ "  "}"  } | 
					
						
							|  |  |  |         { "R| "  "|"  } | 
					
						
							|  |  |  |     } swap [ subseq? not nip ] curry assoc-find drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: regexp pprint* | 
					
						
							| 
									
										
										
										
											2007-12-08 03:21:59 -05:00
										 |  |  |     [ | 
					
						
							|  |  |  |         dup regexp-source | 
					
						
							|  |  |  |         dup find-regexp-syntax swap % swap % % | 
					
						
							|  |  |  |         dup regexp-ignore-case? [ "i" % ] when
 | 
					
						
							|  |  |  |     ] "" make | 
					
						
							|  |  |  |     swap present-text ;
 |