2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2008, 2009 Doug Coleman, Daniel Ehrenberg.
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								combinators regexp.classes strings splitting peg locals accessors
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:03:26 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								regexp.ast unicode.case unicode.script.private unicode.categories
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								memoize interval-maps sets unicode.data combinators.short-circuit ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-18 15:42:16 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: regexp.parser
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-19 17:48:46 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: allowed-char? ( ch -- ? )
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-25 13:22:12 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ".()|[*+?$^" member? not ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								ERROR: bad-number ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: ensure-number ( n -- n )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ bad-number ] unless* ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:: at-error ( key assoc quot: ( key -- replacement ) -- value )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    key assoc at* [ drop key quot call ] unless ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								ERROR: bad-class name ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:03:26 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: simple ( str -- simple )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ! Alternatively, first collation key level?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    >case-fold [ " \t_" member? not ] filter ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: simple-table ( seq -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ simple ] keep ] H{ } map>assoc ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								MEMO: simple-script-table ( -- table )
							 | 
						
					
						
							
								
									
										
										
										
											2010-02-27 14:52:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    script-table interval-values members simple-table ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:03:26 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								MEMO: simple-category-table ( -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    categories simple-table ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-17 20:39:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-unicode-class ( name -- class )
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:03:26 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            >upper first
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            <category-range-class>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup >title categories member? ] [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            simple-category-table at <category-class>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ "script=" ?head ] [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            dup simple-script-table at
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ <script-class> ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ "script=" prepend bad-class ] ?if
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ bad-class ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } cond ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-17 20:39:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: unicode-class ( name -- class )
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-18 17:09:45 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    dup parse-unicode-class [ ] [ bad-class ] ?if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-17 20:39:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: name>class ( name -- class )
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:03:26 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    >string simple {
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-16 18:53:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { "lower" letter-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "upper" LETTER-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "alpha" Letter-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "ascii" ascii-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "digit" digit-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "alnum" alpha-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "punct" punctuation-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "graph" java-printable-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "blank" non-newline-blank-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "cntrl" control-character-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "xdigit" hex-digit-class }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "space" java-blank-class }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-17 20:39:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    } [ unicode-class ] at-error ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: lookup-escape ( char -- ast )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: t [ CHAR: \t ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: n [ CHAR: \n ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: r [ CHAR: \r ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: f [ HEX: c ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: a [ HEX: 7 ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: e [ HEX: 1b ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: \\ [ CHAR: \\ ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: w [ c-identifier-class <primitive-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-19 19:28:54 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: W [ c-identifier-class <primitive-class> <not-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: s [ java-blank-class <primitive-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-19 19:28:54 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: S [ java-blank-class <primitive-class> <not-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: d [ digit-class <primitive-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-19 19:28:54 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: D [ digit-class <primitive-class> <not-class> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-25 13:22:12 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: z [ end-of-input <tagged-epsilon> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: Z [ end-of-file <tagged-epsilon> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-25 13:22:12 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: A [ beginning-of-input <tagged-epsilon> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-11 16:51:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { CHAR: b [ word-break <tagged-epsilon> ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: B [ word-break <not-class> <tagged-epsilon> ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 23:17:47 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: options-assoc ( -- assoc )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    H{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: i case-insensitive }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: d unix-lines }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: m multiline }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: r reversed-regexp }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: s dotall }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-04-12 14:57:49 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								ERROR: nonexistent-option name ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: ch>option ( ch -- singleton )
							 | 
						
					
						
							
								
									
										
										
										
											2009-04-12 14:57:49 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    dup options-assoc at [ ] [ nonexistent-option ] ?if ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 23:17:47 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: option>ch ( option -- string )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    options-assoc value-at ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-21 18:12:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: parse-options ( on off -- options )
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ [ ch>option ] { } map-as ] bi@ <options> ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: string>options ( string -- options )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "-" split1 parse-options ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: options>string ( options -- string )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ on>> ] [ off>> ] bi
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ option>ch ] map ] bi@
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-20 19:45:24 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ "-" glue ] unless-empty
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    "" like ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! TODO: add syntax for various parenthized things,
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								!       add greedy and nongreedy forms of matching
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! (once it's all implemented)
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								EBNF: parse-regexp
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								CharacterInBracket = !("}") Character
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								QuotedCharacter = !("\\E") .
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-16 18:53:38 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Escape = "p{" CharacterInBracket*:s "}" => [[ s name>class <primitive-class> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2010-03-23 00:30:49 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								       | "P{" CharacterInBracket*:s "}" => [[ s name>class <primitive-class> <not-class> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								       | "Q" QuotedCharacter*:s "\\E" => [[ s <concatenation> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								       | "u" Character:a Character:b Character:c Character:d
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            => [[ { a b c d } hex> ensure-number ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       | "x" Character:a Character:b
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            => [[ { a b } hex> ensure-number ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       | "0" Character:a Character:b Character:c
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            => [[ { a b c } oct> ensure-number ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       | . => [[ lookup-escape ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-21 18:55:25 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								EscapeSequence = "\\" Escape:e => [[ e ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-25 13:22:12 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Character = EscapeSequence
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | "$" => [[ $ <tagged-epsilon> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | "^" => [[ ^ <tagged-epsilon> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | . ?[ allowed-char? ]?
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 03:53:36 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								RangeCharacter = !("]") AnyRangeCharacter
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-21 18:12:26 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 03:53:36 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      | RangeCharacter
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 23:17:47 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 03:53:36 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								           | AnyRangeCharacter
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Ranges = StartRange:s Range*:r => [[ r s prefix ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 03:53:36 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								BasicCharClass =  "^"?:n Ranges:e => [[ e n char-class ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CharClass = BasicCharClass:b "&&" CharClass:c
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                => [[ b c 2array <and-class> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | BasicCharClass:b "||" CharClass:c
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                => [[ b c 2array <or-class> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | BasicCharClass:b "~~" CharClass:c
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                => [[ b c <sym-diff-class> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | BasicCharClass:b "--" CharClass:c
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                => [[ b c <minus-class> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								          | BasicCharClass
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 01:18:27 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Options = [idmsux]*
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 01:18:27 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Parenthized = "?:" Alternation:a => [[ a ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            | "?" Options:on "-"? Options:off ":" Alternation:a
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                => [[ a on off parse-options <with-options> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            | "?#" [^)]* => [[ f ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-19 17:48:46 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            | "?~" Alternation:a => [[ a <negation> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-11 16:51:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            | "?=" Alternation:a => [[ a <lookahead> <tagged-epsilon> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            | "?!" Alternation:a => [[ a <lookahead> <not-class> <tagged-epsilon> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            | "?<=" Alternation:a => [[ a <lookbehind> <tagged-epsilon> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            | "?<!" Alternation:a => [[ a <lookbehind> <not-class> <tagged-epsilon> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            | Alternation
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 01:18:27 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Element = "(" Parenthized:p ")" => [[ p ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        | "[" CharClass:r "]" => [[ r ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-17 20:39:04 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        | ".":d => [[ dot ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        | Character
							 | 
						
					
						
							
								
									
										
										
										
											2008-11-24 01:18:27 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Number = (!(","|"}").)* => [[ string>number ensure-number ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Times = "," Number:n "}" => [[ 0 n <from-to> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      | Number:n ",}" => [[ n <at-least> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      | Number:n "}" => [[ n n <from-to> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      | "}" => [[ bad-number ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      | Number:n "," Number:m "}" => [[ n m <from-to> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Repeated = Element:e "{" Times:t => [[ e t <times> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-08 19:50:41 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         | Element:e "??" => [[ e <maybe> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         | Element:e "*?" => [[ e <star> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         | Element:e "+?" => [[ e <plus> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         | Element:e "?" => [[ e <maybe> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         | Element:e "*" => [[ e <star> ]]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         | Element:e "+" => [[ e <plus> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         | Element
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Concatenation = Repeated*:r => [[ r sift <concatenation> ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Alternation = Concatenation:c ("|" Concatenation)*:a
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-18 13:27:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                => [[ a empty? [ c ] [ a values c prefix <alternation> ] if ]]
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								End = !(.)
							 | 
						
					
						
							
								
									
										
										
										
											2008-08-18 12:24:18 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-16 21:23:00 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Main = Alternation End
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								;EBNF
							 |