350 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Factor
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			350 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Factor
		
	
	
		
			Executable File
		
	
| ! Copyright (C) 2004 Chris Double.
 | |
| ! See http://factorcode.org/license.txt for BSD license.
 | |
| USING: lists lists.lazy promises kernel sequences strings math
 | |
| arrays splitting quotations combinators namespaces
 | |
| unicode.case unicode.categories sequences.deep accessors ;
 | |
| IN: parser-combinators
 | |
| 
 | |
| ! Parser combinator protocol
 | |
| GENERIC: parse ( input parser -- list )
 | |
| 
 | |
| M: promise parse ( input parser -- list )
 | |
|     force parse ;
 | |
| 
 | |
| TUPLE: parse-result parsed unparsed ;
 | |
| 
 | |
| ERROR: cannot-parse input ;
 | |
| 
 | |
| : parse-1 ( input parser -- result )
 | |
|     #! Run 'parser' on 'input' and return the parse tree of the
 | |
|     #! first result. Throws cannot-parse, carrying the original
 | |
|     #! input, when the parser yields no results.
 | |
|     dupd parse dup nil? [
 | |
|         #! Stack here is ( input list ): discard the empty result
 | |
|         #! list so that the error is built from the input.
 | |
|         drop cannot-parse
 | |
|     ] [
 | |
|         nip car parsed>>
 | |
|     ] if ;
 | |
| 
 | |
| C: <parse-result> parse-result
 | |
| 
 | |
| : <parse-results> ( parsed unparsed -- list )
 | |
|     <parse-result> 1list ;
 | |
| 
 | |
| : parse-result-parsed-slice ( parse-result -- slice )
 | |
|     #! Return a slice of the underlying input that ends where
 | |
|     #! the unparsed input begins and is as long as 'parsed'.
 | |
|     #! Reads from>> and seq>> of the unparsed value, so that
 | |
|     #! value is expected to be a slice.
 | |
|     dup parsed>> empty? [
 | |
|         #! Nothing parsed: an empty ( 0 0 ) slice of the unparsed input.
 | |
|         unparsed>> 0 0 rot <slice>
 | |
|     ] [
 | |
|         dup unparsed>>
 | |
|         #! Start is the unparsed 'from' minus the parsed length;
 | |
|         #! end is the unparsed 'from' itself.
 | |
|         dup from>> [ rot parsed>> length - ] keep
 | |
|         rot seq>> <slice>
 | |
|     ] if ;
 | |
| 
 | |
| : string= ( str1 str2 ignore-case -- ? )
 | |
|     #! Test two strings for equality. When 'ignore-case' is
 | |
|     #! true both are upper-cased before being compared.
 | |
|     [ [ >upper ] bi@ ] when sequence= ;
 | |
| 
 | |
| : string-head? ( str head ignore-case -- ? )
 | |
|     #! True if 'str' begins with 'head', compared with string=.
 | |
|     #! Always f when 'str' is shorter than 'head'.
 | |
|     2over shorter? [
 | |
|         3drop f
 | |
|     ] [
 | |
|         #! Compare only the leading part of 'str' that is as
 | |
|         #! long as 'head'.
 | |
|         >r [ length head-slice ] keep r> string=
 | |
|     ] if ;
 | |
| 
 | |
| : ?string-head ( str head ignore-case -- newstr ? )
 | |
|     #! If 'str' begins with 'head', return the rest of 'str'
 | |
|     #! after the head and t. Otherwise return 'str' and f.
 | |
|     >r 2dup r> string-head?
 | |
|     [ length tail-slice t ] [ drop f ] if ;
 | |
| 
 | |
| TUPLE: token-parser string ignore-case? ;
 | |
| 
 | |
| C: <token-parser> token-parser
 | |
| 
 | |
| : token ( string -- parser ) f <token-parser> ;
 | |
| 
 | |
| : case-insensitive-token ( string -- parser ) t <token-parser> ;
 | |
| 
 | |
| M: token-parser parse ( input parser -- list )
 | |
|     #! Succeed when the input begins with the token string
 | |
|     #! (honouring ignore-case?). The parse tree is the token
 | |
|     #! string itself and the unparsed part is the input that
 | |
|     #! follows it. Otherwise return nil.
 | |
|     [ string>> ] [ ignore-case?>> ] bi
 | |
|     >r tuck r> ?string-head
 | |
|     [ <parse-results> ] [ 2drop nil ] if ;
 | |
| 
 | |
| : 1token ( n -- parser ) 1string token ;
 | |
| 
 | |
| TUPLE: satisfy-parser quot ;
 | |
| 
 | |
| C: satisfy satisfy-parser ( quot -- parser )
 | |
| 
 | |
| M: satisfy-parser parse ( input parser -- list )
 | |
|     #! A parser that succeeds if the predicate,
 | |
|     #! when passed the first character in the input, returns
 | |
|     #! true.
 | |
|     over empty? [
 | |
|         2drop nil
 | |
|     ] [
 | |
|         quot>> >r unclip-slice dup r> call
 | |
|         [ swap <parse-results> ] [ 2drop nil ] if
 | |
|     ] if ;
 | |
| 
 | |
| LAZY: any-char-parser ( -- parser )
 | |
|     [ drop t ] satisfy ;
 | |
| 
 | |
| TUPLE: epsilon-parser ;
 | |
| 
 | |
| C: epsilon epsilon-parser ( -- parser )
 | |
| 
 | |
| M: epsilon-parser parse ( input parser -- list )
 | |
|     #! A parser that parses the empty string. It
 | |
|     #! does not consume any input and always returns
 | |
|     #! an empty string as the parse tree with the
 | |
|     #! unmodified input.
 | |
|     drop "" swap <parse-results> ;
 | |
| 
 | |
| TUPLE: succeed-parser result ;
 | |
| 
 | |
| C: succeed succeed-parser ( result -- parser )
 | |
| 
 | |
| M: succeed-parser parse ( input parser -- list )
 | |
|     #! A parser that always returns 'result' as a
 | |
|     #! successful parse with no input consumed.
 | |
|     result>> swap <parse-results> ;
 | |
| 
 | |
| TUPLE: fail-parser ;
 | |
| 
 | |
| C: fail fail-parser ( -- parser )
 | |
| 
 | |
| M: fail-parser parse ( input parser -- list )
 | |
|     #! A parser that always fails and returns
 | |
|     #! an empty list of successes.
 | |
|     2drop nil ;
 | |
| 
 | |
| TUPLE: ensure-parser test ;
 | |
| 
 | |
| : ensure ( parser -- ensure )
 | |
|     ensure-parser boa ;
 | |
| 
 | |
| M: ensure-parser parse ( input parser -- list )
 | |
|     #! Run the 'test' parser on the input. If it yields at
 | |
|     #! least one result, succeed with t as the parse tree and
 | |
|     #! the whole input left unparsed; otherwise return nil.
 | |
|     2dup test>> parse nil?
 | |
|     [ 2drop nil ] [ drop t swap <parse-results> ] if ;
 | |
| 
 | |
| TUPLE: ensure-not-parser test ;
 | |
| 
 | |
| : ensure-not ( parser -- ensure )
 | |
|     ensure-not-parser boa ;
 | |
| 
 | |
| M: ensure-not-parser parse ( input parser -- list )
 | |
|     #! Run the 'test' parser on the input. If it yields no
 | |
|     #! results, succeed with t as the parse tree and the
 | |
|     #! whole input left unparsed; otherwise return nil.
 | |
|     2dup test>> parse nil?
 | |
|     [ drop t swap <parse-results> ] [ 2drop nil ] if ;
 | |
| 
 | |
| TUPLE: and-parser parsers ;
 | |
| 
 | |
| : <&> ( parser1 parser2 -- parser )
 | |
|     #! Build an and-parser that runs parser1 and then parser2.
 | |
|     #! If parser1 is already an and-parser, parser2 is added
 | |
|     #! to the end of its parser list instead of nesting.
 | |
|     over and-parser? [
 | |
|         >r parsers>> r> suffix
 | |
|     ] [
 | |
|         2array
 | |
|     ] if and-parser boa ;
 | |
| 
 | |
| : <and-parser> ( parsers -- parser )
 | |
|     dup length 1 = [ first ] [ and-parser boa ] if ;
 | |
| 
 | |
| : and-parser-parse ( list p1  -- list )
 | |
|     #! For every result in 'list', run p1 on that result's
 | |
|     #! unparsed input. Each new result is combined with the
 | |
|     #! earlier one: the parse tree becomes the two-element
 | |
|     #! array { earlier new } and the unparsed input is that of
 | |
|     #! the new result. All combinations are concatenated.
 | |
|     swap [
 | |
|         dup unparsed>> rot parse
 | |
|         [
 | |
|             >r parsed>> r>
 | |
|             [ parsed>> 2array ] keep
 | |
|             unparsed>> <parse-result>
 | |
|         ] lazy-map-with
 | |
|     ] lazy-map-with lconcat ;
 | |
| 
 | |
| M: and-parser parse ( input parser -- list )
 | |
|     #! Parse 'input' by applying the parsers held in
 | |
|     #! 'parsers' one after another. The first parser runs on
 | |
|     #! the input; each later parser runs on the unparsed
 | |
|     #! remainder of every result produced so far.
 | |
|     parsers>> unclip swapd parse
 | |
|     [ [ and-parser-parse ] reduce ] 2curry promise ;
 | |
| 
 | |
| TUPLE: or-parser parsers ;
 | |
| 
 | |
| : <or-parser> ( parsers -- parser )
 | |
|     dup length 1 = [ first ] [ or-parser boa ] if ;
 | |
| 
 | |
| : <|> ( parser1 parser2 -- parser )
 | |
|     2array <or-parser> ;
 | |
| 
 | |
| M: or-parser parse ( input parser1 -- list )
 | |
|     #! Return the combined list resulting from applying
 | |
|     #! every parser held in 'parsers' to the same
 | |
|     #! input. This implements the choice parsing operator.
 | |
|     parsers>> 0 swap seq>list
 | |
|     [ parse ] lazy-map-with lconcat ;
 | |
| 
 | |
| : left-trim-slice ( string -- string )
 | |
|     #! Return the string with any leading whitespace skipped.
 | |
|     #! Leading blanks are dropped one at a time with rest-slice.
 | |
|     dup empty? [
 | |
|         dup first blank? [ rest-slice left-trim-slice ] when
 | |
|     ] unless ;
 | |
| 
 | |
| TUPLE: sp-parser p1 ;
 | |
| 
 | |
| #! Return a parser that first skips all whitespace before
 | |
| #! calling the original parser.
 | |
| C: sp sp-parser ( p1 -- parser )
 | |
| 
 | |
| M: sp-parser parse ( input parser -- list )
 | |
|     #! Skip all leading whitespace from the input then call
 | |
|     #! the parser on the remaining input.
 | |
|     >r left-trim-slice r> p1>> parse ;
 | |
| 
 | |
| TUPLE: just-parser p1 ;
 | |
| 
 | |
| C: just just-parser ( p1 -- parser )
 | |
| 
 | |
| M: just-parser parse ( input parser -- result )
 | |
|     #! Calls the given parser on the input and removes
 | |
|     #! from the results anything where the remaining
 | |
|     #! input to be parsed is not empty. This ensures a
 | |
|     #! fully parsed input string.
 | |
|     p1>> parse [ unparsed>> empty? ] lfilter ;
 | |
| 
 | |
| TUPLE: apply-parser p1 quot ;
 | |
| 
 | |
| C: <@ apply-parser ( parser quot -- parser )
 | |
| 
 | |
| M: apply-parser parse ( input parser -- result )
 | |
|     #! Calls the parser on the input. For each successful
 | |
|     #! parse the quot is called with the parsed value on the stack.
 | |
|     #! The result of that quotation then becomes the new parsed value.
 | |
|     #! This allows modification of parse tree results (like
 | |
|     #! converting strings to integers, etc).
 | |
|     [ p1>> ] [ quot>> ] bi
 | |
|     -rot parse [
 | |
|         [ parsed>> swap call ] keep
 | |
|         unparsed>> <parse-result>
 | |
|     ] lazy-map-with ;
 | |
| 
 | |
| TUPLE: some-parser p1 ;
 | |
| 
 | |
| C: some some-parser ( p1 -- parser )
 | |
| 
 | |
| M: some-parser parse ( input parser -- result )
 | |
|     #! Calls the parser on the input, guarantees
 | |
|     #! the parse is complete (the remaining input is empty),
 | |
|     #! picks the first solution and only returns the parse
 | |
|     #! tree since the remaining input is empty.
 | |
|     p1>> just parse-1 ;
 | |
| 
 | |
| : <& ( parser1 parser2 -- parser )
 | |
|     #! Same as <&> except discard the results of the second parser.
 | |
|     <&> [ first ] <@ ;
 | |
| 
 | |
| : &> ( parser1 parser2 -- parser )
 | |
|     #! Same as <&> except discard the results of the first parser.
 | |
|     <&> [ second ] <@ ;
 | |
| 
 | |
| : <:&> ( parser1 parser2 -- result )
 | |
|     #! Same as <&> except the second result is suffixed onto the first.
 | |
|     <&> [ first2 suffix ] <@ ;
 | |
| 
 | |
| : <&:> ( parser1 parser2 -- result )
 | |
|     #! Same as <&> except the first result is prefixed onto the second.
 | |
|     <&> [ first2 swap prefix ] <@ ;
 | |
| 
 | |
| : <:&:> ( parser1 parser2 -- result )
 | |
|     #! Same as <&> except the two results are appended into one sequence.
 | |
|     <&> [ first2 append ] <@ ;
 | |
| 
 | |
| LAZY: <*> ( parser -- parser )
 | |
|     dup <*> <&:> { } succeed <|> ;
 | |
| 
 | |
| : <+> ( parser -- parser )
 | |
|     #! Return a parser that accepts one or more occurrences of the original
 | |
|     #! parser.
 | |
|     dup <*> <&:> ;
 | |
| 
 | |
| LAZY: <?> ( parser -- parser )
 | |
|     #! Return a parser that optionally uses the parser
 | |
|     #! if that parser would be successful.
 | |
|     [ 1array ] <@ f succeed <|> ;
 | |
| 
 | |
| TUPLE: only-first-parser p1 ;
 | |
| 
 | |
| LAZY: only-first ( parser -- parser )
 | |
|     only-first-parser boa ;
 | |
| 
 | |
| M: only-first-parser parse ( input parser -- list )
 | |
|     #! Transform a parser into a parser that only yields
 | |
|     #! the first possibility.
 | |
|     p1>> parse 1 swap ltake ;
 | |
| 
 | |
| LAZY: <!*> ( parser -- parser )
 | |
|     #! Like <*> but only return one possible result
 | |
|     #! containing all matching parses. Does not return
 | |
|     #! partial matches. Useful for efficiency since that's
 | |
|     #! usually the effect you want and cuts down on backtracking
 | |
|     #! required.
 | |
|     <*> only-first ;
 | |
| 
 | |
| LAZY: <!+> ( parser -- parser )
 | |
|     #! Like <+> but only return one possible result
 | |
|     #! containing all matching parses. Does not return
 | |
|     #! partial matches. Useful for efficiency since that's
 | |
|     #! usually the effect you want and cuts down on backtracking
 | |
|     #! required.
 | |
|     <+> only-first ;
 | |
| 
 | |
| LAZY: <!?> ( parser -- parser )
 | |
|     #! Like <?> but only return one possible result
 | |
|     #! containing all matching parses. Does not return
 | |
|     #! partial matches. Useful for efficiency since that's
 | |
|     #! usually the effect you want and cuts down on backtracking
 | |
|     #! required.
 | |
|     <?> only-first ;
 | |
| 
 | |
| LAZY: <(?)> ( parser -- parser )
 | |
|     #! Like <?> but take shortest match first.
 | |
|     f succeed swap [ 1array ] <@ <|> ;
 | |
| 
 | |
| LAZY: <(*)> ( parser -- parser )
 | |
|     #! Like <*> but take shortest match first.
 | |
|     #! Implementation by Matthew Willis.
 | |
|     { } succeed swap dup <(*)> <&:> <|> ;
 | |
| 
 | |
| LAZY: <(+)> ( parser -- parser )
 | |
|     #! Like <+> but take shortest match first.
 | |
|     #! Implementation by Matthew Willis.
 | |
|     dup <(*)> <&:> ;
 | |
| 
 | |
| : pack ( open body close -- parser )
 | |
|     #! Parse a construct enclosed by two symbols. The three
 | |
|     #! parsers are applied to the input in stack order:
 | |
|     #! 'open', then 'body', then 'close'. The results of the
 | |
|     #! opening and closing parsers are discarded.
 | |
|     <& &> ;
 | |
| 
 | |
| : nonempty-list-of ( items separator -- parser )
 | |
|     #! Parse one or more 'items' separated by 'separator'.
 | |
|     #! An optional separator is also accepted before the
 | |
|     #! first item and after the last one; separator results
 | |
|     #! are discarded.
 | |
|     [ over &> <*> <&:> ] keep <?> tuck pack ;
 | |
| 
 | |
| : list-of ( items separator -- parser )
 | |
|     #! Given a parser for the separator and for the
 | |
|     #! items themselves, return a parser that parses
 | |
|     #! lists of those items. The parse tree is an
 | |
|     #! array of the parsed items.
 | |
|     nonempty-list-of { } succeed <|> ;
 | |
| 
 | |
| LAZY: surrounded-by ( parser start end -- parser' )
 | |
|     #! Wrap 'parser' between the literal strings 'start' and
 | |
|     #! 'end', each turned into a parser with token, using pack.
 | |
|     [ token ] bi@ swapd pack ;
 | |
| 
 | |
| : exactly-n ( parser n -- parser' )
 | |
|     #! Sequence n copies of 'parser' with <and-parser> and
 | |
|     #! flatten the resulting parse tree.
 | |
|     swap <repetition> <and-parser> [ flatten ] <@ ;
 | |
| 
 | |
| : at-most-n ( parser n -- parser' )
 | |
|     #! Build a choice between matching 'parser' exactly n,
 | |
|     #! n-1, ... down to 1 times, ending with epsilon for
 | |
|     #! zero matches.
 | |
|     dup zero? [
 | |
|         2drop epsilon
 | |
|     ] [
 | |
|         2dup exactly-n
 | |
|         -rot 1- at-most-n <|>
 | |
|     ] if ;
 | |
| 
 | |
| : at-least-n ( parser n -- parser' )
 | |
|     #! Match 'parser' exactly n times, followed by zero or
 | |
|     #! more further matches.
 | |
|     #! NOTE(review): the two parts are joined with <&>, so the
 | |
|     #! parse tree is a two-element pair, whereas from-m-to-n
 | |
|     #! joins with <:&:> into one sequence. Confirm intended.
 | |
|     dupd exactly-n swap <*> <&> ;
 | |
| 
 | |
| : from-m-to-n ( parser m n -- parser' )
 | |
|     #! Match 'parser' exactly m times, then at most n - m
 | |
|     #! more times, appending the two parse trees together.
 | |
|     >r [ exactly-n ] 2keep r> swap - at-most-n <:&:> ;
 |