! Copyright (C) 2004 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! USING: lazy-lists kernel sequences sequences-contrib strings math io arrays errors namespaces ; IN: parser-combinators ! Parser combinator protocol GENERIC: (parse) ( input parser -- list ) M: promise (parse) ( input parser -- list ) force (parse) ; LAZY: parse ( input parser -- promise ) (parse) ; TUPLE: parse-result parsed unparsed ; TUPLE: token-parser string ; LAZY: token ( string -- parser ) ; M: token-parser (parse) ( input parser -- list ) token-parser-string swap over ?head-slice [ 1list ] [ 2drop nil ] if ; TUPLE: satisfy-parser quot ; LAZY: satisfy ( quot -- parser ) ; M: satisfy-parser (parse) ( input parser -- list ) #! A parser that succeeds if the predicate, #! when passed the first character in the input, returns #! true. over empty? [ 2drop nil ] [ satisfy-parser-quot >r unclip-slice dup r> call [ swap 1list ] [ 2drop nil ] if ] if ; TUPLE: epsilon-parser ; LAZY: epsilon ( -- parser ) ; M: epsilon-parser (parse) ( input parser -- list ) #! A parser that parses the empty string. It #! does not consume any input and always returns #! an empty list as the parse tree with the #! unmodified input. drop "" swap 1list ; TUPLE: succeed-parser result ; LAZY: succeed ( result -- parser ) ; M: succeed-parser (parse) ( input parser -- list ) #! A parser that always returns 'result' as a #! successful parse with no input consumed. succeed-parser-result swap 1list ; TUPLE: fail-parser ; LAZY: fail ( -- parser ) ; M: fail-parser (parse) ( input parser -- list ) #! A parser that always fails and returns #! an empty list of successes. 2drop nil ; TUPLE: and-parser p1 p2 ; LAZY: <&> ( parser1 parser2 -- parser ) ; M: and-parser (parse) ( input parser -- list ) #! Parse 'input' by sequentially combining the #! two parsers. First parser1 is applied to the #! input then parser2 is applied to the rest of #! the input strings from the first parser. [ and-parser-p1 ] keep and-parser-p2 -rot parse [ dup parse-result-unparsed rot parse [ >r parse-result-parsed r> [ parse-result-parsed 2array ] keep parse-result-unparsed ] lmap-with ] lmap-with lconcat ; TUPLE: or-parser p1 p2 ; LAZY: <|> ( parser1 parser2 -- parser ) ; M: or-parser (parse) ( input parser1 -- list ) #! Return the combined list resulting from the parses #! of parser1 and parser2 being applied to the same #! input. This implements the choice parsing operator. [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ; : ltrim-slice ( string -- string ) #! Return a new string without any leading whitespace #! from the original string. dup empty? [ dup first blank? [ 1 tail-slice ltrim-slice ] when ] unless ; TUPLE: sp-parser p1 ; LAZY: sp ( p1 -- parser ) #! Return a parser that first skips all whitespace before #! calling the original parser. ; M: sp-parser (parse) ( input parser -- list ) #! Skip all leading whitespace from the input then call #! the parser on the remaining input. >r ltrim-slice r> sp-parser-p1 parse ; TUPLE: just-parser p1 ; LAZY: just ( p1 -- parser ) ; M: just-parser (parse) ( input parser -- result ) #! Calls the given parser on the input removes #! from the results anything where the remaining #! input to be parsed is not empty. So ensures a #! fully parsed input string. just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ; TUPLE: apply-parser p1 quot ; LAZY: <@ ( parser quot -- parser ) ; M: apply-parser (parse) ( input parser -- result ) #! Calls the parser on the input. For each successfull #! parse the quot is call with the parse result on the stack. #! The result of that quotation then becomes the new parse result. #! This allows modification of parse tree results (like #! converting strings to integers, etc). [ apply-parser-p1 ] keep apply-parser-quot -rot parse [ [ parse-result-parsed swap call ] keep parse-result-unparsed ] lmap-with ; TUPLE: some-parser p1 ; LAZY: some ( p1 -- parser ) ; M: some-parser (parse) ( input parser -- result ) #! Calls the parser on the input, guarantees #! the parse is complete (the remaining input is empty), #! picks the first solution and only returns the parse #! tree since the remaining input is empty. some-parser-p1 just parse car parse-result-parsed ; LAZY: <& ( parser1 parser2 -- parser ) #! Same as <&> except discard the results of the second parser. <&> [ first ] <@ ; LAZY: &> ( parser1 parser2 -- parser ) #! Same as <&> except discard the results of the first parser. <&> [ second ] <@ ; LAZY: <:&> ( parser1 parser2 -- result ) #! Same as <&> except flatten the result. <&> [ dup second swap first [ % , ] { } make ] <@ ; LAZY: <&:> ( parser1 parser2 -- result ) #! Same as <&> except flatten the result. <&> [ dup second swap first [ , % ] { } make ] <@ ; LAZY: <*> ( parser -- parser ) dup <*> <&:> { } succeed <|> ; LAZY: <+> ( parser -- parser ) #! Return a parser that accepts one or more occurences of the original #! parser. dup <*> <&:> ; LAZY: ( parser -- parser ) #! Return a parser that optionally uses the parser #! if that parser would be successfull. [ 1array ] <@ f succeed <|> ; TUPLE: only-first-parser p1 ; LAZY: only-first ( parser -- parser ) ; M: only-first-parser (parse) ( input parser -- list ) #! Transform a parser into a parser that only yields #! the first possibility. only-first-parser-p1 parse 1 swap ltake ; LAZY: ( parser -- parser ) #! Like <*> but only return one possible result #! containing all matching parses. Does not return #! partial matches. Useful for efficiency since that's #! usually the effect you want and cuts down on backtracking #! required. <*> only-first ; LAZY: ( parser -- parser ) #! Like <+> but only return one possible result #! containing all matching parses. Does not return #! partial matches. Useful for efficiency since that's #! usually the effect you want and cuts down on backtracking #! required. <+> only-first ; LAZY: ( parser -- parser ) #! Like but only return one possible result #! containing all matching parses. Does not return #! partial matches. Useful for efficiency since that's #! usually the effect you want and cuts down on backtracking #! required. only-first ;