Clean up parser combinators

release
Slava Pestov 2007-11-25 17:07:32 -05:00
parent 7925cf2266
commit 12599e03c4
1 changed files with 153 additions and 151 deletions

View File

@ -1,22 +1,23 @@
! Copyright (C) 2004 Chris Double. ! Copyright (C) 2004 Chris Double.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: lazy-lists promises kernel sequences strings math io USING: lazy-lists promises kernel sequences strings math
arrays namespaces splitting ; arrays splitting ;
IN: parser-combinators IN: parser-combinators
! Parser combinator protocol ! Parser combinator protocol
GENERIC: (parse) ( input parser -- list ) GENERIC: parse ( input parser -- list )
M: promise (parse) ( input parser -- list ) M: promise parse ( input parser -- list )
force (parse) ; force parse ;
: parse ( input parser -- promise )
(parse) ;
TUPLE: parse-result parsed unparsed ; TUPLE: parse-result parsed unparsed ;
: parse-1 ( input parser -- result ) : parse-1 ( input parser -- result )
parse car parse-result-parsed ; parse dup nil? [
"Parse error" throw
] [
car parse-result-parsed
] if ;
C: <parse-result> parse-result C: <parse-result> parse-result
@ -24,105 +25,106 @@ TUPLE: token-parser string ;
C: token token-parser ( string -- parser ) C: token token-parser ( string -- parser )
M: token-parser (parse) ( input parser -- list ) M: token-parser parse ( input parser -- list )
token-parser-string swap over ?head-slice [ token-parser-string swap over ?head-slice [
<parse-result> 1list <parse-result> 1list
] [ ] [
2drop nil 2drop nil
] if ; ] if ;
TUPLE: satisfy-parser quot ; TUPLE: satisfy-parser quot ;
C: satisfy satisfy-parser ( quot -- parser ) C: satisfy satisfy-parser ( quot -- parser )
M: satisfy-parser (parse) ( input parser -- list ) M: satisfy-parser parse ( input parser -- list )
#! A parser that succeeds if the predicate, #! A parser that succeeds if the predicate,
#! when passed the first character in the input, returns #! when passed the first character in the input, returns
#! true. #! true.
over empty? [ over empty? [
2drop nil 2drop nil
] [
satisfy-parser-quot >r unclip-slice dup r> call [
swap <parse-result> 1list
] [ ] [
2drop nil satisfy-parser-quot >r unclip-slice dup r> call [
] if swap <parse-result> 1list
] if ; ] [
2drop nil
] if
] if ;
LAZY: any-char-parser ( -- parser ) LAZY: any-char-parser ( -- parser )
[ drop t ] satisfy ; [ drop t ] satisfy ;
TUPLE: epsilon-parser ; TUPLE: epsilon-parser ;
C: epsilon epsilon-parser ( -- parser ) C: epsilon epsilon-parser ( -- parser )
M: epsilon-parser (parse) ( input parser -- list ) M: epsilon-parser parse ( input parser -- list )
#! A parser that parses the empty string. It #! A parser that parses the empty string. It
#! does not consume any input and always returns #! does not consume any input and always returns
#! an empty string as the parse tree with the #! an empty string as the parse tree with the
#! unmodified input. #! unmodified input.
drop "" swap <parse-result> 1list ; drop "" swap <parse-result> 1list ;
TUPLE: succeed-parser result ; TUPLE: succeed-parser result ;
C: succeed succeed-parser ( result -- parser ) C: succeed succeed-parser ( result -- parser )
M: succeed-parser (parse) ( input parser -- list ) M: succeed-parser parse ( input parser -- list )
#! A parser that always returns 'result' as a #! A parser that always returns 'result' as a
#! successful parse with no input consumed. #! successful parse with no input consumed.
succeed-parser-result swap <parse-result> 1list ; succeed-parser-result swap <parse-result> 1list ;
TUPLE: fail-parser ; TUPLE: fail-parser ;
C: fail fail-parser ( -- parser ) C: fail fail-parser ( -- parser )
M: fail-parser (parse) ( input parser -- list ) M: fail-parser parse ( input parser -- list )
#! A parser that always fails and returns #! A parser that always fails and returns
#! an empty list of successes. #! an empty list of successes.
2drop nil ; 2drop nil ;
TUPLE: and-parser parsers ; TUPLE: and-parser parsers ;
: <&> ( parser1 parser2 -- parser ) : <&> ( parser1 parser2 -- parser )
over and-parser? [ over and-parser? [
>r and-parser-parsers r> add >r and-parser-parsers r> add
] [ ] [
2array 2array
] if \ and-parser construct-boa ; ] if and-parser construct-boa ;
: and-parser-parse ( list p1 -- list ) : and-parser-parse ( list p1 -- list )
swap [ swap [
dup parse-result-unparsed rot parse dup parse-result-unparsed rot parse
[ [
>r parse-result-parsed r> >r parse-result-parsed r>
[ parse-result-parsed 2array ] keep [ parse-result-parsed 2array ] keep
parse-result-unparsed <parse-result> parse-result-unparsed <parse-result>
] lmap-with ] lmap-with
] lmap-with lconcat ; ] lmap-with lconcat ;
M: and-parser (parse) ( input parser -- list ) M: and-parser parse ( input parser -- list )
#! Parse 'input' by sequentially combining the #! Parse 'input' by sequentially combining the
#! two parsers. First parser1 is applied to the #! two parsers. First parser1 is applied to the
#! input then parser2 is applied to the rest of #! input then parser2 is applied to the rest of
#! the input strings from the first parser. #! the input strings from the first parser.
and-parser-parsers unclip swapd parse [ [ and-parser-parse ] reduce ] 2curry promise ; and-parser-parsers unclip swapd parse
[ [ and-parser-parse ] reduce ] 2curry promise ;
TUPLE: or-parser p1 p2 ; TUPLE: or-parser p1 p2 ;
C: <|> or-parser ( parser1 parser2 -- parser ) C: <|> or-parser ( parser1 parser2 -- parser )
M: or-parser (parse) ( input parser1 -- list ) M: or-parser parse ( input parser1 -- list )
#! Return the combined list resulting from the parses #! Return the combined list resulting from the parses
#! of parser1 and parser2 being applied to the same #! of parser1 and parser2 being applied to the same
#! input. This implements the choice parsing operator. #! input. This implements the choice parsing operator.
[ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ; [ or-parser-p1 ] keep or-parser-p2 >r dupd parse swap r> parse lappend ;
: left-trim-slice ( string -- string ) : left-trim-slice ( string -- string )
#! Return a new string without any leading whitespace #! Return a new string without any leading whitespace
#! from the original string. #! from the original string.
dup empty? [ dup empty? [
dup first blank? [ 1 tail-slice left-trim-slice ] when dup first blank? [ 1 tail-slice left-trim-slice ] when
] unless ; ] unless ;
TUPLE: sp-parser p1 ; TUPLE: sp-parser p1 ;
@ -130,115 +132,115 @@ TUPLE: sp-parser p1 ;
#! calling the original parser. #! calling the original parser.
C: sp sp-parser ( p1 -- parser ) C: sp sp-parser ( p1 -- parser )
M: sp-parser (parse) ( input parser -- list ) M: sp-parser parse ( input parser -- list )
#! Skip all leading whitespace from the input then call #! Skip all leading whitespace from the input then call
#! the parser on the remaining input. #! the parser on the remaining input.
>r left-trim-slice r> sp-parser-p1 parse ; >r left-trim-slice r> sp-parser-p1 parse ;
TUPLE: just-parser p1 ; TUPLE: just-parser p1 ;
C: just just-parser ( p1 -- parser ) C: just just-parser ( p1 -- parser )
M: just-parser (parse) ( input parser -- result ) M: just-parser parse ( input parser -- result )
#! Calls the given parser on the input and removes #! Calls the given parser on the input and removes
#! from the results anything where the remaining #! from the results anything where the remaining
#! input to be parsed is not empty. So ensures a #! input to be parsed is not empty. So ensures a
#! fully parsed input string. #! fully parsed input string.
just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ; just-parser-p1 parse [ parse-result-unparsed empty? ] lsubset ;
TUPLE: apply-parser p1 quot ; TUPLE: apply-parser p1 quot ;
C: <@ apply-parser ( parser quot -- parser ) C: <@ apply-parser ( parser quot -- parser )
M: apply-parser (parse) ( input parser -- result ) M: apply-parser parse ( input parser -- result )
#! Calls the parser on the input. For each successful #! Calls the parser on the input. For each successful
#! parse the quot is called with the parse result on the stack. #! parse the quot is called with the parse result on the stack.
#! The result of that quotation then becomes the new parse result. #! The result of that quotation then becomes the new parse result.
#! This allows modification of parse tree results (like #! This allows modification of parse tree results (like
#! converting strings to integers, etc). #! converting strings to integers, etc).
[ apply-parser-p1 ] keep apply-parser-quot [ apply-parser-p1 ] keep apply-parser-quot
-rot parse [ -rot parse [
[ parse-result-parsed swap call ] keep [ parse-result-parsed swap call ] keep
parse-result-unparsed <parse-result> parse-result-unparsed <parse-result>
] lmap-with ; ] lmap-with ;
TUPLE: some-parser p1 ; TUPLE: some-parser p1 ;
C: some some-parser ( p1 -- parser ) C: some some-parser ( p1 -- parser )
M: some-parser (parse) ( input parser -- result ) M: some-parser parse ( input parser -- result )
#! Calls the parser on the input, guarantees #! Calls the parser on the input, guarantees
#! the parse is complete (the remaining input is empty), #! the parse is complete (the remaining input is empty),
#! picks the first solution and only returns the parse #! picks the first solution and only returns the parse
#! tree since the remaining input is empty. #! tree since the remaining input is empty.
some-parser-p1 just parse-1 ; some-parser-p1 just parse-1 ;
: <& ( parser1 parser2 -- parser ) : <& ( parser1 parser2 -- parser )
#! Same as <&> except discard the results of the second parser. #! Same as <&> except discard the results of the second parser.
<&> [ first ] <@ ; <&> [ first ] <@ ;
: &> ( parser1 parser2 -- parser ) : &> ( parser1 parser2 -- parser )
#! Same as <&> except discard the results of the first parser. #! Same as <&> except discard the results of the first parser.
<&> [ second ] <@ ; <&> [ second ] <@ ;
: <:&> ( parser1 parser2 -- result ) : <:&> ( parser1 parser2 -- result )
#! Same as <&> except flatten the result. #! Same as <&> except flatten the result.
<&> [ dup second swap first [ % , ] { } make ] <@ ; <&> [ first2 add ] <@ ;
: <&:> ( parser1 parser2 -- result ) : <&:> ( parser1 parser2 -- result )
#! Same as <&> except flatten the result. #! Same as <&> except flatten the result.
<&> [ dup second swap first [ , % ] { } make ] <@ ; <&> [ first2 swap add* ] <@ ;
: <:&:> ( parser1 parser2 -- result ) : <:&:> ( parser1 parser2 -- result )
#! Same as <&> except flatten the result. #! Same as <&> except flatten the result.
<&> [ dup second swap first [ % % ] { } make ] <@ ; <&> [ first2 append ] <@ ;
LAZY: <*> ( parser -- parser ) LAZY: <*> ( parser -- parser )
dup <*> <&:> { } succeed <|> ; dup <*> <&:> { } succeed <|> ;
: <+> ( parser -- parser ) : <+> ( parser -- parser )
#! Return a parser that accepts one or more occurrences of the original #! Return a parser that accepts one or more occurrences of the original
#! parser. #! parser.
dup <*> <&:> ; dup <*> <&:> ;
LAZY: <?> ( parser -- parser ) LAZY: <?> ( parser -- parser )
#! Return a parser that optionally uses the parser #! Return a parser that optionally uses the parser
#! if that parser would be successful. #! if that parser would be successful.
[ 1array ] <@ f succeed <|> ; [ 1array ] <@ f succeed <|> ;
TUPLE: only-first-parser p1 ; TUPLE: only-first-parser p1 ;
LAZY: only-first ( parser -- parser )
\ only-first-parser construct-boa ;
M: only-first-parser (parse) ( input parser -- list ) LAZY: only-first ( parser -- parser )
#! Transform a parser into a parser that only yields only-first-parser construct-boa ;
#! the first possibility.
only-first-parser-p1 parse 1 swap ltake ; M: only-first-parser parse ( input parser -- list )
#! Transform a parser into a parser that only yields
#! the first possibility.
only-first-parser-p1 parse 1 swap ltake ;
LAZY: <!*> ( parser -- parser ) LAZY: <!*> ( parser -- parser )
#! Like <*> but only return one possible result #! Like <*> but only return one possible result
#! containing all matching parses. Does not return #! containing all matching parses. Does not return
#! partial matches. Useful for efficiency since that's #! partial matches. Useful for efficiency since that's
#! usually the effect you want and cuts down on backtracking #! usually the effect you want and cuts down on backtracking
#! required. #! required.
<*> only-first ; <*> only-first ;
LAZY: <!+> ( parser -- parser ) LAZY: <!+> ( parser -- parser )
#! Like <+> but only return one possible result #! Like <+> but only return one possible result
#! containing all matching parses. Does not return #! containing all matching parses. Does not return
#! partial matches. Useful for efficiency since that's #! partial matches. Useful for efficiency since that's
#! usually the effect you want and cuts down on backtracking #! usually the effect you want and cuts down on backtracking
#! required. #! required.
<+> only-first ; <+> only-first ;
LAZY: <!?> ( parser -- parser ) LAZY: <!?> ( parser -- parser )
#! Like <?> but only return one possible result #! Like <?> but only return one possible result
#! containing all matching parses. Does not return #! containing all matching parses. Does not return
#! partial matches. Useful for efficiency since that's #! partial matches. Useful for efficiency since that's
#! usually the effect you want and cuts down on backtracking #! usually the effect you want and cuts down on backtracking
#! required. #! required.
<?> only-first ; <?> only-first ;
LAZY: <(*)> ( parser -- parser ) LAZY: <(*)> ( parser -- parser )
#! Like <*> but take shortest match first. #! Like <*> but take shortest match first.
@ -251,20 +253,20 @@ LAZY: <(+)> ( parser -- parser )
dup <(*)> <&:> ; dup <(*)> <&:> ;
: pack ( close body open -- parser ) : pack ( close body open -- parser )
#! Parse a construct enclosed by two symbols, #! Parse a construct enclosed by two symbols,
#! given a parser for the opening symbol, the #! given a parser for the opening symbol, the
#! closing symbol, and the body. #! closing symbol, and the body.
<& &> ; <& &> ;
: nonempty-list-of ( items separator -- parser ) : nonempty-list-of ( items separator -- parser )
[ over &> <*> <&:> ] keep <?> tuck pack ; [ over &> <*> <&:> ] keep <?> tuck pack ;
: list-of ( items separator -- parser ) : list-of ( items separator -- parser )
#! Given a parser for the separator and for the #! Given a parser for the separator and for the
#! items themselves, return a parser that parses #! items themselves, return a parser that parses
#! lists of those items. The parse tree is an #! lists of those items. The parse tree is an
#! array of the parsed items. #! array of the parsed items.
nonempty-list-of { } succeed <|> ; nonempty-list-of { } succeed <|> ;
LAZY: surrounded-by ( parser start end -- parser' ) LAZY: surrounded-by ( parser start end -- parser' )
[ token ] 2apply swapd pack ; [ token ] 2apply swapd pack ;