2004-08-15 19:23:47 -04:00
|
|
|
! Copyright (C) 2004 Chris Double.
|
2006-10-05 07:03:46 -04:00
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
!
|
2006-08-07 00:35:48 -04:00
|
|
|
USING: lazy-lists kernel sequences strings math io arrays errors namespaces ;
|
2004-08-15 19:23:47 -04:00
|
|
|
IN: parser-combinators
|
|
|
|
|
2006-10-05 18:52:26 -04:00
|
|
|
! Parser combinator protocol
|
|
|
|
! Generic word that every parser tuple implements: given the input
! and the parser, it leaves a list of results on the stack.
GENERIC: (parse) ( input parser -- list )
|
|
|
|
|
|
|
|
: parse ( input parser -- promise )
    #! Curry 'input' and 'parser' onto a quotation that calls
    #! (parse), then hand that quotation to <promise>.
    [ (parse) ] curry curry
    <promise> ;
|
|
|
|
|
2006-08-02 01:03:47 -04:00
|
|
|
! One successful parse: 'parsed' holds the value produced by the
! parser, 'unparsed' holds the input that is still left over.
TUPLE: parse-result parsed unparsed ;
|
|
|
|
|
2006-10-05 18:52:26 -04:00
|
|
|
: ?head-slice ( seq begin -- newseq ? )
    #! If 'seq' starts with 'begin', leave a slice of 'seq' with
    #! that prefix removed, and t. Otherwise leave 'seq' itself
    #! and f.
    2dup head? [
        length tail-slice t
    ] [
        drop f
    ] if ;
|
|
|
|
|
|
|
|
: unclip-slice ( seq -- rest first )
    #! Split 'seq' into a slice holding everything after the
    #! first element, and the first element itself. The slice is
    #! taken before the first element is fetched.
    [ 1 tail-slice ] keep first ;
|
|
|
|
|
2006-08-01 23:47:57 -04:00
|
|
|
: h:t ( object -- head tail )
    #! Leave the first element of the object and a copy of the
    #! remaining elements.
    #! NOTE(review): when the object is empty it is left on the
    #! stack unchanged, so only one value is produced in that
    #! case -- callers must check for emptiness first.
    dup empty? [
        [ first ] keep 1 tail
    ] unless ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
2006-10-05 18:52:26 -04:00
|
|
|
! Parser that matches a fixed string at the start of the input.
! 'string' is the text that must be present.
TUPLE: token-parser string ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
|
|
|
: token ( string -- parser )
    #! Build a token-parser tuple that matches 'string'.
    <token-parser> ;
|
|
|
|
|
|
|
|
M: token-parser (parse) ( input parser -- list )
    #! Succeeds when the input begins with the parser's string.
    #! The single result has that string as the parsed value and
    #! a slice of the input after it as the unparsed remainder.
    #! Otherwise the result list is nil.
    token-parser-string tuck ?head-slice [
        <parse-result> 1list
    ] [
        2drop nil
    ] if ;
|
|
|
|
|
|
|
|
! Parser that accepts the first element of the input when a
! predicate holds. 'quot' is the predicate quotation.
TUPLE: satisfy-parser quot ;
|
|
|
|
|
|
|
|
: satisfy ( quot -- parser )
    #! Build a satisfy-parser tuple around the predicate 'quot'.
    <satisfy-parser> ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2006-10-05 18:52:26 -04:00
|
|
|
M: satisfy-parser (parse) ( input parser -- list )
    #! A parser that succeeds if the predicate, when passed the
    #! first element of the input, returns true. On success the
    #! single result has that element as the parsed value and a
    #! slice of the rest of the input as the unparsed remainder.
    #! Empty input is a failed parse (nil) rather than an error:
    #! unclip-slice cannot be applied to an empty sequence.
    over empty? [
        2drop nil
    ] [
        satisfy-parser-quot >r unclip-slice dup r> call [
            swap <parse-result> 1list
        ] [
            2drop nil
        ] if
    ] if ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: satisfy2-parser ( inp pred quot -- llist )
    #! A parser that succeeds if the predicate, when passed the
    #! first character in the input, returns true. On success
    #! the quotation is called with the successfully parsed
    #! character on the stack. The result of that call is
    #! returned as the result portion of the successful parse
    #! lazy list.
    #! Empty input is a failed parse (nil) rather than an error,
    #! since there is no first character to test.
    pick empty? [
        3drop nil
    ] [
        -rot over first swap call [
            h:t >r swap call r> <parse-result> 1list
        ] [
            2drop nil
        ] if
    ] if ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: satisfy2 ( pred quot -- parser )
    #! Build a quotation that runs satisfy2-parser with 'pred'
    #! and 'quot' already supplied; only the input is missing.
    [ satisfy2-parser ] curry curry ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: epsilon-parser ( input -- llist )
    #! Parses the empty string: consumes nothing and always
    #! succeeds. The single result carries the empty string ""
    #! as its parsed value and the untouched input as the
    #! unparsed remainder.
    "" swap <parse-result>
    1list ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
|
|
|
: epsilon ( -- parser )
    #! Push a quotation that runs epsilon-parser.
    [ epsilon-parser ] ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: succeed-parser ( input result -- llist )
    #! Always succeeds without consuming input: the single
    #! result has 'result' as its parsed value and the whole
    #! 'input' as the unparsed remainder.
    swap <parse-result>
    1list ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: succeed ( result -- parser )
    #! Build a quotation that runs succeed-parser with 'result'
    #! already supplied.
    [ succeed-parser ] curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: fail-parser ( input -- llist )
    #! Never succeeds: the input is discarded and nil (no
    #! successful parses) is left on the stack.
    drop
    nil ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: fail ( -- parser )
    #! Push a quotation that runs fail-parser.
    [ fail-parser ] ;
|
|
|
|
|
2006-10-05 07:03:46 -04:00
|
|
|
: <&>-parser ( input parser1 parser2 -- result )
    #! Parse 'input' by sequentially combining the two parsers.
    #! parser1 is applied to the input; parser2 is then applied
    #! to the unparsed remainder of every result from parser1.
    #! Each combined result has a two element array
    #! { parsed1 parsed2 } as its parsed value and parser2's
    #! unparsed remainder. The per-result lists are joined with
    #! lconcat, so a list of results is left on the stack.
    #! NOTE(review): both parsers are invoked with 'call', so
    #! they must be quotations; the tuples built by 'token' and
    #! 'satisfy' respond to (parse) instead -- confirm intent.
    -rot call [
        dup parse-result-unparsed rot call
        [
            >r parse-result-parsed r>
            [ parse-result-parsed 2array ] keep
            parse-result-unparsed <parse-result>
        ] lmap-with
    ] lmap-with lconcat ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: <&> ( parser1 parser2 -- parser )
    #! Sequentially combine two parsers. The returned parser
    #! runs parser1 first, then runs parser2 on the remaining
    #! input of each result from parser1 (see <&>-parser).
    [ <&>-parser ] curry curry ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: <|>-parser ( input parser1 parser2 -- result )
    #! Choice: run parser1 and then parser2 on the same input
    #! and append the two result lists with lappend, parser1's
    #! results first.
    >r
    dupd call
    swap r> call
    lappend ;
|
|
|
|
|
2004-08-15 19:23:47 -04:00
|
|
|
: <|> ( p1 p2 -- parser )
    #! Choice operator for parsers. The returned parser applies
    #! both p1 and p2 to the input and yields the results of
    #! whichever succeed (see <|>-parser).
    [ <|>-parser ] curry curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: string-ltrim ( string -- string )
    #! Return the string without any leading whitespace.
    #! An empty string is returned as is; this also ends the
    #! recursion when the whole string is whitespace, where
    #! 'first' would otherwise be applied to an empty string.
    dup empty? [
        dup first blank? [ 1 tail string-ltrim ] when
    ] unless ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: sp-parser ( input parser -- result )
    #! Strip leading whitespace from the input with
    #! string-ltrim, then call the parser on what is left.
    swap string-ltrim
    swap call ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
|
|
|
: sp ( parser -- parser )
    #! Wrap 'parser' so that leading whitespace is skipped
    #! before it is called (see sp-parser).
    [ sp-parser ] curry ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: just-parser ( input parser -- result )
    #! Call the parser on the input and keep only those results
    #! whose unparsed remainder is empty, i.e. the parses that
    #! consumed the entire input.
    call
    [ parse-result-unparsed empty? ] lsubset ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
|
|
|
: just ( parser -- parser )
    #! Wrap 'parser' so that only complete parses are kept
    #! (see just-parser).
    [ just-parser ] curry ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: <@-parser ( input parser quot -- result )
    #! Call the parser on the input. For every successful parse
    #! 'quot' is called with the parsed value on the stack and
    #! its output becomes the new parsed value; the unparsed
    #! remainder is carried over untouched. This is how parse
    #! tree results get transformed (for example converting
    #! strings to integers).
    -rot call
    [
        [ parse-result-parsed swap call ] keep
        parse-result-unparsed <parse-result>
    ] lmap-with ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: <@ ( parser quot -- parser )
    #! Wrap 'parser' so that 'quot' is applied to each parsed
    #! value (see <@-parser).
    [ <@-parser ] curry curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: some-parser ( input parser -- result )
    #! Wrap the parser with 'just' so only complete parses
    #! remain, call it, take the first result with 'car' and
    #! leave just its parsed value (the remainder is empty).
    #! NOTE(review): no check is made for an empty result list
    #! before 'car' -- confirm behaviour when nothing matches.
    just call
    car parse-result-parsed ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: some ( parser -- deterministic-parser )
    #! Wrap 'parser' in a quotation that runs some-parser.
    [ some-parser ] curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: <& ( parser1 parser2 -- parser )
    #! Like <&>, but the parsed value of each result is only
    #! the first element of the pair, so parser2's output is
    #! discarded.
    <&>
    [ first ] <@ ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: &> ( parser1 parser2 -- parser )
    #! Like <&>, but the parsed value of each result is only
    #! the second element of the pair, so parser1's output is
    #! discarded.
    <&>
    [ second ] <@ ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: <:&>-parser ( input parser1 parser2 -- result )
    #! Same as <&> except the pair is flattened: the elements
    #! of parser1's result are spliced in with %, followed by
    #! parser2's result added as a single element with , and
    #! the whole is collected into a new array.
    <&>
    [ [ second ] keep first [ % , ] { } make ] <@
    call ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
|
|
|
: <:&> ( parser1 parser2 -- parser )
    #! Same as <&> except the result is flattened
    #! (see <:&>-parser).
    [ <:&>-parser ] curry curry ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
2006-08-07 00:35:48 -04:00
|
|
|
: <&:>-parser ( input parser1 parser2 -- result )
    #! Same as <&> except the pair is flattened: parser1's
    #! result is added as a single element with , followed by
    #! the elements of parser2's result spliced in with % and
    #! the whole is collected into a new array.
    <&>
    [ [ second ] keep first [ , % ] { } make ] <@
    call ;
|
|
|
|
|
|
|
|
: <&:> ( parser1 parser2 -- parser )
    #! Same as <&> except the result is flattened
    #! (see <&:>-parser).
    [ <&:>-parser ] curry curry ;
|
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
! Forward declaration: (<*>) refers to <*> before <*> is defined.
DEFER: <*>
|
|
|
|
|
|
|
|
: (<*>) ( parser -- parser )
    #! Non-delayed implementation of <*>: either one occurrence
    #! of the parser followed by <*> of the same parser
    #! (joined with <&:>), or a success whose result is the
    #! empty quotation [ ].
    dup <*> <&:>
    [ ] succeed
    <|> ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
2004-08-15 19:23:47 -04:00
|
|
|
: <*> ( parser -- parser )
    #! Return a parser that accepts zero or more occurrences of
    #! the original parser. The (<*>) parser is only built when
    #! the returned quotation is called, not when <*> itself
    #! runs.
    [ (<*>) call ] curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: (<+>) ( parser -- parser )
    #! Non-delayed implementation of <+>: one occurrence of the
    #! parser followed by <*> of the same parser, joined with
    #! <&:>.
    dup <*>
    <&:> ;
|
2005-04-29 23:20:11 -04:00
|
|
|
|
2004-08-15 19:23:47 -04:00
|
|
|
: <+> ( parser -- parser )
    #! Return a parser that accepts one or more occurrences of
    #! the original parser. The (<+>) parser is only built when
    #! the returned quotation is called.
    [ (<+>) call ] curry ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: (<?>) ( parser -- parser )
    #! Non-delayed implementation of <?>: either the parser,
    #! with each parsed value wrapped by 'unit', or a success
    #! whose result is f.
    [ unit ] <@
    f succeed
    <|> ;
|
2004-08-15 19:23:47 -04:00
|
|
|
|
2005-04-29 23:20:11 -04:00
|
|
|
: <?> ( parser -- parser )
    #! Return a parser that applies the original parser
    #! optionally: it also succeeds, with f as the result, when
    #! nothing is consumed. The (<?>) parser is only built when
    #! the returned quotation is called.
    [ (<?>) call ] curry ;
|