2007-09-20 18:09:08 -04:00
|
|
|
! Copyright (C) 2004 Chris Double.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2007-11-25 17:07:32 -05:00
|
|
|
USING: lazy-lists promises kernel sequences strings math
|
2008-02-01 00:59:29 -05:00
|
|
|
arrays splitting quotations combinators namespaces
|
2008-03-03 14:29:28 -05:00
|
|
|
unicode.case unicode.categories sequences.deep ;
|
2007-09-20 18:09:08 -04:00
|
|
|
IN: parser-combinators
|
|
|
|
|
|
|
|
! Parser combinator protocol
|
2007-11-25 17:07:32 -05:00
|
|
|
GENERIC: parse ( input parser -- list )
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: promise parse ( input parser -- list )
|
|
|
|
force parse ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-24 18:33:20 -05:00
|
|
|
TUPLE: parse-result parsed unparsed ;
|
|
|
|
|
2007-11-24 18:07:01 -05:00
|
|
|
: parse-1 ( input parser -- result )
|
2007-11-28 02:12:42 -05:00
|
|
|
dupd parse dup nil? [
|
|
|
|
"Cannot parse " rot append throw
|
2007-11-25 17:07:32 -05:00
|
|
|
] [
|
2007-11-28 02:12:42 -05:00
|
|
|
nip car parse-result-parsed
|
2007-11-25 17:07:32 -05:00
|
|
|
] if ;
|
2007-11-24 18:07:01 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
C: <parse-result> parse-result
|
|
|
|
|
2007-12-10 02:20:16 -05:00
|
|
|
: <parse-results> ( parsed unparsed -- list )
|
|
|
|
<parse-result> 1list ;
|
|
|
|
|
2007-11-30 20:20:38 -05:00
|
|
|
: parse-result-parsed-slice ( parse-result -- slice )
|
|
|
|
dup parse-result-parsed empty? [
|
|
|
|
parse-result-unparsed 0 0 rot <slice>
|
|
|
|
] [
|
|
|
|
dup parse-result-unparsed
|
|
|
|
dup slice-from [ rot parse-result-parsed length - ] keep
|
|
|
|
rot slice-seq <slice>
|
|
|
|
] if ;
|
|
|
|
|
2007-12-08 03:21:50 -05:00
|
|
|
: string= ( str1 str2 ignore-case -- ? )
|
2008-03-29 21:36:58 -04:00
|
|
|
[ [ >upper ] bi@ ] when sequence= ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-12-08 03:21:50 -05:00
|
|
|
: string-head? ( str head ignore-case -- ? )
|
2008-02-16 19:50:26 -05:00
|
|
|
2over shorter? [
|
2007-12-08 03:21:50 -05:00
|
|
|
3drop f
|
2007-11-25 17:07:32 -05:00
|
|
|
] [
|
2007-12-08 03:21:50 -05:00
|
|
|
>r [ length head-slice ] keep r> string=
|
2007-11-25 17:07:32 -05:00
|
|
|
] if ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-12-08 03:21:50 -05:00
|
|
|
: ?string-head ( str head ignore-case -- newstr ? )
|
|
|
|
>r 2dup r> string-head?
|
|
|
|
[ length tail-slice t ] [ drop f ] if ;
|
|
|
|
|
|
|
|
TUPLE: token-parser string ignore-case? ;
|
|
|
|
|
|
|
|
C: <token-parser> token-parser
|
|
|
|
|
|
|
|
: token ( string -- parser ) f <token-parser> ;
|
|
|
|
|
|
|
|
: case-insensitive-token ( string -- parser ) t <token-parser> ;
|
|
|
|
|
|
|
|
M: token-parser parse ( input parser -- list )
|
|
|
|
dup token-parser-string swap token-parser-ignore-case?
|
|
|
|
>r tuck r> ?string-head
|
2007-12-10 02:20:16 -05:00
|
|
|
[ <parse-results> ] [ 2drop nil ] if ;
|
2007-12-08 03:21:50 -05:00
|
|
|
|
2007-11-26 18:38:24 -05:00
|
|
|
: 1token ( n -- parser ) 1string token ;
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
TUPLE: satisfy-parser quot ;
|
|
|
|
|
|
|
|
C: satisfy satisfy-parser ( quot -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: satisfy-parser parse ( input parser -- list )
|
|
|
|
#! A parser that succeeds if the predicate,
|
|
|
|
#! when passed the first character in the input, returns
|
|
|
|
#! true.
|
|
|
|
over empty? [
|
|
|
|
2drop nil
|
2007-09-20 18:09:08 -04:00
|
|
|
] [
|
2007-12-10 02:20:16 -05:00
|
|
|
satisfy-parser-quot >r unclip-slice dup r> call
|
|
|
|
[ swap <parse-results> ] [ 2drop nil ] if
|
2007-11-25 17:07:32 -05:00
|
|
|
] if ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-24 18:07:01 -05:00
|
|
|
LAZY: any-char-parser ( -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
[ drop t ] satisfy ;
|
2007-11-24 18:07:01 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
TUPLE: epsilon-parser ;
|
|
|
|
|
|
|
|
C: epsilon epsilon-parser ( -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: epsilon-parser parse ( input parser -- list )
|
|
|
|
#! A parser that parses the empty string. It
|
|
|
|
#! does not consume any input and always returns
|
|
|
|
#! an empty list as the parse tree with the
|
|
|
|
#! unmodified input.
|
2007-12-10 02:20:16 -05:00
|
|
|
drop "" swap <parse-results> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: succeed-parser result ;
|
|
|
|
|
|
|
|
C: succeed succeed-parser ( result -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: succeed-parser parse ( input parser -- list )
|
|
|
|
#! A parser that always returns 'result' as a
|
|
|
|
#! successful parse with no input consumed.
|
2007-12-10 02:20:16 -05:00
|
|
|
succeed-parser-result swap <parse-results> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: fail-parser ;
|
|
|
|
|
|
|
|
C: fail fail-parser ( -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: fail-parser parse ( input parser -- list )
|
|
|
|
#! A parser that always fails and returns
|
|
|
|
#! an empty list of successes.
|
|
|
|
2drop nil ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-12-10 02:20:16 -05:00
|
|
|
TUPLE: ensure-parser test ;
|
|
|
|
|
|
|
|
: ensure ( parser -- ensure )
|
2008-04-13 16:06:27 -04:00
|
|
|
ensure-parser boa ;
|
2007-12-10 02:20:16 -05:00
|
|
|
|
|
|
|
M: ensure-parser parse ( input parser -- list )
|
|
|
|
2dup ensure-parser-test parse nil?
|
|
|
|
[ 2drop nil ] [ drop t swap <parse-results> ] if ;
|
|
|
|
|
|
|
|
TUPLE: ensure-not-parser test ;
|
|
|
|
|
|
|
|
: ensure-not ( parser -- ensure )
|
2008-04-13 16:06:27 -04:00
|
|
|
ensure-not-parser boa ;
|
2007-12-10 02:20:16 -05:00
|
|
|
|
|
|
|
M: ensure-not-parser parse ( input parser -- list )
|
|
|
|
2dup ensure-not-parser-test parse nil?
|
|
|
|
[ drop t swap <parse-results> ] [ 2drop nil ] if ;
|
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
TUPLE: and-parser parsers ;
|
|
|
|
|
|
|
|
: <&> ( parser1 parser2 -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
over and-parser? [
|
2008-03-31 20:18:05 -04:00
|
|
|
>r and-parser-parsers r> suffix
|
2007-11-25 17:07:32 -05:00
|
|
|
] [
|
|
|
|
2array
|
2008-04-13 16:06:27 -04:00
|
|
|
] if and-parser boa ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-28 02:12:42 -05:00
|
|
|
: <and-parser> ( parsers -- parser )
|
2008-04-13 16:06:27 -04:00
|
|
|
dup length 1 = [ first ] [ and-parser boa ] if ;
|
2007-11-28 02:12:42 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: and-parser-parse ( list p1 -- list )
|
2007-11-25 17:07:32 -05:00
|
|
|
swap [
|
|
|
|
dup parse-result-unparsed rot parse
|
|
|
|
[
|
|
|
|
>r parse-result-parsed r>
|
|
|
|
[ parse-result-parsed 2array ] keep
|
|
|
|
parse-result-unparsed <parse-result>
|
|
|
|
] lmap-with
|
|
|
|
] lmap-with lconcat ;
|
|
|
|
|
|
|
|
M: and-parser parse ( input parser -- list )
|
|
|
|
#! Parse 'input' by sequentially combining the
|
|
|
|
#! two parsers. First parser1 is applied to the
|
|
|
|
#! input then parser2 is applied to the rest of
|
|
|
|
#! the input strings from the first parser.
|
|
|
|
and-parser-parsers unclip swapd parse
|
|
|
|
[ [ and-parser-parse ] reduce ] 2curry promise ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-28 02:12:42 -05:00
|
|
|
TUPLE: or-parser parsers ;
|
|
|
|
|
|
|
|
: <or-parser> ( parsers -- parser )
|
2008-04-13 16:06:27 -04:00
|
|
|
dup length 1 = [ first ] [ or-parser boa ] if ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-28 02:12:42 -05:00
|
|
|
: <|> ( parser1 parser2 -- parser )
|
|
|
|
2array <or-parser> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: or-parser parse ( input parser1 -- list )
|
|
|
|
#! Return the combined list resulting from the parses
|
|
|
|
#! of parser1 and parser2 being applied to the same
|
|
|
|
#! input. This implements the choice parsing operator.
|
2007-11-28 02:12:42 -05:00
|
|
|
or-parser-parsers 0 swap seq>list
|
|
|
|
[ parse ] lmap-with lconcat ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-10-21 17:37:50 -04:00
|
|
|
: left-trim-slice ( string -- string )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Return a new string without any leading whitespace
|
|
|
|
#! from the original string.
|
|
|
|
dup empty? [
|
2008-04-26 03:01:43 -04:00
|
|
|
dup first blank? [ rest-slice left-trim-slice ] when
|
2007-11-25 17:07:32 -05:00
|
|
|
] unless ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: sp-parser p1 ;
|
|
|
|
|
|
|
|
#! Return a parser that first skips all whitespace before
|
|
|
|
#! calling the original parser.
|
|
|
|
C: sp sp-parser ( p1 -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: sp-parser parse ( input parser -- list )
|
|
|
|
#! Skip all leading whitespace from the input then call
|
|
|
|
#! the parser on the remaining input.
|
|
|
|
>r left-trim-slice r> sp-parser-p1 parse ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: just-parser p1 ;
|
|
|
|
|
|
|
|
C: just just-parser ( p1 -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: just-parser parse ( input parser -- result )
|
|
|
|
#! Calls the given parser on the input removes
|
|
|
|
#! from the results anything where the remaining
|
|
|
|
#! input to be parsed is not empty. So ensures a
|
|
|
|
#! fully parsed input string.
|
2008-04-26 00:17:08 -04:00
|
|
|
just-parser-p1 parse [ parse-result-unparsed empty? ] lfilter ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: apply-parser p1 quot ;
|
|
|
|
|
|
|
|
C: <@ apply-parser ( parser quot -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: apply-parser parse ( input parser -- result )
|
2007-12-10 02:20:16 -05:00
|
|
|
#! Calls the parser on the input. For each successful
|
2007-11-25 17:07:32 -05:00
|
|
|
#! parse the quot is call with the parse result on the stack.
|
|
|
|
#! The result of that quotation then becomes the new parse result.
|
|
|
|
#! This allows modification of parse tree results (like
|
|
|
|
#! converting strings to integers, etc).
|
|
|
|
[ apply-parser-p1 ] keep apply-parser-quot
|
|
|
|
-rot parse [
|
|
|
|
[ parse-result-parsed swap call ] keep
|
|
|
|
parse-result-unparsed <parse-result>
|
|
|
|
] lmap-with ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: some-parser p1 ;
|
|
|
|
|
|
|
|
C: some some-parser ( p1 -- parser )
|
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: some-parser parse ( input parser -- result )
|
|
|
|
#! Calls the parser on the input, guarantees
|
|
|
|
#! the parse is complete (the remaining input is empty),
|
|
|
|
#! picks the first solution and only returns the parse
|
|
|
|
#! tree since the remaining input is empty.
|
|
|
|
some-parser-p1 just parse-1 ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: <& ( parser1 parser2 -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Same as <&> except discard the results of the second parser.
|
|
|
|
<&> [ first ] <@ ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: &> ( parser1 parser2 -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Same as <&> except discard the results of the first parser.
|
|
|
|
<&> [ second ] <@ ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: <:&> ( parser1 parser2 -- result )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Same as <&> except flatten the result.
|
2008-03-31 20:18:05 -04:00
|
|
|
<&> [ first2 suffix ] <@ ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: <&:> ( parser1 parser2 -- result )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Same as <&> except flatten the result.
|
2008-03-31 20:18:05 -04:00
|
|
|
<&> [ first2 swap prefix ] <@ ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-25 04:55:15 -05:00
|
|
|
: <:&:> ( parser1 parser2 -- result )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Same as <&> except flatten the result.
|
|
|
|
<&> [ first2 append ] <@ ;
|
2007-11-25 04:55:15 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
LAZY: <*> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
dup <*> <&:> { } succeed <|> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: <+> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Return a parser that accepts one or more occurences of the original
|
|
|
|
#! parser.
|
|
|
|
dup <*> <&:> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
LAZY: <?> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Return a parser that optionally uses the parser
|
2007-12-08 03:21:50 -05:00
|
|
|
#! if that parser would be successful.
|
2007-11-25 17:07:32 -05:00
|
|
|
[ 1array ] <@ f succeed <|> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
TUPLE: only-first-parser p1 ;
|
2007-11-25 17:07:32 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
LAZY: only-first ( parser -- parser )
|
2008-04-13 16:06:27 -04:00
|
|
|
only-first-parser boa ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-25 17:07:32 -05:00
|
|
|
M: only-first-parser parse ( input parser -- list )
|
|
|
|
#! Transform a parser into a parser that only yields
|
|
|
|
#! the first possibility.
|
|
|
|
only-first-parser-p1 parse 1 swap ltake ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
LAZY: <!*> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Like <*> but only return one possible result
|
|
|
|
#! containing all matching parses. Does not return
|
|
|
|
#! partial matches. Useful for efficiency since that's
|
|
|
|
#! usually the effect you want and cuts down on backtracking
|
|
|
|
#! required.
|
|
|
|
<*> only-first ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
LAZY: <!+> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Like <+> but only return one possible result
|
|
|
|
#! containing all matching parses. Does not return
|
|
|
|
#! partial matches. Useful for efficiency since that's
|
|
|
|
#! usually the effect you want and cuts down on backtracking
|
|
|
|
#! required.
|
|
|
|
<+> only-first ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
LAZY: <!?> ( parser -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Like <?> but only return one possible result
|
|
|
|
#! containing all matching parses. Does not return
|
|
|
|
#! partial matches. Useful for efficiency since that's
|
|
|
|
#! usually the effect you want and cuts down on backtracking
|
|
|
|
#! required.
|
|
|
|
<?> only-first ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-12-08 03:21:50 -05:00
|
|
|
LAZY: <(?)> ( parser -- parser )
|
|
|
|
#! Like <?> but take shortest match first.
|
|
|
|
f succeed swap [ 1array ] <@ <|> ;
|
|
|
|
|
2007-11-24 18:07:01 -05:00
|
|
|
LAZY: <(*)> ( parser -- parser )
|
|
|
|
#! Like <*> but take shortest match first.
|
2007-09-20 18:09:08 -04:00
|
|
|
#! Implementation by Matthew Willis.
|
|
|
|
{ } succeed swap dup <(*)> <&:> <|> ;
|
|
|
|
|
|
|
|
LAZY: <(+)> ( parser -- parser )
|
2007-11-24 18:07:01 -05:00
|
|
|
#! Like <+> but take shortest match first.
|
2007-09-20 18:09:08 -04:00
|
|
|
#! Implementation by Matthew Willis.
|
|
|
|
dup <(*)> <&:> ;
|
|
|
|
|
2007-11-24 20:19:39 -05:00
|
|
|
: pack ( close body open -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Parse a construct enclosed by two symbols,
|
|
|
|
#! given a parser for the opening symbol, the
|
|
|
|
#! closing symbol, and the body.
|
|
|
|
<& &> ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2007-11-24 20:19:39 -05:00
|
|
|
: nonempty-list-of ( items separator -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
[ over &> <*> <&:> ] keep <?> tuck pack ;
|
2007-11-24 20:19:39 -05:00
|
|
|
|
|
|
|
: list-of ( items separator -- parser )
|
2007-11-25 17:07:32 -05:00
|
|
|
#! Given a parser for the separator and for the
|
|
|
|
#! items themselves, return a parser that parses
|
|
|
|
#! lists of those items. The parse tree is an
|
|
|
|
#! array of the parsed items.
|
|
|
|
nonempty-list-of { } succeed <|> ;
|
2007-11-24 18:07:01 -05:00
|
|
|
|
|
|
|
LAZY: surrounded-by ( parser start end -- parser' )
|
2008-03-29 21:36:58 -04:00
|
|
|
[ token ] bi@ swapd pack ;
|
2007-11-30 20:20:38 -05:00
|
|
|
|
2007-12-02 15:55:44 -05:00
|
|
|
: exactly-n ( parser n -- parser' )
|
2007-12-08 03:21:50 -05:00
|
|
|
swap <repetition> <and-parser> [ flatten ] <@ ;
|
2007-11-30 20:20:38 -05:00
|
|
|
|
2007-12-02 15:55:44 -05:00
|
|
|
: at-most-n ( parser n -- parser' )
|
|
|
|
dup zero? [
|
|
|
|
2drop epsilon
|
|
|
|
] [
|
|
|
|
2dup exactly-n
|
|
|
|
-rot 1- at-most-n <|>
|
|
|
|
] if ;
|
2007-11-30 20:20:38 -05:00
|
|
|
|
2007-12-02 15:55:44 -05:00
|
|
|
: at-least-n ( parser n -- parser' )
|
|
|
|
dupd exactly-n swap <*> <&> ;
|
2007-11-30 20:20:38 -05:00
|
|
|
|
2007-12-02 15:55:44 -05:00
|
|
|
: from-m-to-n ( parser m n -- parser' )
|
2007-12-08 03:21:50 -05:00
|
|
|
>r [ exactly-n ] 2keep r> swap - at-most-n <:&:> ;
|