fix parser combinator bit rot

chris.double 2006-08-02 05:03:47 +00:00
parent 5bb0a8bee3
commit e4ca1e2bd3
3 changed files with 118 additions and 214 deletions

View File

@ -2,4 +2,5 @@ REQUIRES: lazy-lists ;
PROVIDE: parser-combinators {
"parser-combinators.factor"
} {
"tests.factor"
} ;

View File

@ -20,9 +20,11 @@
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
USING: lazy-lists kernel sequences strings math io ;
USING: lazy-lists kernel sequences strings math io arrays ;
IN: parser-combinators
TUPLE: parse-result parsed unparsed ;
: h:t ( object -- head tail )
#! Return the head and tail of the object.
dup first swap 1 tail ;
@ -31,26 +33,26 @@ IN: parser-combinators
#! A parser that parses a specific sequence of
#! characters.
2dup length head over = [
swap over length tail swons unit delay lunit
swap over length tail <parse-result> lunit
] [
2drop lnil
2drop nil
] if ;
: token ( string -- parser )
#! Return a token parser that parses the given string.
[ token-parser ] cons ;
[ token-parser ] curry ;
: satisfy-parser ( inp pred -- llist )
#! A parser that succeeds if the predicate,
#! when passed the first character in the input, returns
#! true.
over empty? [
2drop lnil
2drop nil
] [
over first swap call [
h:t swons unit delay lunit
h:t <parse-result> lunit
] [
drop lnil
drop nil
] if
] if ;
@ -58,7 +60,7 @@ IN: parser-combinators
#! Return a parser that succeeds if the predicate 'p',
#! when passed the first character in the input, returns
#! true.
[ satisfy-parser ] cons ;
[ satisfy-parser ] curry ;
: satisfy2-parser ( inp pred quot -- llist )
#! A parser that succeeds if the predicate,
@ -68,21 +70,21 @@ IN: parser-combinators
#! of that call is returned as the result portion of the
#! successfull parse lazy list.
-rot over first swap call [ ( quot inp -- )
h:t >r swap call r> swons unit delay lunit
h:t >r swap call r> <parse-result> lunit
] [
2drop lnil
2drop nil
] if ;
: satisfy2 ( pred quot -- parser )
#! Return a satisfy2-parser.
[ satisfy2-parser ] cons cons ;
[ satisfy2-parser ] curry curry ;
: epsilon-parser ( input -- llist )
#! A parser that parses the empty string. It
#! does not consume any input and always returns
#! an empty list as the parse tree with the
#! unmodified input.
"" cons unit delay lunit ;
"" swap <parse-result> lunit ;
: epsilon ( -- parser )
#! Return an epsilon parser
@ -91,53 +93,51 @@ IN: parser-combinators
: succeed-parser ( input result -- llist )
#! A parser that always returns 'result' as a
#! successful parse with no input consumed.
cons unit delay lunit ;
swap <parse-result> lunit ;
: succeed ( result -- parser )
#! Return a succeed parser.
[ succeed-parser ] cons ;
[ succeed-parser ] curry ;
: fail-parser ( input -- llist )
#! A parser that always fails and returns
#! an empty list of successes.
drop lnil ;
drop nil ;
: fail ( -- parser )
#! Return a fail-parser.
[ fail-parser ] ;
: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
: <&>-do-parser3 ( <parse-result> x -- result )
#! Called by <&>-do-parser2 on each result of the
#! parse from parser2.
>r uncons r> ( x1 xs2 x )
swap cons cons ;
>r dup parse-result-unparsed swap parse-result-parsed r> ( x1 xs2 x )
swap 2array swap <parse-result> ;
: unswons uncons swap ;
: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
: <&>-do-parser2 ( <parse-result> parser2 -- result )
#! Called by the <&>-parser on each result of the
#! successfull parse of parser1. It's input is the
#! cons containing the data parsed and the remaining
#! input. This word will parser2 on the remaining input
#! returning a new cons cell containing the combined
#! parse result.
>r unswons r> ( x xs parser2 )
>r dup parse-result-parsed swap parse-result-unparsed r> ( x xs parser2 )
call swap ( llist x )
[ <&>-do-parser3 ] cons lmap ;
[ <&>-do-parser3 ] curry lmap ;
: <&>-parser ( input parser1 parser2 -- llist )
#! Parse 'input' by sequentially combining the
#! two parsers. First parser1 is applied to the
#! input then parser2 is applied to the rest of
#! the input strings from the first parser.
>r call r> ( [[ x xs ]] p2 -- result )
[ <&>-do-parser2 ] cons lmap lappend* ;
>r call r> ( <parse-result> p2 -- result )
[ <&>-do-parser2 ] curry lmap lappend* ;
: <&> ( parser1 parser2 -- parser )
#! Sequentially combine two parsers, returning a parser
#! that first calls p1, then p2 all remaining results from
#! p1.
[ <&>-parser ] cons cons ;
[ <&>-parser ] curry curry ;
: <|>-parser ( input parser1 parser2 -- result )
#! Return the combined list resulting from the parses
@ -148,7 +148,7 @@ IN: parser-combinators
: <|> ( p1 p2 -- parser )
#! Choice operator for parsers. Return a parser that does
#! p1 or p2 depending on which will succeed.
[ <|>-parser ] cons cons ;
[ <|>-parser ] curry curry ;
: string-ltrim ( string -- string )
#! Return a new string without any leading whitespace
@ -163,25 +163,25 @@ IN: parser-combinators
: sp ( parser -- parser )
#! Return a parser that first skips all whitespace before
#! calling the original parser.
[ sp-parser ] cons ;
[ sp-parser ] curry ;
: just-parser ( input parser -- result )
#! Calls the given parser on the input removes
#! from the results anything where the remaining
#! input to be parsed is not empty. So ensures a
#! fully parsed input string.
call [ car empty? ] lsubset ;
call [ parse-result-unparsed empty? ] lsubset ;
: just ( parser -- parser )
#! Return an instance of the just-parser.
[ just-parser ] cons ;
[ just-parser ] curry ;
: (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
#! Perform the result replacement step of <@-parser.
#! Given a successfull parse result, calls the quotation
#! with the result portion on the stack. The result of
#! that call is then used as the new result.
swap uncons rot call cons ;
swap dup parse-result-unparsed swap parse-result-parsed rot call swap <parse-result> ;
: <@-parser ( input parser quot -- result )
#! Calls the parser on the input. For each successfull
@ -189,26 +189,26 @@ IN: parser-combinators
#! The result of that quotation then becomes the new parse result.
#! This allows modification of parse tree results (like
#! converting strings to integers, etc).
-rot call dup lnil? [ ( quot lnil -- )
-rot call dup nil? [ ( quot nil -- )
nip
] [ ( quot result -- )
[ (<@-parser-replace) ] rot swons lmap
[ (<@-parser-replace) ] rot swap curry lmap
] if ;
: <@ ( parser quot -- parser )
#! Return an <@-parser.
[ <@-parser ] cons cons ;
[ <@-parser ] curry curry ;
: some-parser ( input parser -- result )
#! Calls the parser on the input, guarantees
#! the parse is complete (the remaining input is empty),
#! picks the first solution and only returns the parse
#! tree since the remaining input is empty.
just call lcar cdr ;
just call car parse-result-parsed ;
: some ( parser -- deterministic-parser )
#! Creates a 'some-parser'.
[ some-parser ] cons ;
[ some-parser ] curry ;
: <&-parser ( input parser1 parser2 -- result )
#! Same as <&> except discard the results of the second parser.
@ -216,31 +216,23 @@ IN: parser-combinators
: <& ( parser1 parser2 -- parser )
#! Same as <&> except discard the results of the second parser.
[ <&-parser ] cons cons ;
[ <&-parser ] curry curry ;
: &>-parser ( input parser1 parser2 -- result )
#! Same as <&> except discard the results of the first parser.
<&> [ 1 tail ] <@ call ;
<&> [ second ] <@ call ;
: &> ( parser1 parser2 -- parser )
#! Same as <&> except discard the results of the first parser.
[ &>-parser ] cons cons ;
: (a,(b,c))>((a,b,c)) ( list -- list )
#! Convert a list where the car is a single value
#! and the cdr is a list to a list containing a flattened
#! list.
uncons car cons unit ;
[ &>-parser ] curry curry ;
: <:&>-parser ( input parser1 parser2 -- result )
#! Same as <&> except postprocess the result with
#! (a,(b,c))>((a,b,c)).
<&> [ (a,(b,c))>((a,b,c)) ] <@ call ;
#! Same as <&> except flatten the result.
<&> [ flatten ] <@ call ;
: <:&> ( parser1 parser2 -- parser )
#! Same as <&> except postprocess the result with
#! (a,(b,c))>((a,b,c)).
[ <:&>-parser ] cons cons ;
#! Same as <&> except flatten the result.
[ <:&>-parser ] curry curry ;
DEFER: <*>
@ -251,7 +243,7 @@ DEFER: <*>
: <*> ( parser -- parser )
#! Return a parser that accepts zero or more occurences of the original
#! parser.
[ (<*>) call ] cons ;
[ (<*>) call ] curry ;
: (<+>) ( parser -- parser )
#! Non-delayed implementation of <+>
@ -260,7 +252,7 @@ DEFER: <*>
: <+> ( parser -- parser )
#! Return a parser that accepts one or more occurences of the original
#! parser.
[ (<+>) call ] cons ;
[ (<+>) call ] curry ;
: (<?>) ( parser -- parser )
#! Non-delayed implementation of <?>
@ -269,4 +261,4 @@ DEFER: <*>
: <?> ( parser -- parser )
#! Return a parser that optionally uses the parser
#! if that parser would be successfull.
[ (<?>) call ] cons ;
[ (<?>) call ] curry ;

View File

@ -20,77 +20,77 @@
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
USING: kernel lazy test errors strings parser lists math sequences parser-combinators ;
USING: kernel lazy-lists test errors strings parser math sequences parser-combinators arrays ;
IN: scratchpad
! Testing <&>
[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
"abcd" "a" token "b" token <&> call llist>list
[ { T{ parse-result f { "a" "b" } "cd" } } ] [
"abcd" "a" token "b" token <&> call list>array
] unit-test
[ [ [[ "d" [[ [[ "a" "b" ]] "c" ]] ]] ] ] [
"abcd" "a" token "b" token <&> "c" token <&> call llist>list
[ { T{ parse-result f { { "a" "b" } "c" } "d" } } ] [
"abcd" "a" token "b" token <&> "c" token <&> call list>array
] unit-test
[ [ [[ "d" [[ "a" [[ "b" "c" ]] ]] ]] ] ] [
"abcd" "a" token "b" token "c" token <&> <&> call llist>list
[ { T{ parse-result f { "a" { "b" "c" } } "d" } } ] [
"abcd" "a" token "b" token "c" token <&> <&> call list>array
] unit-test
[ f ] [
"decd" "a" token "b" token <&> call llist>list
[ { } ] [
"decd" "a" token "b" token <&> call list>array
] unit-test
[ f ] [
"dbcd" "a" token "b" token <&> call llist>list
[ { } ] [
"dbcd" "a" token "b" token <&> call list>array
] unit-test
[ f ] [
"adcd" "a" token "b" token <&> call llist>list
[ { } ] [
"adcd" "a" token "b" token <&> call list>array
] unit-test
! Testing <|>
[ [ [[ "bcd" "a" ]] ] ] [
"abcd" "a" token "b" token <|> call llist>list
[ { T{ parse-result f "a" "bcd" } } ] [
"abcd" "a" token "b" token <|> call list>array
] unit-test
[ [ [[ "bcd" "b" ]] ] ] [
"bbcd" "a" token "b" token <|> call llist>list
[ { T{ parse-result f "b" "bcd" } } ] [
"bbcd" "a" token "b" token <|> call list>array
] unit-test
[ f ] [
"cbcd" "a" token "b" token <|> call llist>list
[ { } ] [
"cbcd" "a" token "b" token <|> call list>array
] unit-test
! Testing sp
[ f ] [
" abcd" "a" token call llist>list
[ { } ] [
" abcd" "a" token call list>array
] unit-test
[ [ [[ "bcd" "a" ]] ] ] [
" abcd" "a" token sp call llist>list
[ { T{ parse-result f "a" "bcd" } } ] [
" abcd" "a" token sp call list>array
] unit-test
! Testing just
[ [ [[ "" "abcd" ]] [[ "d" "abc" ]] ] ] [
"abcd" "abcd" token "abc" token <|> call llist>list
[ { T{ parse-result f "abcd" "" } T{ parse-result f "abc" "d" } } ] [
"abcd" "abcd" token "abc" token <|> call list>array
] unit-test
[ [ [[ "" "abcd" ]] ] ] [
"abcd" "abcd" token "abc" token <|> just call llist>list
[ { T{ parse-result f "abcd" "" } } ] [
"abcd" "abcd" token "abc" token <|> just call list>array
] unit-test
! Testing <@
[ [ [[ "1234" 48 ]] ] ] [
"01234" [ digit? ] satisfy call llist>list
[ { T{ parse-result f 48 "1234" } } ] [
"01234" [ digit? ] satisfy call list>array
] unit-test
[ [ [[ "1234" 0 ]] ] ] [
"01234" [ digit? ] satisfy [ digit> ] <@ call llist>list
[ { T{ parse-result f 0 "1234" } } ] [
"01234" [ digit? ] satisfy [ digit> ] <@ call list>array
] unit-test
! Testing some
[ [ [[ "1" "begin" ]] ] ] [
"begin1" "begin" token call llist>list
[ { T{ parse-result f "begin" "1" } } ] [
"begin1" "begin" token call list>array
] unit-test
[
@ -101,160 +101,71 @@ IN: scratchpad
"begin" "begin" token some call
] unit-test
! parens test function
: parens ( -- parser )
#! Return a parser that parses nested parentheses.
[ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ] ;
[ [ [[ "" "" ]] ] ] [
"" parens call llist>list
] unit-test
[
[[ "" [[ [[ [[ "(" "" ]] ")" ]] "" ]] ]]
[[ "()" "" ]]
] [
"()" parens call [ ] leach
] unit-test
[ [[ "((()))" "" ]] ] [
"((()))" parens call lcdr lcar
] unit-test
! <& parser and &> parser
[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
"abcd" "a" token "b" token <&> call llist>list
[ { T{ parse-result f { "a" "b" } "cd" } } ] [
"abcd" "a" token "b" token <&> call list>array
] unit-test
[ [ [[ "cd" "a" ]] ] ] [
"abcd" "a" token "b" token <& call llist>list
[ { T{ parse-result f "a" "cd" } } ] [
"abcd" "a" token "b" token <& call list>array
] unit-test
[ [ [[ "cd" "b" ]] ] ] [
"abcd" "a" token "b" token &> call llist>list
] unit-test
! nesting example
: parens-open "(" token ;
: parens-close ")" token ;
: nesting
[ parens-open
nesting &>
parens-close <&
nesting <&>
[ unswons 1 + max ] <@
0 succeed <|>
call ] ;
[ [ [[ "" 0 ]] ] ] [
"" nesting just call llist>list
] unit-test
[ [ [[ "" 1 ]] ] ] [
"()" nesting just call llist>list
] unit-test
[ [ [[ "" 2 ]] ] ] [
"(())" nesting just call llist>list
] unit-test
[ [ [[ "" 3 ]] ] ] [
"()(()(()()))()" nesting just call llist>list
[ { T{ parse-result f "b" "cd" } } ] [
"abcd" "a" token "b" token &> call list>array
] unit-test
! Testing <*> and <:&>
[ [ [ "234" [ "1" ] ] [ "1234" ] ] ] [
"1234" "1" token <*> call llist>list
[ { T{ parse-result f { "1" } "234" } T{ parse-result f [ ] "1234" } } ] [
"1234" "1" token <*> call list>array
] unit-test
[
[ "234" [ "1" "1" "1" "1" ] ]
[ "1234" [ "1" "1" "1" ] ]
[ "11234" [ "1" "1" ] ]
[ "111234" [ "1" ] ]
[ "1111234" ]
{
T{ parse-result f { "1" "1" "1" "1" } "234" }
T{ parse-result f { "1" "1" "1" } "1234" }
T{ parse-result f { "1" "1" } "11234" }
T{ parse-result f { "1" } "111234" }
T{ parse-result f [ ] "1111234" }
}
] [
"1111234" "1" token <*> call [ ] leach
"1111234" "1" token <*> call list>array
] unit-test
[
[ "234" "1111" ]
[ "1234" "111" ]
[ "11234" "11" ]
[ "111234" "1" ]
[ "1111234" f ]
{
T{ parse-result f { "1111" } "234" }
T{ parse-result f { "111" } "1234" }
T{ parse-result f { "11" } "11234" }
T{ parse-result f { "1" } "111234" }
T{ parse-result f { [ ] } "1111234" }
}
] [
"1111234" "1" token <*> [ car concat unit ] <@ call [ ] leach
"1111234" "1" token <*> [ concat 1array ] <@ call list>array
] unit-test
[ [ "234" ] ] [
"234" "1" token <*> call [ ] leach
] unit-test
: pdigit [ digit? ] satisfy [ digit> ] <@ ;
: pnatural pdigit <*> ;
: pnatural2 pnatural [ car [ >digit ] map >string dup pempty? [ drop 0 ] [ string>number ] if unit ] <@ ;
[
[ "" 12345 ]
[ "5" 1234 ]
[ "45" 123 ]
[ "345" 12 ]
[ "2345" 1 ]
[ "12345" 0 ]
] [
"12345" pnatural2 call [ ] leach
[ { T{ parse-result f [ ] "234" } } ] [
"234" "1" token <*> call list>array
] unit-test
! Testing <+>
[ [ "234" [ "1" ] ] ] [
"1234" "1" token <+> call [ ] leach
[ { T{ parse-result f { "1" } "234" } } ] [
"1234" "1" token <+> call list>array
] unit-test
[
[ "234" [ "1" "1" "1" "1" ] ]
[ "1234" [ "1" "1" "1" ] ]
[ "11234" [ "1" "1" ] ]
[ "111234" [ "1" ] ]
{
T{ parse-result f { "1" "1" "1" "1" } "234" }
T{ parse-result f { "1" "1" "1" } "1234" }
T{ parse-result f { "1" "1" } "11234" }
T{ parse-result f { "1" } "111234" }
}
] [
"1111234" "1" token <+> call [ ] leach
"1111234" "1" token <+> call list>array
] unit-test
[ ] [
"234" "1" token <+> call [ ] leach
[ { } ] [
"234" "1" token <+> call list>array
] unit-test
! Testing <?>
[ [[ "" [[ [ "a" ] "b" ]] ]] ] [
"ab" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
] unit-test
[ ] [
"ac" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
] unit-test
[ [[ "" [[ [ "a" 5 ] "b" ]] ]] ] [
"a5b" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
] unit-test
: pinteger "-" token <?> pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ;
[
[ "" 123 ]
[ "3" 12 ]
[ "23" 1 ]
[ "123" 0 ]
] [
"123" pinteger call [ ] leach
] unit-test
[
[[ "" -123 ]]
[[ "3" -12 ]]
[[ "23" -1 ]]
[[ "123" 0 ]]
[ "-123" 0 ]
] [
"-123" pinteger call [ ] leach
] unit-test