From e4ca1e2bd30a0b837f3af6dd642dc0430a5f33ae Mon Sep 17 00:00:00 2001 From: "chris.double" Date: Wed, 2 Aug 2006 05:03:47 +0000 Subject: [PATCH] fix parser combinator bit rot --- contrib/parser-combinators/load.factor | 1 + .../parser-combinators.factor | 100 ++++---- contrib/parser-combinators/tests.factor | 231 ++++++------------ 3 files changed, 118 insertions(+), 214 deletions(-) diff --git a/contrib/parser-combinators/load.factor b/contrib/parser-combinators/load.factor index 7997713880..c5f108ccd5 100644 --- a/contrib/parser-combinators/load.factor +++ b/contrib/parser-combinators/load.factor @@ -2,4 +2,5 @@ REQUIRES: lazy-lists ; PROVIDE: parser-combinators { "parser-combinators.factor" } { + "tests.factor" } ; \ No newline at end of file diff --git a/contrib/parser-combinators/parser-combinators.factor b/contrib/parser-combinators/parser-combinators.factor index e55eeebb34..754d2679bd 100644 --- a/contrib/parser-combinators/parser-combinators.factor +++ b/contrib/parser-combinators/parser-combinators.factor @@ -20,9 +20,11 @@ ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -USING: lazy-lists kernel sequences strings math io ; +USING: lazy-lists kernel sequences strings math io arrays ; IN: parser-combinators +TUPLE: parse-result parsed unparsed ; + : h:t ( object -- head tail ) #! Return the head and tail of the object. dup first swap 1 tail ; @@ -31,26 +33,26 @@ IN: parser-combinators #! A parser that parses a specific sequence of #! characters. 2dup length head over = [ - swap over length tail swons unit delay lunit + swap over length tail lunit ] [ - 2drop lnil + 2drop nil ] if ; : token ( string -- parser ) #! Return a token parser that parses the given string. - [ token-parser ] cons ; + [ token-parser ] curry ; : satisfy-parser ( inp pred -- llist ) #! A parser that succeeds if the predicate, #! when passed the first character in the input, returns #! true. over empty? [ - 2drop lnil + 2drop nil ] [ over first swap call [ - h:t swons unit delay lunit + h:t lunit ] [ - drop lnil + drop nil ] if ] if ; @@ -58,7 +60,7 @@ IN: parser-combinators #! Return a parser that succeeds if the predicate 'p', #! when passed the first character in the input, returns #! true. - [ satisfy-parser ] cons ; + [ satisfy-parser ] curry ; : satisfy2-parser ( inp pred quot -- llist ) #! A parser that succeeds if the predicate, @@ -68,21 +70,21 @@ IN: parser-combinators #! of that call is returned as the result portion of the #! successfull parse lazy list. -rot over first swap call [ ( quot inp -- ) - h:t >r swap call r> swons unit delay lunit + h:t >r swap call r> lunit ] [ - 2drop lnil + 2drop nil ] if ; : satisfy2 ( pred quot -- parser ) #! Return a satisfy2-parser. - [ satisfy2-parser ] cons cons ; + [ satisfy2-parser ] curry curry ; : epsilon-parser ( input -- llist ) #! A parser that parses the empty string. It #! does not consume any input and always returns #! an empty list as the parse tree with the #! unmodified input. - "" cons unit delay lunit ; + "" swap lunit ; : epsilon ( -- parser ) #! Return an epsilon parser @@ -91,53 +93,51 @@ IN: parser-combinators : succeed-parser ( input result -- llist ) #! A parser that always returns 'result' as a #! successful parse with no input consumed. - cons unit delay lunit ; + swap lunit ; : succeed ( result -- parser ) #! Return a succeed parser. - [ succeed-parser ] cons ; + [ succeed-parser ] curry ; : fail-parser ( input -- llist ) #! A parser that always fails and returns #! an empty list of successes. - drop lnil ; + drop nil ; : fail ( -- parser ) #! Return a fail-parser. [ fail-parser ] ; -: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result ) +: <&>-do-parser3 ( x -- result ) #! Called by <&>-do-parser2 on each result of the #! parse from parser2. - >r uncons r> ( x1 xs2 x ) - swap cons cons ; + >r dup parse-result-unparsed swap parse-result-parsed r> ( x1 xs2 x ) + swap 2array swap ; -: unswons uncons swap ; - -: <&>-do-parser2 ( [[ x xs ]] parser2 -- result ) +: <&>-do-parser2 ( parser2 -- result ) #! Called by the <&>-parser on each result of the #! successfull parse of parser1. It's input is the #! cons containing the data parsed and the remaining #! input. This word will parser2 on the remaining input #! returning a new cons cell containing the combined #! parse result. - >r unswons r> ( x xs parser2 ) + >r dup parse-result-parsed swap parse-result-unparsed r> ( x xs parser2 ) call swap ( llist x ) - [ <&>-do-parser3 ] cons lmap ; + [ <&>-do-parser3 ] curry lmap ; : <&>-parser ( input parser1 parser2 -- llist ) #! Parse 'input' by sequentially combining the #! two parsers. First parser1 is applied to the #! input then parser2 is applied to the rest of #! the input strings from the first parser. - >r call r> ( [[ x xs ]] p2 -- result ) - [ <&>-do-parser2 ] cons lmap lappend* ; + >r call r> ( p2 -- result ) + [ <&>-do-parser2 ] curry lmap lappend* ; : <&> ( parser1 parser2 -- parser ) #! Sequentially combine two parsers, returning a parser #! that first calls p1, then p2 all remaining results from #! p1. - [ <&>-parser ] cons cons ; + [ <&>-parser ] curry curry ; : <|>-parser ( input parser1 parser2 -- result ) #! Return the combined list resulting from the parses @@ -148,7 +148,7 @@ IN: parser-combinators : <|> ( p1 p2 -- parser ) #! Choice operator for parsers. Return a parser that does #! p1 or p2 depending on which will succeed. - [ <|>-parser ] cons cons ; + [ <|>-parser ] curry curry ; : string-ltrim ( string -- string ) #! Return a new string without any leading whitespace @@ -163,25 +163,25 @@ IN: parser-combinators : sp ( parser -- parser ) #! Return a parser that first skips all whitespace before #! calling the original parser. - [ sp-parser ] cons ; + [ sp-parser ] curry ; : just-parser ( input parser -- result ) #! Calls the given parser on the input removes #! from the results anything where the remaining #! input to be parsed is not empty. So ensures a #! fully parsed input string. - call [ car empty? ] lsubset ; + call [ parse-result-unparsed empty? ] lsubset ; : just ( parser -- parser ) #! Return an instance of the just-parser. - [ just-parser ] cons ; + [ just-parser ] curry ; : (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] ) #! Perform the result replacement step of <@-parser. #! Given a successfull parse result, calls the quotation #! with the result portion on the stack. The result of #! that call is then used as the new result. - swap uncons rot call cons ; + swap dup parse-result-unparsed swap parse-result-parsed rot call swap ; : <@-parser ( input parser quot -- result ) #! Calls the parser on the input. For each successfull @@ -189,26 +189,26 @@ IN: parser-combinators #! The result of that quotation then becomes the new parse result. #! This allows modification of parse tree results (like #! converting strings to integers, etc). - -rot call dup lnil? [ ( quot lnil -- ) + -rot call dup nil? [ ( quot nil -- ) nip ] [ ( quot result -- ) - [ (<@-parser-replace) ] rot swons lmap + [ (<@-parser-replace) ] rot swap curry lmap ] if ; : <@ ( parser quot -- parser ) #! Return an <@-parser. - [ <@-parser ] cons cons ; + [ <@-parser ] curry curry ; : some-parser ( input parser -- result ) #! Calls the parser on the input, guarantees #! the parse is complete (the remaining input is empty), #! picks the first solution and only returns the parse #! tree since the remaining input is empty. - just call lcar cdr ; + just call car parse-result-parsed ; : some ( parser -- deterministic-parser ) #! Creates a 'some-parser'. - [ some-parser ] cons ; + [ some-parser ] curry ; : <&-parser ( input parser1 parser2 -- result ) #! Same as <&> except discard the results of the second parser. @@ -216,31 +216,23 @@ IN: parser-combinators : <& ( parser1 parser2 -- parser ) #! Same as <&> except discard the results of the second parser. - [ <&-parser ] cons cons ; + [ <&-parser ] curry curry ; : &>-parser ( input parser1 parser2 -- result ) #! Same as <&> except discard the results of the first parser. - <&> [ 1 tail ] <@ call ; + <&> [ second ] <@ call ; : &> ( parser1 parser2 -- parser ) #! Same as <&> except discard the results of the first parser. - [ &>-parser ] cons cons ; - -: (a,(b,c))>((a,b,c)) ( list -- list ) - #! Convert a list where the car is a single value - #! and the cdr is a list to a list containing a flattened - #! list. - uncons car cons unit ; + [ &>-parser ] curry curry ; : <:&>-parser ( input parser1 parser2 -- result ) - #! Same as <&> except postprocess the result with - #! (a,(b,c))>((a,b,c)). - <&> [ (a,(b,c))>((a,b,c)) ] <@ call ; + #! Same as <&> except flatten the result. + <&> [ flatten ] <@ call ; : <:&> ( parser1 parser2 -- parser ) - #! Same as <&> except postprocess the result with - #! (a,(b,c))>((a,b,c)). - [ <:&>-parser ] cons cons ; + #! Same as <&> except flatten the result. + [ <:&>-parser ] curry curry ; DEFER: <*> @@ -251,7 +243,7 @@ DEFER: <*> : <*> ( parser -- parser ) #! Return a parser that accepts zero or more occurences of the original #! parser. - [ (<*>) call ] cons ; + [ (<*>) call ] curry ; : (<+>) ( parser -- parser ) #! Non-delayed implementation of <+> @@ -260,7 +252,7 @@ DEFER: <*> : <+> ( parser -- parser ) #! Return a parser that accepts one or more occurences of the original #! parser. - [ (<+>) call ] cons ; + [ (<+>) call ] curry ; : () ( parser -- parser ) #! Non-delayed implementation of @@ -269,4 +261,4 @@ DEFER: <*> : ( parser -- parser ) #! Return a parser that optionally uses the parser #! if that parser would be successfull. - [ () call ] cons ; + [ () call ] curry ; diff --git a/contrib/parser-combinators/tests.factor b/contrib/parser-combinators/tests.factor index f2bf3cf3c9..5863ddcbe4 100644 --- a/contrib/parser-combinators/tests.factor +++ b/contrib/parser-combinators/tests.factor @@ -20,77 +20,77 @@ ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -USING: kernel lazy test errors strings parser lists math sequences parser-combinators ; +USING: kernel lazy-lists test errors strings parser math sequences parser-combinators arrays ; IN: scratchpad ! Testing <&> -[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [ - "abcd" "a" token "b" token <&> call llist>list +[ { T{ parse-result f { "a" "b" } "cd" } } ] [ + "abcd" "a" token "b" token <&> call list>array ] unit-test -[ [ [[ "d" [[ [[ "a" "b" ]] "c" ]] ]] ] ] [ - "abcd" "a" token "b" token <&> "c" token <&> call llist>list +[ { T{ parse-result f { { "a" "b" } "c" } "d" } } ] [ + "abcd" "a" token "b" token <&> "c" token <&> call list>array ] unit-test -[ [ [[ "d" [[ "a" [[ "b" "c" ]] ]] ]] ] ] [ - "abcd" "a" token "b" token "c" token <&> <&> call llist>list +[ { T{ parse-result f { "a" { "b" "c" } } "d" } } ] [ + "abcd" "a" token "b" token "c" token <&> <&> call list>array ] unit-test -[ f ] [ - "decd" "a" token "b" token <&> call llist>list +[ { } ] [ + "decd" "a" token "b" token <&> call list>array ] unit-test -[ f ] [ - "dbcd" "a" token "b" token <&> call llist>list +[ { } ] [ + "dbcd" "a" token "b" token <&> call list>array ] unit-test -[ f ] [ - "adcd" "a" token "b" token <&> call llist>list +[ { } ] [ + "adcd" "a" token "b" token <&> call list>array ] unit-test ! Testing <|> -[ [ [[ "bcd" "a" ]] ] ] [ - "abcd" "a" token "b" token <|> call llist>list +[ { T{ parse-result f "a" "bcd" } } ] [ + "abcd" "a" token "b" token <|> call list>array ] unit-test -[ [ [[ "bcd" "b" ]] ] ] [ - "bbcd" "a" token "b" token <|> call llist>list +[ { T{ parse-result f "b" "bcd" } } ] [ + "bbcd" "a" token "b" token <|> call list>array ] unit-test -[ f ] [ - "cbcd" "a" token "b" token <|> call llist>list +[ { } ] [ + "cbcd" "a" token "b" token <|> call list>array ] unit-test ! Testing sp -[ f ] [ - " abcd" "a" token call llist>list +[ { } ] [ + " abcd" "a" token call list>array ] unit-test -[ [ [[ "bcd" "a" ]] ] ] [ - " abcd" "a" token sp call llist>list +[ { T{ parse-result f "a" "bcd" } } ] [ + " abcd" "a" token sp call list>array ] unit-test ! Testing just -[ [ [[ "" "abcd" ]] [[ "d" "abc" ]] ] ] [ - "abcd" "abcd" token "abc" token <|> call llist>list +[ { T{ parse-result f "abcd" "" } T{ parse-result f "abc" "d" } } ] [ + "abcd" "abcd" token "abc" token <|> call list>array ] unit-test -[ [ [[ "" "abcd" ]] ] ] [ - "abcd" "abcd" token "abc" token <|> just call llist>list +[ { T{ parse-result f "abcd" "" } } ] [ + "abcd" "abcd" token "abc" token <|> just call list>array ] unit-test ! Testing <@ -[ [ [[ "1234" 48 ]] ] ] [ - "01234" [ digit? ] satisfy call llist>list +[ { T{ parse-result f 48 "1234" } } ] [ + "01234" [ digit? ] satisfy call list>array ] unit-test -[ [ [[ "1234" 0 ]] ] ] [ - "01234" [ digit? ] satisfy [ digit> ] <@ call llist>list +[ { T{ parse-result f 0 "1234" } } ] [ + "01234" [ digit? ] satisfy [ digit> ] <@ call list>array ] unit-test ! Testing some -[ [ [[ "1" "begin" ]] ] ] [ - "begin1" "begin" token call llist>list +[ { T{ parse-result f "begin" "1" } } ] [ + "begin1" "begin" token call list>array ] unit-test [ @@ -101,160 +101,71 @@ IN: scratchpad "begin" "begin" token some call ] unit-test -! parens test function -: parens ( -- parser ) - #! Return a parser that parses nested parentheses. - [ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ] ; - -[ [ [[ "" "" ]] ] ] [ - "" parens call llist>list -] unit-test - -[ - [[ "" [[ [[ [[ "(" "" ]] ")" ]] "" ]] ]] - [[ "()" "" ]] -] [ - "()" parens call [ ] leach -] unit-test - -[ [[ "((()))" "" ]] ] [ - "((()))" parens call lcdr lcar -] unit-test - ! <& parser and &> parser -[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [ - "abcd" "a" token "b" token <&> call llist>list +[ { T{ parse-result f { "a" "b" } "cd" } } ] [ + "abcd" "a" token "b" token <&> call list>array ] unit-test -[ [ [[ "cd" "a" ]] ] ] [ - "abcd" "a" token "b" token <& call llist>list +[ { T{ parse-result f "a" "cd" } } ] [ + "abcd" "a" token "b" token <& call list>array ] unit-test -[ [ [[ "cd" "b" ]] ] ] [ - "abcd" "a" token "b" token &> call llist>list -] unit-test - -! nesting example -: parens-open "(" token ; -: parens-close ")" token ; -: nesting - [ parens-open - nesting &> - parens-close <& - nesting <&> - [ unswons 1 + max ] <@ - 0 succeed <|> - call ] ; - -[ [ [[ "" 0 ]] ] ] [ - "" nesting just call llist>list -] unit-test - -[ [ [[ "" 1 ]] ] ] [ - "()" nesting just call llist>list -] unit-test - -[ [ [[ "" 2 ]] ] ] [ - "(())" nesting just call llist>list -] unit-test - -[ [ [[ "" 3 ]] ] ] [ - "()(()(()()))()" nesting just call llist>list +[ { T{ parse-result f "b" "cd" } } ] [ + "abcd" "a" token "b" token &> call list>array ] unit-test ! Testing <*> and <:&> -[ [ [ "234" [ "1" ] ] [ "1234" ] ] ] [ - "1234" "1" token <*> call llist>list +[ { T{ parse-result f { "1" } "234" } T{ parse-result f [ ] "1234" } } ] [ + "1234" "1" token <*> call list>array ] unit-test [ - [ "234" [ "1" "1" "1" "1" ] ] - [ "1234" [ "1" "1" "1" ] ] - [ "11234" [ "1" "1" ] ] - [ "111234" [ "1" ] ] - [ "1111234" ] + { + T{ parse-result f { "1" "1" "1" "1" } "234" } + T{ parse-result f { "1" "1" "1" } "1234" } + T{ parse-result f { "1" "1" } "11234" } + T{ parse-result f { "1" } "111234" } + T{ parse-result f [ ] "1111234" } + } + ] [ - "1111234" "1" token <*> call [ ] leach + "1111234" "1" token <*> call list>array ] unit-test [ - [ "234" "1111" ] - [ "1234" "111" ] - [ "11234" "11" ] - [ "111234" "1" ] - [ "1111234" f ] + { + T{ parse-result f { "1111" } "234" } + T{ parse-result f { "111" } "1234" } + T{ parse-result f { "11" } "11234" } + T{ parse-result f { "1" } "111234" } + T{ parse-result f { [ ] } "1111234" } + } ] [ - "1111234" "1" token <*> [ car concat unit ] <@ call [ ] leach + "1111234" "1" token <*> [ concat 1array ] <@ call list>array ] unit-test -[ [ "234" ] ] [ - "234" "1" token <*> call [ ] leach -] unit-test - -: pdigit [ digit? ] satisfy [ digit> ] <@ ; -: pnatural pdigit <*> ; -: pnatural2 pnatural [ car [ >digit ] map >string dup pempty? [ drop 0 ] [ string>number ] if unit ] <@ ; - -[ - [ "" 12345 ] - [ "5" 1234 ] - [ "45" 123 ] - [ "345" 12 ] - [ "2345" 1 ] - [ "12345" 0 ] -] [ - "12345" pnatural2 call [ ] leach +[ { T{ parse-result f [ ] "234" } } ] [ + "234" "1" token <*> call list>array ] unit-test ! Testing <+> -[ [ "234" [ "1" ] ] ] [ - "1234" "1" token <+> call [ ] leach +[ { T{ parse-result f { "1" } "234" } } ] [ + "1234" "1" token <+> call list>array ] unit-test [ - [ "234" [ "1" "1" "1" "1" ] ] - [ "1234" [ "1" "1" "1" ] ] - [ "11234" [ "1" "1" ] ] - [ "111234" [ "1" ] ] + { + T{ parse-result f { "1" "1" "1" "1" } "234" } + T{ parse-result f { "1" "1" "1" } "1234" } + T{ parse-result f { "1" "1" } "11234" } + T{ parse-result f { "1" } "111234" } + } ] [ - "1111234" "1" token <+> call [ ] leach + "1111234" "1" token <+> call list>array ] unit-test -[ ] [ - "234" "1" token <+> call [ ] leach +[ { } ] [ + "234" "1" token <+> call list>array ] unit-test -! Testing -[ [[ "" [[ [ "a" ] "b" ]] ]] ] [ - "ab" "a" token pdigit <&> "b" token <&> call [ ] leach -] unit-test - -[ ] [ - "ac" "a" token pdigit <&> "b" token <&> call [ ] leach -] unit-test - -[ [[ "" [[ [ "a" 5 ] "b" ]] ]] ] [ - "a5b" "a" token pdigit <&> "b" token <&> call [ ] leach -] unit-test - -: pinteger "-" token pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ; - -[ - [ "" 123 ] - [ "3" 12 ] - [ "23" 1 ] - [ "123" 0 ] -] [ - "123" pinteger call [ ] leach -] unit-test - -[ - [[ "" -123 ]] - [[ "3" -12 ]] - [[ "23" -1 ]] - [[ "123" 0 ]] - [ "-123" 0 ] -] [ - "-123" pinteger call [ ] leach -] unit-test