fix parser combinator bit rot

2006-08-02 05:03:47 +00:00 · 2006-08-02 05:03:47 +00:00 · e4ca1e2bd3
parent 5bb0a8bee3
commit e4ca1e2bd3
3 changed files with 118 additions and 214 deletions
--- a/contrib/parser-combinators/load.factor
+++ b/contrib/parser-combinators/load.factor
@@ -2,4 +2,5 @@ REQUIRES: lazy-lists ;
 PROVIDE: parser-combinators { 
    "parser-combinators.factor"
 } {
+    "tests.factor"
 } ;
--- a/contrib/parser-combinators/parser-combinators.factor
+++ b/contrib/parser-combinators/parser-combinators.factor
@@ -20,9 +20,11 @@
 ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-USING: lazy-lists kernel sequences strings math io ;
+USING: lazy-lists kernel sequences strings math io arrays ;
 IN: parser-combinators

+TUPLE: parse-result parsed unparsed ;
+
 : h:t ( object -- head tail )
  #! Return the head and tail of the object.
  dup first swap 1 tail ;
@@ -31,26 +33,26 @@ IN: parser-combinators
  #! A parser that parses a specific sequence of
  #! characters.
  2dup length head over = [
-    swap over length tail swons unit delay lunit
+    swap over length tail <parse-result> lunit
  ] [
-    2drop lnil
+    2drop nil
  ] if ;

 : token ( string -- parser )
  #! Return a token parser that parses the given string.
-  [ token-parser ] cons ;
+  [ token-parser ] curry ;

 : satisfy-parser ( inp pred -- llist )
  #! A parser that succeeds if the predicate,
  #! when passed the first character in the input, returns
  #! true.
  over empty? [
-    2drop lnil
+    2drop nil
  ] [        
    over first swap call [
-      h:t swons unit delay lunit
+      h:t <parse-result> lunit
    ] [
-      drop lnil
+      drop nil
    ] if
  ] if ;
  
@@ -58,7 +60,7 @@ IN: parser-combinators
  #! Return a parser that succeeds if the predicate 'p',
  #! when passed the first character in the input, returns
  #! true.
-  [ satisfy-parser ] cons ;
+  [ satisfy-parser ] curry ;

 : satisfy2-parser ( inp pred quot -- llist )
  #! A parser that succeeds if the predicate,
@@ -68,21 +70,21 @@ IN: parser-combinators
  #! of that call is returned as the result portion of the
  #! successfull parse lazy list.
  -rot over first swap call [ ( quot inp -- )
-    h:t >r swap call r> swons unit delay lunit
+    h:t >r swap call r> <parse-result> lunit
  ] [
-    2drop lnil
+    2drop nil
  ] if ;

  : satisfy2 ( pred quot -- parser )
  #! Return a satisfy2-parser.
-  [ satisfy2-parser ] cons cons ;
+  [ satisfy2-parser ] curry curry ;

 : epsilon-parser ( input -- llist )
  #! A parser that parses the empty string. It
  #! does not consume any input and always returns
  #! an empty list as the parse tree with the
  #! unmodified input.
-  "" cons unit delay lunit ;
+  "" swap <parse-result> lunit ;

 : epsilon ( -- parser )
  #! Return an epsilon parser
@@ -91,53 +93,51 @@ IN: parser-combinators
 : succeed-parser ( input result -- llist )
  #! A parser that always returns 'result' as a
  #! successful parse with no input consumed.
-  cons unit delay lunit ;
+  swap <parse-result> lunit ;

 : succeed ( result -- parser )
  #! Return a succeed parser.
-  [ succeed-parser ] cons ;
+  [ succeed-parser ] curry ;

 : fail-parser ( input -- llist )
  #! A parser that always fails and returns
  #! an empty list of successes.
-  drop lnil ;
+  drop nil ;

 : fail ( -- parser )
  #! Return a fail-parser.
  [ fail-parser ] ;

-: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
+: <&>-do-parser3 ( <parse-result> x -- result )
  #! Called by <&>-do-parser2 on each result of the
  #! parse from parser2. 
-  >r uncons r> ( x1 xs2 x )
-  swap cons cons ;
+  >r dup parse-result-unparsed swap parse-result-parsed r> ( x1 xs2 x )
+  swap 2array  swap <parse-result> ;

-: unswons uncons swap ;
-
-: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
+: <&>-do-parser2 ( <parse-result> parser2 -- result )
  #! Called by the <&>-parser on each result of the
  #! successfull parse of parser1. It's input is the
  #! cons containing the data parsed and the remaining
  #! input. This word will parser2 on the remaining input
  #! returning a new cons cell containing the combined
  #! parse result.
-  >r unswons r> ( x xs parser2 )
+  >r dup parse-result-parsed swap parse-result-unparsed r> ( x xs parser2 )
  call swap    ( llist x )
-  [ <&>-do-parser3 ] cons lmap ;
+  [ <&>-do-parser3 ] curry lmap ;

 : <&>-parser ( input parser1 parser2 -- llist )
  #! Parse 'input' by sequentially combining the
  #! two parsers. First parser1 is applied to the
  #! input then parser2 is applied to the rest of
  #! the input strings from the first parser. 
-  >r call r>   ( [[ x xs ]] p2 -- result )
-  [ <&>-do-parser2 ] cons lmap lappend* ;
+  >r call r>   ( <parse-result> p2 -- result )
+  [ <&>-do-parser2 ] curry lmap lappend* ;

 : <&> ( parser1 parser2 -- parser )
  #! Sequentially combine two parsers, returning a parser
  #! that first calls p1, then p2 all remaining results from
  #! p1. 
-  [ <&>-parser ] cons cons ;
+  [ <&>-parser ] curry curry ;

 : <|>-parser ( input parser1 parser2 -- result )
  #! Return the combined list resulting from the parses
@@ -148,7 +148,7 @@ IN: parser-combinators
 : <|> ( p1 p2 -- parser )
  #! Choice operator for parsers. Return a parser that does
  #! p1 or p2 depending on which will succeed.
-  [ <|>-parser ] cons cons ;
+  [ <|>-parser ] curry curry ;

 : string-ltrim ( string -- string )
  #! Return a new string without any leading whitespace
@@ -163,25 +163,25 @@ IN: parser-combinators
 : sp ( parser -- parser )
  #! Return a parser that first skips all whitespace before
  #! calling the original parser.
-  [ sp-parser ] cons ;
+  [ sp-parser ] curry ;

 : just-parser ( input parser -- result )
  #! Calls the given parser on the input removes
  #! from the results anything where the remaining
  #! input to be parsed is not empty. So ensures a 
  #! fully parsed input string.
-  call [ car empty? ] lsubset ;
+  call [ parse-result-unparsed empty? ] lsubset ;

 : just ( parser -- parser )
  #! Return an instance of the just-parser.
-  [ just-parser ] cons ;
+  [ just-parser ] curry ;

 : (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
  #! Perform the result replacement step of <@-parser. 
  #! Given a successfull parse result, calls the quotation
  #! with the result portion on the stack. The result of
  #! that call is then used as the new result.
-  swap uncons rot call cons ;
+  swap dup parse-result-unparsed swap parse-result-parsed rot call swap <parse-result> ;

 : <@-parser ( input parser quot -- result )
  #! Calls the parser on the input. For each successfull
@@ -189,26 +189,26 @@ IN: parser-combinators
  #! The result of that quotation then becomes the new parse result.
  #! This allows modification of parse tree results (like
  #! converting strings to integers, etc).
-  -rot call dup lnil? [ ( quot lnil -- )
+  -rot call dup nil? [ ( quot nil -- )
    nip
  ] [ ( quot result -- )
-    [ (<@-parser-replace) ] rot swons lmap
+    [ (<@-parser-replace) ] rot swap curry lmap
  ] if ;

 : <@ ( parser quot -- parser )
  #! Return an <@-parser.
-  [ <@-parser ] cons cons ;
+  [ <@-parser ] curry curry ;

 : some-parser ( input parser -- result )
  #! Calls the parser on the input, guarantees
  #! the parse is complete (the remaining input is empty),
  #! picks the first solution and only returns the parse
  #! tree since the remaining input is empty.
-  just call lcar cdr ;
+  just call car parse-result-parsed ;

 : some ( parser -- deterministic-parser )
  #! Creates a 'some-parser'.
-  [ some-parser ] cons ;
+  [ some-parser ] curry ;

 : <&-parser ( input parser1 parser2 -- result )
  #! Same as <&> except discard the results of the second parser.
@@ -216,31 +216,23 @@ IN: parser-combinators

 : <& ( parser1 parser2 -- parser )
  #! Same as <&> except discard the results of the second parser.
-  [ <&-parser ] cons cons ;
+  [ <&-parser ] curry curry ;

 : &>-parser ( input parser1 parser2 -- result )
  #! Same as <&> except discard the results of the first parser.
-  <&> [ 1 tail ] <@ call ;
+  <&> [ second ] <@ call ;

 : &> ( parser1 parser2 -- parser )
  #! Same as <&> except discard the results of the first parser.
-  [ &>-parser ] cons cons ;
-
-: (a,(b,c))>((a,b,c)) ( list -- list )
-  #! Convert a list where the car is a single value 
-  #! and the cdr is a list to a list containing a flattened
-  #! list.
-  uncons car cons unit ;
+  [ &>-parser ] curry curry ;

 : <:&>-parser ( input parser1 parser2 -- result )
-  #! Same as <&> except postprocess the result with
-  #! (a,(b,c))>((a,b,c)).
-  <&> [ (a,(b,c))>((a,b,c)) ] <@ call ;
+  #! Same as <&> except flatten the result.
+  <&> [ flatten ] <@ call ;

 : <:&> ( parser1 parser2 -- parser )
-  #! Same as <&> except postprocess the result with
-  #! (a,(b,c))>((a,b,c)).
-  [ <:&>-parser ] cons cons ;
+  #! Same as <&> except flatten the result.
+  [ <:&>-parser ] curry curry ;

 DEFER: <*>

@@ -251,7 +243,7 @@ DEFER: <*>
 : <*> ( parser -- parser )
  #! Return a parser that accepts zero or more occurences of the original
  #! parser.
-  [  (<*>) call ] cons ;
+  [  (<*>) call ] curry ;

 : (<+>) ( parser -- parser )
  #! Non-delayed implementation of <+>
@@ -260,7 +252,7 @@ DEFER: <*>
 : <+> ( parser -- parser )
  #! Return a parser that accepts one or more occurences of the original
  #! parser.
-  [  (<+>) call ] cons ;
+  [  (<+>) call ] curry ;

 : (<?>) ( parser -- parser )
  #! Non-delayed implementation of <?>
@@ -269,4 +261,4 @@ DEFER: <*>
 : <?> ( parser -- parser )
  #! Return a parser that optionally uses the parser
  #! if that parser would be successfull.
-  [  (<?>) call ] cons ;
+  [  (<?>) call ] curry ;
--- a/contrib/parser-combinators/tests.factor
+++ b/contrib/parser-combinators/tests.factor
@@ -20,77 +20,77 @@
 ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-USING: kernel lazy test errors strings parser lists math sequences parser-combinators ;
+USING: kernel lazy-lists test errors strings parser math sequences parser-combinators arrays ;
 IN: scratchpad

 ! Testing <&>
-[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> call llist>list
+[ { T{ parse-result f { "a" "b" } "cd" } }  ] [
+  "abcd" "a" token "b" token <&> call list>array
 ] unit-test

-[ [ [[ "d" [[ [[ "a" "b" ]] "c" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> "c" token <&> call llist>list
+[ { T{ parse-result f { { "a" "b" } "c" } "d" } } ] [
+  "abcd" "a" token "b" token <&> "c" token <&> call list>array
 ] unit-test

-[ [ [[ "d" [[ "a" [[ "b" "c" ]] ]] ]] ] ] [
-  "abcd" "a" token "b" token "c" token <&> <&> call llist>list
+[ { T{ parse-result f { "a" { "b" "c" } } "d" } } ] [
+  "abcd" "a" token "b" token "c" token <&> <&> call list>array
 ] unit-test

-[ f ] [
-  "decd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "decd" "a" token "b" token <&> call list>array
 ] unit-test

-[ f ] [
-  "dbcd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "dbcd" "a" token "b" token <&> call list>array
 ] unit-test

-[ f ] [
-  "adcd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "adcd" "a" token "b" token <&> call list>array
 ] unit-test

 ! Testing <|>
-[ [ [[ "bcd" "a" ]] ] ] [
-  "abcd" "a" token "b" token <|> call llist>list
+[ { T{ parse-result f "a" "bcd" } } ] [
+  "abcd" "a" token "b" token <|> call list>array
 ] unit-test

-[ [ [[ "bcd" "b" ]] ] ] [
-  "bbcd" "a" token "b" token <|> call llist>list
+[ { T{ parse-result f "b" "bcd" } }  ] [
+  "bbcd" "a" token "b" token <|> call list>array
 ] unit-test

-[ f ] [
-  "cbcd" "a" token "b" token <|> call llist>list
+[ { } ] [
+  "cbcd" "a" token "b" token <|> call list>array
 ] unit-test

 ! Testing sp
-[ f ] [
-  "  abcd" "a" token call llist>list 
+[ { } ] [
+  "  abcd" "a" token call list>array 
 ] unit-test

-[ [ [[ "bcd" "a" ]] ] ] [
-  "  abcd" "a" token sp call llist>list 
+[ { T{ parse-result f "a" "bcd" } }  ] [
+  "  abcd" "a" token sp call list>array 
 ] unit-test

 ! Testing just
-[ [ [[ "" "abcd" ]] [[ "d" "abc" ]] ] ] [
-  "abcd" "abcd" token "abc" token <|> call llist>list
+[ { T{ parse-result f "abcd" "" } T{ parse-result f "abc" "d" } } ] [
+  "abcd" "abcd" token "abc" token <|> call list>array
 ] unit-test

-[ [ [[ "" "abcd" ]] ] ] [
-  "abcd" "abcd" token "abc" token <|> just call llist>list
+[ { T{ parse-result f "abcd" "" } } ] [
+  "abcd" "abcd" token "abc" token <|> just call list>array
 ] unit-test 

 ! Testing <@
-[ [ [[ "1234" 48 ]] ] ] [
-  "01234" [ digit? ] satisfy call llist>list 
+[ { T{ parse-result f 48 "1234" } } ] [
+  "01234" [ digit? ] satisfy call list>array 
 ] unit-test

-[ [ [[ "1234" 0 ]] ] ] [
-  "01234" [ digit? ] satisfy [ digit> ] <@ call llist>list 
+[ { T{ parse-result f 0 "1234" } } ] [
+  "01234" [ digit? ] satisfy [ digit> ] <@ call list>array 
 ] unit-test

 ! Testing some
-[ [ [[ "1" "begin" ]] ] ] [
-  "begin1" "begin" token call llist>list
+[ { T{ parse-result f "begin" "1" } } ] [
+  "begin1" "begin" token call list>array
 ] unit-test

 [
@@ -101,160 +101,71 @@ IN: scratchpad
  "begin" "begin" token some call 
 ] unit-test

-! parens test function
-: parens ( -- parser )
-  #! Return a parser that parses nested parentheses.
-  [ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ]  ;
-
-[ [ [[ "" "" ]] ] ] [
-  "" parens call llist>list
-] unit-test
-
-[  
-  [[ "" [[ [[ [[ "(" "" ]] ")" ]] "" ]] ]]
-  [[ "()" "" ]]
-] [
-  "()" parens call [ ] leach
-] unit-test
-
-[ [[ "((()))" "" ]] ] [
-  "((()))" parens call lcdr lcar 
-] unit-test
-
 ! <& parser and &> parser
-[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> call llist>list
+[ { T{ parse-result f { "a" "b" } "cd" } } ] [
+  "abcd" "a" token "b" token <&> call list>array
 ] unit-test

-[ [ [[ "cd" "a" ]] ] ] [
-  "abcd" "a" token "b" token <& call llist>list
+[ { T{ parse-result f "a" "cd" } } ] [
+  "abcd" "a" token "b" token <& call list>array
 ] unit-test

-[ [ [[ "cd" "b" ]] ] ] [
-  "abcd" "a" token "b" token &> call llist>list
-] unit-test
-
-! nesting example
-: parens-open "(" token ;
-: parens-close ")" token ;
-: nesting
-  [ parens-open 
-    nesting &> 
-    parens-close <& 
-    nesting <&> 
-    [ unswons 1 + max ] <@
-    0 succeed <|> 
-    call ] ;
-
-[ [ [[ "" 0 ]] ] ] [
-  "" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 1 ]] ] ] [
-  "()" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 2 ]] ] ] [
-  "(())" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 3 ]] ] ] [
-  "()(()(()()))()" nesting just call llist>list
+[ { T{ parse-result f "b" "cd" } } ] [
+  "abcd" "a" token "b" token &> call list>array
 ] unit-test

 ! Testing <*> and <:&>
-[ [ [ "234" [ "1" ] ] [ "1234" ] ] ] [
-  "1234" "1" token <*> call llist>list
+[ { T{ parse-result f { "1" } "234" } T{ parse-result f [ ] "1234" } } ] [
+  "1234" "1" token <*> call list>array
 ] unit-test

 [ 
-  [ "234" [ "1" "1" "1" "1" ] ]
-  [ "1234" [ "1" "1" "1" ] ]
-  [ "11234" [ "1" "1" ] ]
-  [ "111234" [ "1" ] ]
-  [ "1111234" ]
+  {
+    T{ parse-result f { "1" "1" "1" "1" } "234" }
+    T{ parse-result f { "1" "1" "1" } "1234" }
+    T{ parse-result f { "1" "1" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+    T{ parse-result f [ ] "1111234" }
+  }
+
 ] [
-  "1111234" "1" token <*> call [ ] leach
+  "1111234" "1" token <*> call list>array
 ] unit-test

 [ 
-  [ "234" "1111" ]
-  [ "1234" "111" ]
-  [ "11234" "11" ]
-  [ "111234" "1" ]
-  [ "1111234" f ]
+  {
+    T{ parse-result f { "1111" } "234" }
+    T{ parse-result f { "111" } "1234" }
+    T{ parse-result f { "11" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+    T{ parse-result f { [ ] } "1111234" }
+  }
 ] [
-  "1111234" "1" token <*> [ car concat unit ] <@ call [ ] leach
+  "1111234" "1" token <*> [ concat 1array ] <@ call list>array
 ] unit-test

-[ [ "234" ] ] [
-  "234" "1" token <*> call [ ] leach
-] unit-test
-
-: pdigit [ digit? ] satisfy [ digit> ] <@ ;
-: pnatural pdigit <*> ;
-: pnatural2 pnatural [ car [ >digit ] map >string dup pempty? [ drop 0 ] [ string>number ] if unit ] <@ ;
-
-[ 
-  [ "" 12345 ]
-  [ "5" 1234 ]
-  [ "45" 123 ]
-  [ "345" 12 ]
-  [ "2345" 1 ]
-  [ "12345" 0 ]
-] [
-  "12345" pnatural2 call [ ] leach
+[ { T{ parse-result f [ ] "234" } } ] [
+  "234" "1" token <*> call list>array
 ] unit-test

 ! Testing <+>
-[ [ "234" [ "1" ] ] ] [
-  "1234" "1" token <+> call [ ] leach
+[ { T{ parse-result f { "1" } "234" } } ] [
+  "1234" "1" token <+> call list>array
 ] unit-test

 [ 
-  [ "234" [ "1" "1" "1" "1" ] ]
-  [ "1234" [ "1" "1" "1" ] ]
-  [ "11234" [ "1" "1" ] ]
-  [ "111234" [ "1" ] ]
+  {
+    T{ parse-result f { "1" "1" "1" "1" } "234" }
+    T{ parse-result f { "1" "1" "1" } "1234" }
+    T{ parse-result f { "1" "1" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+  }
 ] [
-  "1111234" "1" token <+> call [ ] leach
+  "1111234" "1" token <+> call list>array
 ] unit-test

-[ ] [
-  "234" "1" token <+> call [ ] leach
+[ { } ] [
+  "234" "1" token <+> call list>array
 ] unit-test

-! Testing <?>
-[ [[ "" [[ [ "a" ] "b" ]] ]] ] [
-  "ab" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-[ ] [
-  "ac" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-[ [[ "" [[ [ "a" 5 ] "b" ]] ]] ] [
-  "a5b" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-: pinteger "-" token <?> pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ;
-
-[ 
-  [ "" 123 ]
-  [ "3" 12 ]
-  [ "23" 1 ]
-  [ "123" 0 ]
-] [
-  "123" pinteger call [ ] leach
-] unit-test
-
-[ 
-  [[ "" -123 ]]
-  [[ "3" -12 ]]
-  [[ "23" -1 ]]
-  [[ "123" 0 ]]
-  [ "-123" 0 ] 
-] [
-  "-123" pinteger call [ ] leach
-] unit-test