From e4ca1e2bd30a0b837f3af6dd642dc0430a5f33ae Mon Sep 17 00:00:00 2001
From: "chris.double" <chris.double@double.co.nz>
Date: Wed, 2 Aug 2006 05:03:47 +0000
Subject: [PATCH] fix parser combinator bit rot

---
 contrib/parser-combinators/load.factor        |   1 +
 .../parser-combinators.factor                 | 100 ++++----
 contrib/parser-combinators/tests.factor       | 231 ++++++------------
 3 files changed, 118 insertions(+), 214 deletions(-)
diff --git a/contrib/parser-combinators/load.factor b/contrib/parser-combinators/load.factor
index 7997713880..c5f108ccd5 100644
--- a/contrib/parser-combinators/load.factor
+++ b/contrib/parser-combinators/load.factor
@@ -2,4 +2,5 @@ REQUIRES: lazy-lists ;
 PROVIDE: parser-combinators { 
     "parser-combinators.factor"
 } {
+    "tests.factor"
 } ;
\ No newline at end of file
diff --git a/contrib/parser-combinators/parser-combinators.factor b/contrib/parser-combinators/parser-combinators.factor
index e55eeebb34..754d2679bd 100644
--- a/contrib/parser-combinators/parser-combinators.factor
+++ b/contrib/parser-combinators/parser-combinators.factor
@@ -20,9 +20,11 @@
 ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-USING: lazy-lists kernel sequences strings math io ;
+USING: lazy-lists kernel sequences strings math io arrays ;
 IN: parser-combinators
 
+TUPLE: parse-result parsed unparsed ;
+
 : h:t ( object -- head tail )
   #! Return the head and tail of the object.
   dup first swap 1 tail ;
@@ -31,26 +33,26 @@ IN: parser-combinators
   #! A parser that parses a specific sequence of
   #! characters.
   2dup length head over = [
-    swap over length tail swons unit delay lunit
+    swap over length tail <parse-result> lunit
   ] [
-    2drop lnil
+    2drop nil
   ] if ;
 
 : token ( string -- parser )
   #! Return a token parser that parses the given string.
-  [ token-parser ] cons ;
+  [ token-parser ] curry ;
 
 : satisfy-parser ( inp pred -- llist )
   #! A parser that succeeds if the predicate,
   #! when passed the first character in the input, returns
   #! true.
   over empty? [
-    2drop lnil
+    2drop nil
   ] [        
     over first swap call [
-      h:t swons unit delay lunit
+      h:t <parse-result> lunit
     ] [
-      drop lnil
+      drop nil
     ] if
   ] if ;
   
@@ -58,7 +60,7 @@ IN: parser-combinators
   #! Return a parser that succeeds if the predicate 'p',
   #! when passed the first character in the input, returns
   #! true.
-  [ satisfy-parser ] cons ;
+  [ satisfy-parser ] curry ;
 
 : satisfy2-parser ( inp pred quot -- llist )
   #! A parser that succeeds if the predicate,
@@ -68,21 +70,21 @@ IN: parser-combinators
   #! of that call is returned as the result portion of the
   #! successfull parse lazy list.
   -rot over first swap call [ ( quot inp -- )
-    h:t >r swap call r> swons unit delay lunit
+    h:t >r swap call r> <parse-result> lunit
   ] [
-    2drop lnil
+    2drop nil
   ] if ;
 
   : satisfy2 ( pred quot -- parser )
   #! Return a satisfy2-parser.
-  [ satisfy2-parser ] cons cons ;
+  [ satisfy2-parser ] curry curry ;
 
 : epsilon-parser ( input -- llist )
   #! A parser that parses the empty string. It
   #! does not consume any input and always returns
   #! an empty list as the parse tree with the
   #! unmodified input.
-  "" cons unit delay lunit ;
+  "" swap <parse-result> lunit ;
 
 : epsilon ( -- parser )
   #! Return an epsilon parser
@@ -91,53 +93,51 @@ IN: parser-combinators
 : succeed-parser ( input result -- llist )
   #! A parser that always returns 'result' as a
   #! successful parse with no input consumed.
-  cons unit delay lunit ;
+  swap <parse-result> lunit ;
 
 : succeed ( result -- parser )
   #! Return a succeed parser.
-  [ succeed-parser ] cons ;
+  [ succeed-parser ] curry ;
 
 : fail-parser ( input -- llist )
   #! A parser that always fails and returns
   #! an empty list of successes.
-  drop lnil ;
+  drop nil ;
 
 : fail ( -- parser )
   #! Return a fail-parser.
   [ fail-parser ] ;
 
-: <&>-do-parser3 ( [[ x1 xs2 ]] x -- result )
+: <&>-do-parser3 ( <parse-result> x -- result )
   #! Called by <&>-do-parser2 on each result of the
   #! parse from parser2. 
-  >r uncons r> ( x1 xs2 x )
-  swap cons cons ;
+  >r dup parse-result-unparsed swap parse-result-parsed r> ( unparsed parsed x )
+  swap 2array  swap <parse-result> ;
 
-: unswons uncons swap ;
-
-: <&>-do-parser2 ( [[ x xs ]] parser2 -- result )
+: <&>-do-parser2 ( <parse-result> parser2 -- result )
   #! Called by the <&>-parser on each result of the
   #! successfull parse of parser1. It's input is the
   #! cons containing the data parsed and the remaining
   #! input. This word will parser2 on the remaining input
   #! returning a new cons cell containing the combined
   #! parse result.
-  >r unswons r> ( x xs parser2 )
+  >r dup parse-result-parsed swap parse-result-unparsed r> ( parsed unparsed parser2 )
   call swap    ( llist x )
-  [ <&>-do-parser3 ] cons lmap ;
+  [ <&>-do-parser3 ] curry lmap ;
 
 : <&>-parser ( input parser1 parser2 -- llist )
   #! Parse 'input' by sequentially combining the
   #! two parsers. First parser1 is applied to the
   #! input then parser2 is applied to the rest of
   #! the input strings from the first parser. 
-  >r call r>   ( [[ x xs ]] p2 -- result )
-  [ <&>-do-parser2 ] cons lmap lappend* ;
+  >r call r>   ( <parse-result> p2 -- result )
+  [ <&>-do-parser2 ] curry lmap lappend* ;
 
 : <&> ( parser1 parser2 -- parser )
   #! Sequentially combine two parsers, returning a parser
   #! that first calls p1, then p2 all remaining results from
   #! p1. 
-  [ <&>-parser ] cons cons ;
+  [ <&>-parser ] curry curry ;
 
 : <|>-parser ( input parser1 parser2 -- result )
   #! Return the combined list resulting from the parses
@@ -148,7 +148,7 @@ IN: parser-combinators
 : <|> ( p1 p2 -- parser )
   #! Choice operator for parsers. Return a parser that does
   #! p1 or p2 depending on which will succeed.
-  [ <|>-parser ] cons cons ;
+  [ <|>-parser ] curry curry ;
 
 : string-ltrim ( string -- string )
   #! Return a new string without any leading whitespace
@@ -163,25 +163,25 @@ IN: parser-combinators
 : sp ( parser -- parser )
   #! Return a parser that first skips all whitespace before
   #! calling the original parser.
-  [ sp-parser ] cons ;
+  [ sp-parser ] curry ;
 
 : just-parser ( input parser -- result )
   #! Calls the given parser on the input removes
   #! from the results anything where the remaining
   #! input to be parsed is not empty. So ensures a 
   #! fully parsed input string.
-  call [ car empty? ] lsubset ;
+  call [ parse-result-unparsed empty? ] lsubset ;
 
 : just ( parser -- parser )
   #! Return an instance of the just-parser.
-  [ just-parser ] cons ;
+  [ just-parser ] curry ;
 
 : (<@-parser-replace) ( [[ inp result ]] quot -- [[ inp new-result ]] )
   #! Perform the result replacement step of <@-parser. 
   #! Given a successfull parse result, calls the quotation
   #! with the result portion on the stack. The result of
   #! that call is then used as the new result.
-  swap uncons rot call cons ;
+  swap dup parse-result-unparsed swap parse-result-parsed rot call swap <parse-result> ;
 
 : <@-parser ( input parser quot -- result )
   #! Calls the parser on the input. For each successfull
@@ -189,26 +189,26 @@ IN: parser-combinators
   #! The result of that quotation then becomes the new parse result.
   #! This allows modification of parse tree results (like
   #! converting strings to integers, etc).
-  -rot call dup lnil? [ ( quot lnil -- )
+  -rot call dup nil? [ ( quot nil -- )
     nip
   ] [ ( quot result -- )
-    [ (<@-parser-replace) ] rot swons lmap
+    [ (<@-parser-replace) ] rot swap curry lmap
   ] if ;
 
 : <@ ( parser quot -- parser )
   #! Return an <@-parser.
-  [ <@-parser ] cons cons ;
+  [ <@-parser ] curry curry ;
 
 : some-parser ( input parser -- result )
   #! Calls the parser on the input, guarantees
   #! the parse is complete (the remaining input is empty),
   #! picks the first solution and only returns the parse
   #! tree since the remaining input is empty.
-  just call lcar cdr ;
+  just call car parse-result-parsed ;
 
 : some ( parser -- deterministic-parser )
   #! Creates a 'some-parser'.
-  [ some-parser ] cons ;
+  [ some-parser ] curry ;
 
 : <&-parser ( input parser1 parser2 -- result )
   #! Same as <&> except discard the results of the second parser.
@@ -216,31 +216,23 @@ IN: parser-combinators
 
 : <& ( parser1 parser2 -- parser )
   #! Same as <&> except discard the results of the second parser.
-  [ <&-parser ] cons cons ;
+  [ <&-parser ] curry curry ;
 
 : &>-parser ( input parser1 parser2 -- result )
   #! Same as <&> except discard the results of the first parser.
-  <&> [ 1 tail ] <@ call ;
+  <&> [ second ] <@ call ;
 
 : &> ( parser1 parser2 -- parser )
   #! Same as <&> except discard the results of the first parser.
-  [ &>-parser ] cons cons ;
-
-: (a,(b,c))>((a,b,c)) ( list -- list )
-  #! Convert a list where the car is a single value 
-  #! and the cdr is a list to a list containing a flattened
-  #! list.
-  uncons car cons unit ;
+  [ &>-parser ] curry curry ;
 
 : <:&>-parser ( input parser1 parser2 -- result )
-  #! Same as <&> except postprocess the result with
-  #! (a,(b,c))>((a,b,c)).
-  <&> [ (a,(b,c))>((a,b,c)) ] <@ call ;
+  #! Same as <&> except flatten the parsed portion of each result.
+  <&> [ flatten ] <@ call ;
 
 : <:&> ( parser1 parser2 -- parser )
-  #! Same as <&> except postprocess the result with
-  #! (a,(b,c))>((a,b,c)).
-  [ <:&>-parser ] cons cons ;
+  #! Same as <&> except flatten the parsed portion of each result.
+  [ <:&>-parser ] curry curry ;
 
 DEFER: <*>
 
@@ -251,7 +243,7 @@ DEFER: <*>
 : <*> ( parser -- parser )
   #! Return a parser that accepts zero or more occurences of the original
   #! parser.
-  [  (<*>) call ] cons ;
+  [  (<*>) call ] curry ;
 
 : (<+>) ( parser -- parser )
   #! Non-delayed implementation of <+>
@@ -260,7 +252,7 @@ DEFER: <*>
 : <+> ( parser -- parser )
   #! Return a parser that accepts one or more occurences of the original
   #! parser.
-  [  (<+>) call ] cons ;
+  [  (<+>) call ] curry ;
 
 : (<?>) ( parser -- parser )
   #! Non-delayed implementation of <?>
@@ -269,4 +261,4 @@ DEFER: <*>
 : <?> ( parser -- parser )
   #! Return a parser that optionally uses the parser
   #! if that parser would be successfull.
-  [  (<?>) call ] cons ;
+  [  (<?>) call ] curry ;
diff --git a/contrib/parser-combinators/tests.factor b/contrib/parser-combinators/tests.factor
index f2bf3cf3c9..5863ddcbe4 100644
--- a/contrib/parser-combinators/tests.factor
+++ b/contrib/parser-combinators/tests.factor
@@ -20,77 +20,77 @@
 ! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 ! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-USING: kernel lazy test errors strings parser lists math sequences parser-combinators ;
+USING: kernel lazy-lists test errors strings parser math sequences parser-combinators arrays ;
 IN: scratchpad
 
 ! Testing <&>
-[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> call llist>list
+[ { T{ parse-result f { "a" "b" } "cd" } }  ] [
+  "abcd" "a" token "b" token <&> call list>array
 ] unit-test
 
-[ [ [[ "d" [[ [[ "a" "b" ]] "c" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> "c" token <&> call llist>list
+[ { T{ parse-result f { { "a" "b" } "c" } "d" } } ] [
+  "abcd" "a" token "b" token <&> "c" token <&> call list>array
 ] unit-test
 
-[ [ [[ "d" [[ "a" [[ "b" "c" ]] ]] ]] ] ] [
-  "abcd" "a" token "b" token "c" token <&> <&> call llist>list
+[ { T{ parse-result f { "a" { "b" "c" } } "d" } } ] [
+  "abcd" "a" token "b" token "c" token <&> <&> call list>array
 ] unit-test
 
-[ f ] [
-  "decd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "decd" "a" token "b" token <&> call list>array
 ] unit-test
 
-[ f ] [
-  "dbcd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "dbcd" "a" token "b" token <&> call list>array
 ] unit-test
 
-[ f ] [
-  "adcd" "a" token "b" token <&> call llist>list
+[ { } ] [
+  "adcd" "a" token "b" token <&> call list>array
 ] unit-test
 
 ! Testing <|>
-[ [ [[ "bcd" "a" ]] ] ] [
-  "abcd" "a" token "b" token <|> call llist>list
+[ { T{ parse-result f "a" "bcd" } } ] [
+  "abcd" "a" token "b" token <|> call list>array
 ] unit-test
 
-[ [ [[ "bcd" "b" ]] ] ] [
-  "bbcd" "a" token "b" token <|> call llist>list
+[ { T{ parse-result f "b" "bcd" } }  ] [
+  "bbcd" "a" token "b" token <|> call list>array
 ] unit-test
 
-[ f ] [
-  "cbcd" "a" token "b" token <|> call llist>list
+[ { } ] [
+  "cbcd" "a" token "b" token <|> call list>array
 ] unit-test
 
 ! Testing sp
-[ f ] [
-  "  abcd" "a" token call llist>list 
+[ { } ] [
+  "  abcd" "a" token call list>array 
 ] unit-test
 
-[ [ [[ "bcd" "a" ]] ] ] [
-  "  abcd" "a" token sp call llist>list 
+[ { T{ parse-result f "a" "bcd" } }  ] [
+  "  abcd" "a" token sp call list>array 
 ] unit-test
 
 ! Testing just
-[ [ [[ "" "abcd" ]] [[ "d" "abc" ]] ] ] [
-  "abcd" "abcd" token "abc" token <|> call llist>list
+[ { T{ parse-result f "abcd" "" } T{ parse-result f "abc" "d" } } ] [
+  "abcd" "abcd" token "abc" token <|> call list>array
 ] unit-test
 
-[ [ [[ "" "abcd" ]] ] ] [
-  "abcd" "abcd" token "abc" token <|> just call llist>list
+[ { T{ parse-result f "abcd" "" } } ] [
+  "abcd" "abcd" token "abc" token <|> just call list>array
 ] unit-test 
 
 ! Testing <@
-[ [ [[ "1234" 48 ]] ] ] [
-  "01234" [ digit? ] satisfy call llist>list 
+[ { T{ parse-result f 48 "1234" } } ] [
+  "01234" [ digit? ] satisfy call list>array 
 ] unit-test
 
-[ [ [[ "1234" 0 ]] ] ] [
-  "01234" [ digit? ] satisfy [ digit> ] <@ call llist>list 
+[ { T{ parse-result f 0 "1234" } } ] [
+  "01234" [ digit? ] satisfy [ digit> ] <@ call list>array 
 ] unit-test
 
 ! Testing some
-[ [ [[ "1" "begin" ]] ] ] [
-  "begin1" "begin" token call llist>list
+[ { T{ parse-result f "begin" "1" } } ] [
+  "begin1" "begin" token call list>array
 ] unit-test
 
 [
@@ -101,160 +101,71 @@ IN: scratchpad
   "begin" "begin" token some call 
 ] unit-test
 
-! parens test function
-: parens ( -- parser )
-  #! Return a parser that parses nested parentheses.
-  [ "(" token parens <&> ")" token <&> parens <&> epsilon <|> call ]  ;
-
-[ [ [[ "" "" ]] ] ] [
-  "" parens call llist>list
-] unit-test
-
-[  
-  [[ "" [[ [[ [[ "(" "" ]] ")" ]] "" ]] ]]
-  [[ "()" "" ]]
-] [
-  "()" parens call [ ] leach
-] unit-test
-
-[ [[ "((()))" "" ]] ] [
-  "((()))" parens call lcdr lcar 
-] unit-test
-
 ! <& parser and &> parser
-[ [ [[ "cd" [[ "a" "b" ]] ]] ] ] [
-  "abcd" "a" token "b" token <&> call llist>list
+[ { T{ parse-result f { "a" "b" } "cd" } } ] [
+  "abcd" "a" token "b" token <&> call list>array
 ] unit-test
 
-[ [ [[ "cd" "a" ]] ] ] [
-  "abcd" "a" token "b" token <& call llist>list
+[ { T{ parse-result f "a" "cd" } } ] [
+  "abcd" "a" token "b" token <& call list>array
 ] unit-test
 
-[ [ [[ "cd" "b" ]] ] ] [
-  "abcd" "a" token "b" token &> call llist>list
-] unit-test
-
-! nesting example
-: parens-open "(" token ;
-: parens-close ")" token ;
-: nesting
-  [ parens-open 
-    nesting &> 
-    parens-close <& 
-    nesting <&> 
-    [ unswons 1 + max ] <@
-    0 succeed <|> 
-    call ] ;
-
-[ [ [[ "" 0 ]] ] ] [
-  "" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 1 ]] ] ] [
-  "()" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 2 ]] ] ] [
-  "(())" nesting just call llist>list
-] unit-test
-
-[ [ [[ "" 3 ]] ] ] [
-  "()(()(()()))()" nesting just call llist>list
+[ { T{ parse-result f "b" "cd" } } ] [
+  "abcd" "a" token "b" token &> call list>array
 ] unit-test
 
 ! Testing <*> and <:&>
-[ [ [ "234" [ "1" ] ] [ "1234" ] ] ] [
-  "1234" "1" token <*> call llist>list
+[ { T{ parse-result f { "1" } "234" } T{ parse-result f [ ] "1234" } } ] [
+  "1234" "1" token <*> call list>array
 ] unit-test
 
 [ 
-  [ "234" [ "1" "1" "1" "1" ] ]
-  [ "1234" [ "1" "1" "1" ] ]
-  [ "11234" [ "1" "1" ] ]
-  [ "111234" [ "1" ] ]
-  [ "1111234" ]
+  {
+    T{ parse-result f { "1" "1" "1" "1" } "234" }
+    T{ parse-result f { "1" "1" "1" } "1234" }
+    T{ parse-result f { "1" "1" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+    T{ parse-result f [ ] "1111234" }
+  }
+
 ] [
-  "1111234" "1" token <*> call [ ] leach
+  "1111234" "1" token <*> call list>array
 ] unit-test
 
 [ 
-  [ "234" "1111" ]
-  [ "1234" "111" ]
-  [ "11234" "11" ]
-  [ "111234" "1" ]
-  [ "1111234" f ]
+  {
+    T{ parse-result f { "1111" } "234" }
+    T{ parse-result f { "111" } "1234" }
+    T{ parse-result f { "11" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+    T{ parse-result f { [ ] } "1111234" }
+  }
 ] [
-  "1111234" "1" token <*> [ car concat unit ] <@ call [ ] leach
+  "1111234" "1" token <*> [ concat 1array ] <@ call list>array
 ] unit-test
 
-[ [ "234" ] ] [
-  "234" "1" token <*> call [ ] leach
-] unit-test
-
-: pdigit [ digit? ] satisfy [ digit> ] <@ ;
-: pnatural pdigit <*> ;
-: pnatural2 pnatural [ car [ >digit ] map >string dup pempty? [ drop 0 ] [ string>number ] if unit ] <@ ;
-
-[ 
-  [ "" 12345 ]
-  [ "5" 1234 ]
-  [ "45" 123 ]
-  [ "345" 12 ]
-  [ "2345" 1 ]
-  [ "12345" 0 ]
-] [
-  "12345" pnatural2 call [ ] leach
+[ { T{ parse-result f [ ] "234" } } ] [
+  "234" "1" token <*> call list>array
 ] unit-test
 
 ! Testing <+>
-[ [ "234" [ "1" ] ] ] [
-  "1234" "1" token <+> call [ ] leach
+[ { T{ parse-result f { "1" } "234" } } ] [
+  "1234" "1" token <+> call list>array
 ] unit-test
 
 [ 
-  [ "234" [ "1" "1" "1" "1" ] ]
-  [ "1234" [ "1" "1" "1" ] ]
-  [ "11234" [ "1" "1" ] ]
-  [ "111234" [ "1" ] ]
+  {
+    T{ parse-result f { "1" "1" "1" "1" } "234" }
+    T{ parse-result f { "1" "1" "1" } "1234" }
+    T{ parse-result f { "1" "1" } "11234" }
+    T{ parse-result f { "1" } "111234" }
+  }
 ] [
-  "1111234" "1" token <+> call [ ] leach
+  "1111234" "1" token <+> call list>array
 ] unit-test
 
-[ ] [
-  "234" "1" token <+> call [ ] leach
+[ { } ] [
+  "234" "1" token <+> call list>array
 ] unit-test
 
-! Testing <?>
-[ [[ "" [[ [ "a" ] "b" ]] ]] ] [
-  "ab" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-[ ] [
-  "ac" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-[ [[ "" [[ [ "a" 5 ] "b" ]] ]] ] [
-  "a5b" "a" token pdigit <?> <&> "b" token <&> call [ ] leach
-] unit-test
-
-: pinteger "-" token <?> pnatural2 <&> [ uncons swap [ car -1 * ] when ] <@ ;
-
-[ 
-  [ "" 123 ]
-  [ "3" 12 ]
-  [ "23" 1 ]
-  [ "123" 0 ]
-] [
-  "123" pinteger call [ ] leach
-] unit-test
-
-[ 
-  [[ "" -123 ]]
-  [[ "3" -12 ]]
-  [[ "23" -1 ]]
-  [[ "123" 0 ]]
-  [ "-123" 0 ] 
-] [
-  "-123" pinteger call [ ] leach
-] unit-test