state-parser works with sequences, not strings

fix bug with take-until
2009-03-31 18:49:41 -05:00 · 2009-03-31 18:49:41 -05:00 · 8e26b19cc0
parent e22823f2c4
commit 8e26b19cc0
3 changed files with 44 additions and 33 deletions
--- a/extra/html/parser/parser.factor
+++ b/extra/html/parser/parser.factor
@ -68,10 +68,10 @@ SYMBOL: tagstack
    [ blank? ] trim ;
 : read-comment ( state-parser -- )
-    "-->" take-until-string make-comment-tag push-tag ;
+    "-->" take-until-sequence make-comment-tag push-tag ;
 : read-dtd ( state-parser -- )
-    ">" take-until-string make-dtd-tag push-tag ;
+    ">" take-until-sequence make-dtd-tag push-tag ;
 : read-bang ( state-parser -- )
    next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
@ -93,7 +93,7 @@ SYMBOL: tagstack
 : (parse-attributes) ( state-parser -- )
    skip-whitespace
-    dup string-parse-end? [
+    dup state-parse-end? [
        drop
    ] [
        [
@ -108,7 +108,7 @@ SYMBOL: tagstack
 : (parse-tag) ( string -- string' hashtable )
    [
        [ read-token >lower ] [ parse-attributes ] bi
-    ] string-parse ;
+    ] state-parse ;
 : read-< ( state-parser -- string/f )
    next dup get-char [
@ -126,7 +126,7 @@ SYMBOL: tagstack
    ] [ drop ] if ;
 : tag-parse ( quot -- vector )
-    V{ } clone tagstack [ string-parse ] with-variable ; inline
+    V{ } clone tagstack [ state-parse ] with-variable ; inline
 : parse-html ( string -- vector )
    [ (parse-html) tagstack get ] tag-parse ;
--- a/extra/html/parser/state/state-tests.factor
+++ b/extra/html/parser/state/state-tests.factor
@ -2,29 +2,35 @@ USING: tools.test html.parser.state ascii kernel accessors ;
 IN: html.parser.state.tests
 [ "hello" ]
-[ "hello" [ take-rest ] string-parse ] unit-test
+[ "hello" [ take-rest ] state-parse ] unit-test
 [ "hi" " how are you?" ]
 [
    "hi how are you?"
-    [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse
+    [ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse
 ] unit-test
 [ "foo" ";bar" ]
 [
    "foo;bar" [
-        [ CHAR: ; take-until-char ] [ take-rest ] bi
+        [ CHAR: ; take-until-object ] [ take-rest ] bi
-    ] string-parse
+    ] state-parse
 ] unit-test
 [ "foo " " bar" ]
 [
    "foo and bar" [
-        [ "and" take-until-string ] [ take-rest ] bi 
+        [ "and" take-until-sequence ] [ take-rest ] bi 
-    ] string-parse
+    ] state-parse
 ] unit-test
 [ 6 ]
 [
-    "      foo   " [ skip-whitespace i>> ] string-parse
+    "      foo   " [ skip-whitespace n>> ] state-parse
 ] unit-test
 [ { 1 2 } ]
 [ { 1 2 3 } <state-parser> [ 3 = ] take-until ] unit-test
 [ { 1 2 } ]
 [ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test
--- a/extra/html/parser/state/state.factor
+++ b/extra/html/parser/state/state.factor
@ -2,31 +2,32 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: namespaces math kernel sequences accessors fry circular
 unicode.case unicode.categories locals ;
 IN: html.parser.state
-TUPLE: state-parser string i ;
+TUPLE: state-parser sequence n ;
-: <state-parser> ( string -- state-parser )
+: <state-parser> ( sequence -- state-parser )
    state-parser new
-        swap >>string
+        swap >>sequence
-        0 >>i ;
+        0 >>n ;
-: (get-char) ( i state -- char/f )
+: (get-char) ( n state -- char/f )
-    string>> ?nth ; inline
+    sequence>> ?nth ; inline
 : get-char ( state -- char/f )
-    [ i>> ] keep (get-char) ; inline
+    [ n>> ] keep (get-char) ; inline
 : get-next ( state -- char/f )
-    [ i>> 1+ ] keep (get-char) ; inline
+    [ n>> 1 + ] keep (get-char) ; inline
 : next ( state -- state )
-    [ 1+ ] change-i ; inline
+    [ 1 + ] change-n ; inline
 : get+increment ( state -- char/f )
    [ get-char ] [ next drop ] bi ; inline
-: string-parse ( string quot -- )
+: state-parse ( sequence quot -- )
    [ <state-parser> ] dip call ; inline
 :: skip-until ( state quot: ( obj -- ? ) -- )
@ -34,17 +35,23 @@ TUPLE: state-parser string i ;
        quot call [ state next quot skip-until ] unless
    ] when* ; inline recursive
-: take-until ( state quot: ( obj -- ? ) -- string )
+: state-parse-end? ( state -- ? ) get-next not ;
    [ drop i>> ]
    [ skip-until ]
    [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline
-:: take-until-string ( state-parser string -- string' )
+: take-until ( state quot: ( obj -- ? ) -- sequence/f )
-    string length <growing-circular> :> growing
+    over state-parse-end? [
        2drop f
    ] [
        [ drop n>> ]
        [ skip-until ]
        [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
    ] if ; inline
 :: take-until-sequence ( state-parser sequence -- sequence' )
    sequence length <growing-circular> :> growing
    state-parser
    [
        growing push-growing-circular
-        string growing sequence=
+        sequence growing sequence=
    ] take-until :> found
    found dup length
    growing length 1- - head
@ -53,10 +60,8 @@ TUPLE: state-parser string i ;
 : skip-whitespace ( state -- state )
    [ [ blank? not ] take-until drop ] keep ;
-: take-rest ( state -- string )
+: take-rest ( state -- sequence )
    [ drop f ] take-until ; inline
-: take-until-char ( state ch -- string )
+: take-until-object ( state obj -- sequence )
    '[ _ = ] take-until ;
 : string-parse-end? ( state -- ? ) get-next not ;