From 8e26b19cc0aa008af012af16f2f1055a10faa251 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Tue, 31 Mar 2009 18:49:41 -0500 Subject: [PATCH] state-parser works with sequences, not strings fix bug with take-until --- extra/html/parser/parser.factor | 10 ++--- extra/html/parser/state/state-tests.factor | 20 +++++---- extra/html/parser/state/state.factor | 47 ++++++++++++---------- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index 677737618b..94ef59bdfd 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -68,10 +68,10 @@ SYMBOL: tagstack [ blank? ] trim ; : read-comment ( state-parser -- ) - "-->" take-until-string make-comment-tag push-tag ; + "-->" take-until-sequence make-comment-tag push-tag ; : read-dtd ( state-parser -- ) - ">" take-until-string make-dtd-tag push-tag ; + ">" take-until-sequence make-dtd-tag push-tag ; : read-bang ( state-parser -- ) next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [ @@ -93,7 +93,7 @@ SYMBOL: tagstack : (parse-attributes) ( state-parser -- ) skip-whitespace - dup string-parse-end? [ + dup state-parse-end? [ drop ] [ [ @@ -108,7 +108,7 @@ SYMBOL: tagstack : (parse-tag) ( string -- string' hashtable ) [ [ read-token >lower ] [ parse-attributes ] bi - ] string-parse ; + ] state-parse ; : read-< ( state-parser -- string/f ) next dup get-char [ @@ -126,7 +126,7 @@ SYMBOL: tagstack ] [ drop ] if ; : tag-parse ( quot -- vector ) - V{ } clone tagstack [ string-parse ] with-variable ; inline + V{ } clone tagstack [ state-parse ] with-variable ; inline : parse-html ( string -- vector ) [ (parse-html) tagstack get ] tag-parse ; diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index f676649aa8..f9862e1e69 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -2,29 +2,35 @@ USING: tools.test html.parser.state ascii kernel accessors ; IN: html.parser.state.tests [ "hello" ] -[ "hello" [ take-rest ] string-parse ] unit-test +[ "hello" [ take-rest ] state-parse ] unit-test [ "hi" " how are you?" ] [ "hi how are you?" - [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse + [ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse ] unit-test [ "foo" ";bar" ] [ "foo;bar" [ - [ CHAR: ; take-until-char ] [ take-rest ] bi - ] string-parse + [ CHAR: ; take-until-object ] [ take-rest ] bi + ] state-parse ] unit-test [ "foo " " bar" ] [ "foo and bar" [ - [ "and" take-until-string ] [ take-rest ] bi - ] string-parse + [ "and" take-until-sequence ] [ take-rest ] bi + ] state-parse ] unit-test [ 6 ] [ - " foo " [ skip-whitespace i>> ] string-parse + " foo " [ skip-whitespace n>> ] state-parse ] unit-test + +[ { 1 2 } ] +[ { 1 2 3 } [ 3 = ] take-until ] unit-test + +[ { 1 2 } ] +[ { 1 2 3 4 } { 3 4 } take-until-sequence ] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index c69fd76af5..2369b1d750 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -2,31 +2,32 @@ ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular unicode.case unicode.categories locals ; + IN: html.parser.state -TUPLE: state-parser string i ; +TUPLE: state-parser sequence n ; -: ( string -- state-parser ) +: ( sequence -- state-parser ) state-parser new - swap >>string - 0 >>i ; + swap >>sequence + 0 >>n ; -: (get-char) ( i state -- char/f ) - string>> ?nth ; inline +: (get-char) ( n state -- char/f ) + sequence>> ?nth ; inline : get-char ( state -- char/f ) - [ i>> ] keep (get-char) ; inline + [ n>> ] keep (get-char) ; inline : get-next ( state -- char/f ) - [ i>> 1+ ] keep (get-char) ; inline + [ n>> 1 + ] keep (get-char) ; inline : next ( state -- state ) - [ 1+ ] change-i ; inline + [ 1 + ] change-n ; inline : get+increment ( state -- char/f ) [ get-char ] [ next drop ] bi ; inline -: string-parse ( string quot -- ) +: state-parse ( sequence quot -- ) [ ] dip call ; inline :: skip-until ( state quot: ( obj -- ? ) -- ) @@ -34,17 +35,23 @@ TUPLE: state-parser string i ; quot call [ state next quot skip-until ] unless ] when* ; inline recursive -: take-until ( state quot: ( obj -- ? ) -- string ) - [ drop i>> ] - [ skip-until ] - [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline +: state-parse-end? ( state -- ? ) get-next not ; -:: take-until-string ( state-parser string -- string' ) - string length :> growing +: take-until ( state quot: ( obj -- ? ) -- sequence/f ) + over state-parse-end? [ + 2drop f + ] [ + [ drop n>> ] + [ skip-until ] + [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq + ] if ; inline + +:: take-until-sequence ( state-parser sequence -- sequence' ) + sequence length :> growing state-parser [ growing push-growing-circular - string growing sequence= + sequence growing sequence= ] take-until :> found found dup length growing length 1- - head @@ -53,10 +60,8 @@ TUPLE: state-parser string i ; : skip-whitespace ( state -- state ) [ [ blank? not ] take-until drop ] keep ; -: take-rest ( state -- string ) +: take-rest ( state -- sequence ) [ drop f ] take-until ; inline -: take-until-char ( state ch -- string ) +: take-until-object ( state obj -- sequence ) '[ _ = ] take-until ; - -: string-parse-end? ( state -- ? ) get-next not ;