From a07c17598efaaffee33c146464573d184042c53c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Tue, 31 Mar 2009 16:04:39 -0500 Subject: [PATCH] redo state parser to avoid dynamic variables --- extra/html/parser/state/state-tests.factor | 34 +++++++--- extra/html/parser/state/state.factor | 73 ++++++++++++++-------- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index da70d0fa12..f676649aa8 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -1,14 +1,30 @@ -USING: tools.test html.parser.state ascii kernel ; +USING: tools.test html.parser.state ascii kernel accessors ; IN: html.parser.state.tests -: take-rest ( -- string ) - [ f ] take-until ; +[ "hello" ] +[ "hello" [ take-rest ] string-parse ] unit-test -: take-char ( -- string ) - [ get-char = ] curry take-until ; +[ "hi" " how are you?" ] +[ + "hi how are you?" + [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse +] unit-test + +[ "foo" ";bar" ] +[ + "foo;bar" [ + [ CHAR: ; take-until-char ] [ take-rest ] bi + ] string-parse +] unit-test -[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test -[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test -[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test [ "foo " " bar" ] -[ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test +[ + "foo and bar" [ + [ "and" take-until-string ] [ take-rest ] bi + ] string-parse +] unit-test + +[ 6 ] +[ + " foo " [ skip-whitespace i>> ] string-parse +] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index 1b3f188a78..c69fd76af5 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -1,41 +1,62 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: namespaces math kernel sequences accessors fry circular ; +USING: namespaces math kernel sequences accessors fry circular +unicode.case unicode.categories locals ; IN: html.parser.state -TUPLE: state string i ; +TUPLE: state-parser string i ; -: get-i ( -- i ) state get i>> ; inline +: ( string -- state-parser ) + state-parser new + swap >>string + 0 >>i ; -: get-char ( -- char ) - state get [ i>> ] [ string>> ] bi ?nth ; inline +: (get-char) ( i state -- char/f ) + string>> ?nth ; inline -: get-next ( -- char ) - state get [ i>> 1+ ] [ string>> ] bi ?nth ; inline +: get-char ( state -- char/f ) + [ i>> ] keep (get-char) ; inline -: next ( -- ) - state get [ 1+ ] change-i drop ; inline +: get-next ( state -- char/f ) + [ i>> 1+ ] keep (get-char) ; inline + +: next ( state -- state ) + [ 1+ ] change-i ; inline + +: get+increment ( state -- char/f ) + [ get-char ] [ next drop ] bi ; inline : string-parse ( string quot -- ) - [ 0 state boa state ] dip with-variable ; inline + [ ] dip call ; inline -: short* ( n seq -- n' seq ) - over [ nip dup length swap ] unless ; inline +:: skip-until ( state quot: ( obj -- ? ) -- ) + state get-char [ + quot call [ state next quot skip-until ] unless + ] when* ; inline recursive -: skip-until ( quot: ( -- ? ) -- ) - get-char [ - [ call ] keep swap - [ drop ] [ next skip-until ] if - ] [ drop ] if ; inline recursive +: take-until ( state quot: ( obj -- ? ) -- string ) + [ drop i>> ] + [ skip-until ] + [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline -: take-until ( quot: ( -- ? ) -- ) - get-i [ skip-until ] dip get-i - state get string>> subseq ; inline +:: take-until-string ( state-parser string -- string' ) + string length :> growing + state-parser + [ + growing push-growing-circular + string growing sequence= + ] take-until :> found + found dup length + growing length 1- - head + state-parser next drop ; + +: skip-whitespace ( state -- state ) + [ [ blank? not ] take-until drop ] keep ; -: string-matches? ( string circular -- ? ) - get-char over push-growing-circular sequence= ; inline +: take-rest ( state -- string ) + [ drop f ] take-until ; inline -: take-string ( match -- string ) - dup length - [ 2dup string-matches? ] take-until nip - dup length rot length 1- - head next ; inline +: take-until-char ( state ch -- string ) + '[ _ = ] take-until ; + +: string-parse-end? ( state -- ? ) get-next not ;