From 984b68d00b75e90a29859201c926f01b831d5a16 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 21 Jan 2009 21:57:44 -0600 Subject: [PATCH] Fixing HTML dependancy on state-parser --- extra/html/parser/parser.factor | 20 +++++------ extra/html/parser/state/state-tests.factor | 13 +++++++ extra/html/parser/state/state.factor | 41 ++++++++++++++++++++++ extra/html/parser/utils/utils-tests.factor | 2 +- extra/html/parser/utils/utils.factor | 7 +--- 5 files changed, 66 insertions(+), 17 deletions(-) create mode 100644 extra/html/parser/state/state-tests.factor create mode 100644 extra/html/parser/state/state.factor diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index 836693026a..c445b708c5 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays html.parser.utils hashtables io kernel namespaces make prettyprint quotations sequences splitting -state-parser strings unicode.categories unicode.case ; +html.parser.state strings unicode.categories unicode.case ; IN: html.parser TUPLE: tag name attributes text closing? ; @@ -59,8 +59,8 @@ SYMBOL: tagstack [ get-char CHAR: " = ] take-until ; : read-quote ( -- string ) - get-char next* CHAR: ' = - [ read-single-quote ] [ read-double-quote ] if next* ; + get-char next CHAR: ' = + [ read-single-quote ] [ read-double-quote ] if next ; : read-key ( -- string ) read-whitespace* @@ -68,7 +68,7 @@ SYMBOL: tagstack : read-= ( -- ) read-whitespace* - [ get-char CHAR: = = ] take-until drop next* ; + [ get-char CHAR: = = ] take-until drop next ; : read-value ( -- string ) read-whitespace* @@ -76,14 +76,14 @@ SYMBOL: tagstack [ blank? ] trim ; : read-comment ( -- ) - "-->" take-string* make-comment-tag push-tag ; + "-->" take-string make-comment-tag push-tag ; : read-dtd ( -- ) - ">" take-string* make-dtd-tag push-tag ; + ">" take-string make-dtd-tag push-tag ; : read-bang ( -- ) - next* get-char CHAR: - = get-next CHAR: - = and [ - next* next* + next get-char CHAR: - = get-next CHAR: - = and [ + next next read-comment ] [ read-dtd @@ -91,10 +91,10 @@ SYMBOL: tagstack : read-tag ( -- string ) [ get-char CHAR: > = get-char CHAR: < = or ] take-until - get-char CHAR: < = [ next* ] unless ; + get-char CHAR: < = [ next ] unless ; : read-< ( -- string ) - next* get-char CHAR: ! = [ + next get-char CHAR: ! = [ read-bang f ] [ read-tag diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor new file mode 100644 index 0000000000..a9be38c0b5 --- /dev/null +++ b/extra/html/parser/state/state-tests.factor @@ -0,0 +1,13 @@ +USING: tools.test html.parser.state ascii kernel ; +IN: html.parser.state.tests + +: take-rest ( -- string ) + [ f ] take-until ; + +: take-char ( -- string ) + [ get-char = ] curry take-until ; + +[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test +[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test +[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test +! [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor new file mode 100644 index 0000000000..4b1027d338 --- /dev/null +++ b/extra/html/parser/state/state.factor @@ -0,0 +1,41 @@ +! Copyright (C) 2005, 2009 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. +USING: namespaces math kernel sequences accessors fry circular ; +IN: html.parser.state + +TUPLE: state string i ; + +: get-i ( -- i ) state get i>> ; + +: get-char ( -- char ) + state get [ i>> ] [ string>> ] bi ?nth ; + +: get-next ( -- char ) + state get [ i>> 1+ ] [ string>> ] bi ?nth ; + +: next ( -- ) + state get [ 1+ ] change-i drop ; + +: string-parse ( string quot -- ) + [ 0 state boa state ] dip with-variable ; + +: short* ( n seq -- n' seq ) + over [ nip dup length swap ] unless ; + +: skip-until ( quot: ( -- ? ) -- ) + get-char [ + [ call ] keep swap + [ drop ] [ next skip-until ] if + ] [ drop ] if ; inline recursive + +: take-until ( quot: ( -- ? ) -- ) + [ get-i ] dip skip-until get-i + state get string>> subseq ; + +: string-matches? ( string circular -- ? ) + get-char over push-circular sequence= ; + +: take-string ( match -- string ) + dup length + [ 2dup string-matches? ] take-until nip + dup length rot length 1- - head next ; diff --git a/extra/html/parser/utils/utils-tests.factor b/extra/html/parser/utils/utils-tests.factor index 4b25db16fd..6d8e3bc05f 100644 --- a/extra/html/parser/utils/utils-tests.factor +++ b/extra/html/parser/utils/utils-tests.factor @@ -1,7 +1,7 @@ USING: assocs combinators continuations hashtables hashtables.private io kernel math namespaces prettyprint quotations sequences splitting -state-parser strings tools.test ; +strings tools.test ; USING: html.parser.utils ; IN: html.parser.utils.tests diff --git a/extra/html/parser/utils/utils.factor b/extra/html/parser/utils/utils.factor index c2a9d73af8..c913b9d306 100644 --- a/extra/html/parser/utils/utils.factor +++ b/extra/html/parser/utils/utils.factor @@ -2,17 +2,12 @@ ! See http://factorcode.org/license.txt for BSD license. USING: assocs circular combinators continuations hashtables hashtables.private io kernel math namespaces prettyprint -quotations sequences splitting state-parser strings +quotations sequences splitting html.parser.state strings combinators.short-circuit ; IN: html.parser.utils : string-parse-end? ( -- ? ) get-next not ; -: take-string* ( match -- string ) - dup length - [ 2dup string-matches? ] take-until nip - dup length rot length 1- - head next* ; - : trim1 ( seq ch -- newseq ) [ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;