Fixing HTML dependancy on state-parser
parent
465ed2fca8
commit
984b68d00b
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays html.parser.utils hashtables io kernel
|
USING: accessors arrays html.parser.utils hashtables io kernel
|
||||||
namespaces make prettyprint quotations sequences splitting
|
namespaces make prettyprint quotations sequences splitting
|
||||||
state-parser strings unicode.categories unicode.case ;
|
html.parser.state strings unicode.categories unicode.case ;
|
||||||
IN: html.parser
|
IN: html.parser
|
||||||
|
|
||||||
TUPLE: tag name attributes text closing? ;
|
TUPLE: tag name attributes text closing? ;
|
||||||
|
@ -59,8 +59,8 @@ SYMBOL: tagstack
|
||||||
[ get-char CHAR: " = ] take-until ;
|
[ get-char CHAR: " = ] take-until ;
|
||||||
|
|
||||||
: read-quote ( -- string )
|
: read-quote ( -- string )
|
||||||
get-char next* CHAR: ' =
|
get-char next CHAR: ' =
|
||||||
[ read-single-quote ] [ read-double-quote ] if next* ;
|
[ read-single-quote ] [ read-double-quote ] if next ;
|
||||||
|
|
||||||
: read-key ( -- string )
|
: read-key ( -- string )
|
||||||
read-whitespace*
|
read-whitespace*
|
||||||
|
@ -68,7 +68,7 @@ SYMBOL: tagstack
|
||||||
|
|
||||||
: read-= ( -- )
|
: read-= ( -- )
|
||||||
read-whitespace*
|
read-whitespace*
|
||||||
[ get-char CHAR: = = ] take-until drop next* ;
|
[ get-char CHAR: = = ] take-until drop next ;
|
||||||
|
|
||||||
: read-value ( -- string )
|
: read-value ( -- string )
|
||||||
read-whitespace*
|
read-whitespace*
|
||||||
|
@ -76,14 +76,14 @@ SYMBOL: tagstack
|
||||||
[ blank? ] trim ;
|
[ blank? ] trim ;
|
||||||
|
|
||||||
: read-comment ( -- )
|
: read-comment ( -- )
|
||||||
"-->" take-string* make-comment-tag push-tag ;
|
"-->" take-string make-comment-tag push-tag ;
|
||||||
|
|
||||||
: read-dtd ( -- )
|
: read-dtd ( -- )
|
||||||
">" take-string* make-dtd-tag push-tag ;
|
">" take-string make-dtd-tag push-tag ;
|
||||||
|
|
||||||
: read-bang ( -- )
|
: read-bang ( -- )
|
||||||
next* get-char CHAR: - = get-next CHAR: - = and [
|
next get-char CHAR: - = get-next CHAR: - = and [
|
||||||
next* next*
|
next next
|
||||||
read-comment
|
read-comment
|
||||||
] [
|
] [
|
||||||
read-dtd
|
read-dtd
|
||||||
|
@ -91,10 +91,10 @@ SYMBOL: tagstack
|
||||||
|
|
||||||
: read-tag ( -- string )
|
: read-tag ( -- string )
|
||||||
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
|
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
|
||||||
get-char CHAR: < = [ next* ] unless ;
|
get-char CHAR: < = [ next ] unless ;
|
||||||
|
|
||||||
: read-< ( -- string )
|
: read-< ( -- string )
|
||||||
next* get-char CHAR: ! = [
|
next get-char CHAR: ! = [
|
||||||
read-bang f
|
read-bang f
|
||||||
] [
|
] [
|
||||||
read-tag
|
read-tag
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
USING: tools.test html.parser.state ascii kernel ;
|
||||||
|
IN: html.parser.state.tests
|
||||||
|
|
||||||
|
: take-rest ( -- string )
|
||||||
|
[ f ] take-until ;
|
||||||
|
|
||||||
|
: take-char ( -- string )
|
||||||
|
[ get-char = ] curry take-until ;
|
||||||
|
|
||||||
|
[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
|
||||||
|
[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
|
||||||
|
[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
|
||||||
|
! [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test
|
|
@ -0,0 +1,41 @@
|
||||||
|
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: namespaces math kernel sequences accessors fry circular ;
|
||||||
|
IN: html.parser.state
|
||||||
|
|
||||||
|
TUPLE: state string i ;
|
||||||
|
|
||||||
|
: get-i ( -- i ) state get i>> ;
|
||||||
|
|
||||||
|
: get-char ( -- char )
|
||||||
|
state get [ i>> ] [ string>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: get-next ( -- char )
|
||||||
|
state get [ i>> 1+ ] [ string>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: next ( -- )
|
||||||
|
state get [ 1+ ] change-i drop ;
|
||||||
|
|
||||||
|
: string-parse ( string quot -- )
|
||||||
|
[ 0 state boa state ] dip with-variable ;
|
||||||
|
|
||||||
|
: short* ( n seq -- n' seq )
|
||||||
|
over [ nip dup length swap ] unless ;
|
||||||
|
|
||||||
|
: skip-until ( quot: ( -- ? ) -- )
|
||||||
|
get-char [
|
||||||
|
[ call ] keep swap
|
||||||
|
[ drop ] [ next skip-until ] if
|
||||||
|
] [ drop ] if ; inline recursive
|
||||||
|
|
||||||
|
: take-until ( quot: ( -- ? ) -- )
|
||||||
|
[ get-i ] dip skip-until get-i
|
||||||
|
state get string>> subseq ;
|
||||||
|
|
||||||
|
: string-matches? ( string circular -- ? )
|
||||||
|
get-char over push-circular sequence= ;
|
||||||
|
|
||||||
|
: take-string ( match -- string )
|
||||||
|
dup length <circular-string>
|
||||||
|
[ 2dup string-matches? ] take-until nip
|
||||||
|
dup length rot length 1- - head next ;
|
|
@ -1,7 +1,7 @@
|
||||||
USING: assocs combinators continuations hashtables
|
USING: assocs combinators continuations hashtables
|
||||||
hashtables.private io kernel math
|
hashtables.private io kernel math
|
||||||
namespaces prettyprint quotations sequences splitting
|
namespaces prettyprint quotations sequences splitting
|
||||||
state-parser strings tools.test ;
|
strings tools.test ;
|
||||||
USING: html.parser.utils ;
|
USING: html.parser.utils ;
|
||||||
IN: html.parser.utils.tests
|
IN: html.parser.utils.tests
|
||||||
|
|
||||||
|
|
|
@ -2,17 +2,12 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: assocs circular combinators continuations hashtables
|
USING: assocs circular combinators continuations hashtables
|
||||||
hashtables.private io kernel math namespaces prettyprint
|
hashtables.private io kernel math namespaces prettyprint
|
||||||
quotations sequences splitting state-parser strings
|
quotations sequences splitting html.parser.state strings
|
||||||
combinators.short-circuit ;
|
combinators.short-circuit ;
|
||||||
IN: html.parser.utils
|
IN: html.parser.utils
|
||||||
|
|
||||||
: string-parse-end? ( -- ? ) get-next not ;
|
: string-parse-end? ( -- ? ) get-next not ;
|
||||||
|
|
||||||
: take-string* ( match -- string )
|
|
||||||
dup length <circular-string>
|
|
||||||
[ 2dup string-matches? ] take-until nip
|
|
||||||
dup length rot length 1- - head next* ;
|
|
||||||
|
|
||||||
: trim1 ( seq ch -- newseq )
|
: trim1 ( seq ch -- newseq )
|
||||||
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
|
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue