rename html.parser.state to sequence-parser
parent
cdc3d1b643
commit
6583b4d38e
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: html.parser.state io io.encodings.utf8 io.files
|
||||
USING: sequence-parser io io.encodings.utf8 io.files
|
||||
io.streams.string kernel combinators accessors io.pathnames
|
||||
fry sequences arrays locals namespaces io.directories
|
||||
assocs math splitting make unicode.categories
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays hashtables html.parser.state
|
||||
USING: accessors arrays hashtables sequence-parser
|
||||
html.parser.utils kernel namespaces sequences
|
||||
unicode.case unicode.categories combinators.short-circuit
|
||||
quoting fry ;
|
||||
|
|
|
@ -1,104 +0,0 @@
|
|||
USING: tools.test html.parser.state ascii kernel accessors ;
|
||||
IN: html.parser.state.tests
|
||||
|
||||
[ "hello" ]
|
||||
[ "hello" [ take-rest ] state-parse ] unit-test
|
||||
|
||||
[ "hi" " how are you?" ]
|
||||
[
|
||||
"hi how are you?"
|
||||
[ [ [ current blank? ] take-until ] [ take-rest ] bi ] state-parse
|
||||
] unit-test
|
||||
|
||||
[ "foo" ";bar" ]
|
||||
[
|
||||
"foo;bar" [
|
||||
[ CHAR: ; take-until-object ] [ take-rest ] bi
|
||||
] state-parse
|
||||
] unit-test
|
||||
|
||||
[ "foo " " bar" ]
|
||||
[
|
||||
"foo and bar" [
|
||||
[ "and" take-until-sequence ] [ take-rest ] bi
|
||||
] state-parse
|
||||
] unit-test
|
||||
|
||||
[ 6 ]
|
||||
[
|
||||
" foo " [ skip-whitespace n>> ] state-parse
|
||||
] unit-test
|
||||
|
||||
[ { 1 2 } ]
|
||||
[ { 1 2 3 } <state-parser> [ current 3 = ] take-until ] unit-test
|
||||
|
||||
[ { 1 2 } ]
|
||||
[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test
|
||||
|
||||
[ "ab" ]
|
||||
[ "abcd" <state-parser> "ab" take-sequence ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "abcd" <state-parser> "lol" take-sequence ] unit-test
|
||||
|
||||
[ "ab" ]
|
||||
[
|
||||
"abcd" <state-parser>
|
||||
[ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "abcd" <state-parser> "" take-sequence ] unit-test
|
||||
|
||||
[ "cd" ]
|
||||
[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc\" asdf" <state-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "abc\\\"def" ]
|
||||
[
|
||||
"\"abc\\\"def\" asdf" <state-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[
|
||||
"\"abc\" asdf" <state-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ skip-whitespace "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc asdf" <state-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "\"abc" ]
|
||||
[
|
||||
"\"abc asdf" <state-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ "\"abc" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "c" ]
|
||||
[ "c" <state-parser> take-token ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "" <state-parser> take-token ] unit-test
|
||||
|
||||
[ "abcd e \\\"f g" ]
|
||||
[ "\"abcd e \\\"f g\"" <state-parser> CHAR: \ CHAR: " take-token* ] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "" <state-parser> take-rest ] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "abc" <state-parser> dup "abc" take-sequence drop take-rest ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "abc" <state-parser> "abcdefg" take-sequence ] unit-test
|
|
@ -1,127 +0,0 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces math kernel sequences accessors fry circular
|
||||
unicode.case unicode.categories locals combinators.short-circuit
|
||||
make combinators io splitting ;
|
||||
|
||||
IN: html.parser.state
|
||||
|
||||
TUPLE: state-parser sequence n ;
|
||||
|
||||
: <state-parser> ( sequence -- state-parser )
|
||||
state-parser new
|
||||
swap >>sequence
|
||||
0 >>n ;
|
||||
|
||||
: offset ( state-parser offset -- char/f )
|
||||
swap
|
||||
[ n>> + ] [ sequence>> ?nth ] bi ; inline
|
||||
|
||||
: current ( state-parser -- char/f ) 0 offset ; inline
|
||||
|
||||
: previous ( state-parser -- char/f ) -1 offset ; inline
|
||||
|
||||
: peek-next ( state-parser -- char/f ) 1 offset ; inline
|
||||
|
||||
: advance ( state-parser -- state-parser )
|
||||
[ 1 + ] change-n ; inline
|
||||
|
||||
: advance* ( state-parser -- )
|
||||
advance drop ; inline
|
||||
|
||||
: get+increment ( state-parser -- char/f )
|
||||
[ current ] [ advance drop ] bi ; inline
|
||||
|
||||
:: skip-until ( state-parser quot: ( obj -- ? ) -- )
|
||||
state-parser current [
|
||||
state-parser quot call [ state-parser advance quot skip-until ] unless
|
||||
] when ; inline recursive
|
||||
|
||||
: state-parse-end? ( state-parser -- ? ) current not ;
|
||||
|
||||
: take-until ( state-parser quot: ( obj -- ? ) -- sequence/f )
|
||||
over state-parse-end? [
|
||||
2drop f
|
||||
] [
|
||||
[ drop n>> ]
|
||||
[ skip-until ]
|
||||
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
|
||||
] if ; inline
|
||||
|
||||
: take-while ( state-parser quot: ( obj -- ? ) -- sequence/f )
|
||||
[ not ] compose take-until ; inline
|
||||
|
||||
: <safe-slice> ( from to seq -- slice/f )
|
||||
3dup {
|
||||
[ 2drop 0 < ]
|
||||
[ [ drop ] 2dip length > ]
|
||||
[ drop > ]
|
||||
} 3|| [ 3drop f ] [ slice boa ] if ; inline
|
||||
|
||||
:: take-sequence ( state-parser sequence -- obj/f )
|
||||
state-parser [ n>> dup sequence length + ] [ sequence>> ] bi
|
||||
<safe-slice> sequence sequence= [
|
||||
sequence
|
||||
state-parser [ sequence length + ] change-n drop
|
||||
] [
|
||||
f
|
||||
] if ;
|
||||
|
||||
:: take-until-sequence ( state-parser sequence -- sequence' )
|
||||
sequence length <growing-circular> :> growing
|
||||
state-parser
|
||||
[
|
||||
current growing push-growing-circular
|
||||
sequence growing sequence=
|
||||
] take-until :> found
|
||||
found dup length
|
||||
growing length 1- - head
|
||||
state-parser advance drop ;
|
||||
|
||||
: skip-whitespace ( state-parser -- state-parser )
|
||||
[ [ current blank? not ] take-until drop ] keep ;
|
||||
|
||||
: take-rest-slice ( state-parser -- sequence/f )
|
||||
[ sequence>> ] [ n>> ] bi
|
||||
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
|
||||
|
||||
: take-rest ( state-parser -- sequence )
|
||||
[ take-rest-slice ] [ sequence>> like ] bi ;
|
||||
|
||||
: take-until-object ( state-parser obj -- sequence )
|
||||
'[ current _ = ] take-until ;
|
||||
|
||||
: state-parse ( sequence quot -- )
|
||||
[ <state-parser> ] dip call ; inline
|
||||
|
||||
:: take-quoted-string ( state-parser escape-char quote-char -- string )
|
||||
state-parser n>> :> start-n
|
||||
state-parser advance
|
||||
[
|
||||
{
|
||||
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
|
||||
[ current quote-char = not ]
|
||||
} 1||
|
||||
] take-while :> string
|
||||
state-parser current quote-char = [
|
||||
state-parser advance* string
|
||||
] [
|
||||
start-n state-parser (>>n) f
|
||||
] if ;
|
||||
|
||||
: (take-token) ( state-parser -- string )
|
||||
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
|
||||
|
||||
:: take-token* ( state-parser escape-char quote-char -- string/f )
|
||||
state-parser skip-whitespace
|
||||
dup current {
|
||||
{ quote-char [ escape-char quote-char take-quoted-string ] }
|
||||
{ f [ drop f ] }
|
||||
[ drop (take-token) ]
|
||||
} case ;
|
||||
|
||||
: take-token ( state-parser -- string/f )
|
||||
CHAR: \ CHAR: " take-token* ;
|
||||
|
||||
: write-full ( state-parser -- ) sequence>> write ;
|
||||
: write-rest ( state-parser -- ) take-rest write ;
|
|
@ -2,8 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs circular combinators continuations hashtables
|
||||
hashtables.private io kernel math namespaces prettyprint
|
||||
quotations sequences splitting html.parser.state strings
|
||||
combinators.short-circuit quoting ;
|
||||
quotations sequences splitting strings quoting
|
||||
combinators.short-circuit ;
|
||||
IN: html.parser.utils
|
||||
|
||||
: trim1 ( seq ch -- newseq )
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
USING: tools.test sequence-parser ascii kernel accessors ;
|
||||
IN: sequence-parser.tests
|
||||
|
||||
[ "hello" ]
|
||||
[ "hello" [ take-rest ] parse-sequence ] unit-test
|
||||
|
||||
[ "hi" " how are you?" ]
|
||||
[
|
||||
"hi how are you?"
|
||||
[ [ [ current blank? ] take-until ] [ take-rest ] bi ] parse-sequence
|
||||
] unit-test
|
||||
|
||||
[ "foo" ";bar" ]
|
||||
[
|
||||
"foo;bar" [
|
||||
[ CHAR: ; take-until-object ] [ take-rest ] bi
|
||||
] parse-sequence
|
||||
] unit-test
|
||||
|
||||
[ "foo " " bar" ]
|
||||
[
|
||||
"foo and bar" [
|
||||
[ "and" take-until-sequence ] [ take-rest ] bi
|
||||
] parse-sequence
|
||||
] unit-test
|
||||
|
||||
[ 6 ]
|
||||
[
|
||||
" foo " [ skip-whitespace n>> ] parse-sequence
|
||||
] unit-test
|
||||
|
||||
[ { 1 2 } ]
|
||||
[ { 1 2 3 } <sequence-parser> [ current 3 = ] take-until ] unit-test
|
||||
|
||||
[ { 1 2 } ]
|
||||
[ { 1 2 3 4 } <sequence-parser> { 3 4 } take-until-sequence ] unit-test
|
||||
|
||||
[ "ab" ]
|
||||
[ "abcd" <sequence-parser> "ab" take-sequence ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "abcd" <sequence-parser> "lol" take-sequence ] unit-test
|
||||
|
||||
[ "ab" ]
|
||||
[
|
||||
"abcd" <sequence-parser>
|
||||
[ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "abcd" <sequence-parser> "" take-sequence ] unit-test
|
||||
|
||||
[ "cd" ]
|
||||
[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "abc\\\"def" ]
|
||||
[
|
||||
"\"abc\\\"def\" asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ skip-whitespace "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "\"abc" ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ "\"abc" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "c" ]
|
||||
[ "c" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ "abcd e \\\"f g" ]
|
||||
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "" <sequence-parser> take-rest ] unit-test
|
||||
|
||||
[ "" ]
|
||||
[ "abc" <sequence-parser> dup "abc" take-sequence drop take-rest ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "abc" <sequence-parser> "abcdefg" take-sequence ] unit-test
|
|
@ -0,0 +1,126 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces math kernel sequences accessors fry circular
|
||||
unicode.case unicode.categories locals combinators.short-circuit
|
||||
make combinators io splitting ;
|
||||
IN: sequence-parser
|
||||
|
||||
TUPLE: sequence-parser sequence n ;
|
||||
|
||||
: <sequence-parser> ( sequence -- sequence-parser )
|
||||
sequence-parser new
|
||||
swap >>sequence
|
||||
0 >>n ;
|
||||
|
||||
: offset ( sequence-parser offset -- char/f )
|
||||
swap
|
||||
[ n>> + ] [ sequence>> ?nth ] bi ; inline
|
||||
|
||||
: current ( sequence-parser -- char/f ) 0 offset ; inline
|
||||
|
||||
: previous ( sequence-parser -- char/f ) -1 offset ; inline
|
||||
|
||||
: peek-next ( sequence-parser -- char/f ) 1 offset ; inline
|
||||
|
||||
: advance ( sequence-parser -- sequence-parser )
|
||||
[ 1 + ] change-n ; inline
|
||||
|
||||
: advance* ( sequence-parser -- )
|
||||
advance drop ; inline
|
||||
|
||||
: get+increment ( sequence-parser -- char/f )
|
||||
[ current ] [ advance drop ] bi ; inline
|
||||
|
||||
:: skip-until ( sequence-parser quot: ( obj -- ? ) -- )
|
||||
sequence-parser current [
|
||||
sequence-parser quot call [ sequence-parser advance quot skip-until ] unless
|
||||
] when ; inline recursive
|
||||
|
||||
: sequence-parse-end? ( sequence-parser -- ? ) current not ;
|
||||
|
||||
: take-until ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
|
||||
over sequence-parse-end? [
|
||||
2drop f
|
||||
] [
|
||||
[ drop n>> ]
|
||||
[ skip-until ]
|
||||
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
|
||||
] if ; inline
|
||||
|
||||
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
|
||||
[ not ] compose take-until ; inline
|
||||
|
||||
: <safe-slice> ( from to seq -- slice/f )
|
||||
3dup {
|
||||
[ 2drop 0 < ]
|
||||
[ [ drop ] 2dip length > ]
|
||||
[ drop > ]
|
||||
} 3|| [ 3drop f ] [ slice boa ] if ; inline
|
||||
|
||||
:: take-sequence ( sequence-parser sequence -- obj/f )
|
||||
sequence-parser [ n>> dup sequence length + ] [ sequence>> ] bi
|
||||
<safe-slice> sequence sequence= [
|
||||
sequence
|
||||
sequence-parser [ sequence length + ] change-n drop
|
||||
] [
|
||||
f
|
||||
] if ;
|
||||
|
||||
:: take-until-sequence ( sequence-parser sequence -- sequence' )
|
||||
sequence length <growing-circular> :> growing
|
||||
sequence-parser
|
||||
[
|
||||
current growing push-growing-circular
|
||||
sequence growing sequence=
|
||||
] take-until :> found
|
||||
found dup length
|
||||
growing length 1- - head
|
||||
sequence-parser advance drop ;
|
||||
|
||||
: skip-whitespace ( sequence-parser -- sequence-parser )
|
||||
[ [ current blank? not ] take-until drop ] keep ;
|
||||
|
||||
: take-rest-slice ( sequence-parser -- sequence/f )
|
||||
[ sequence>> ] [ n>> ] bi
|
||||
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
|
||||
|
||||
: take-rest ( sequence-parser -- sequence )
|
||||
[ take-rest-slice ] [ sequence>> like ] bi ;
|
||||
|
||||
: take-until-object ( sequence-parser obj -- sequence )
|
||||
'[ current _ = ] take-until ;
|
||||
|
||||
: parse-sequence ( sequence quot -- )
|
||||
[ <sequence-parser> ] dip call ; inline
|
||||
|
||||
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
|
||||
sequence-parser n>> :> start-n
|
||||
sequence-parser advance
|
||||
[
|
||||
{
|
||||
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
|
||||
[ current quote-char = not ]
|
||||
} 1||
|
||||
] take-while :> string
|
||||
sequence-parser current quote-char = [
|
||||
sequence-parser advance* string
|
||||
] [
|
||||
start-n sequence-parser (>>n) f
|
||||
] if ;
|
||||
|
||||
: (take-token) ( sequence-parser -- string )
|
||||
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
|
||||
|
||||
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
|
||||
sequence-parser skip-whitespace
|
||||
dup current {
|
||||
{ quote-char [ escape-char quote-char take-quoted-string ] }
|
||||
{ f [ drop f ] }
|
||||
[ drop (take-token) ]
|
||||
} case ;
|
||||
|
||||
: take-token ( sequence-parser -- string/f )
|
||||
CHAR: \ CHAR: " take-token* ;
|
||||
|
||||
: write-full ( sequence-parser -- ) sequence>> write ;
|
||||
: write-rest ( sequence-parser -- ) take-rest write ;
|
Loading…
Reference in New Issue