rename html.parser.state to sequence-parser

db4
Doug Coleman 2009-04-09 15:28:48 -05:00
parent cdc3d1b643
commit 6583b4d38e
7 changed files with 234 additions and 235 deletions

View File

@ -1,6 +1,6 @@
! Copyright (C) 2009 Doug Coleman. ! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: html.parser.state io io.encodings.utf8 io.files USING: sequence-parser io io.encodings.utf8 io.files
io.streams.string kernel combinators accessors io.pathnames io.streams.string kernel combinators accessors io.pathnames
fry sequences arrays locals namespaces io.directories fry sequences arrays locals namespaces io.directories
assocs math splitting make unicode.categories assocs math splitting make unicode.categories

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008 Doug Coleman. ! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays hashtables html.parser.state USING: accessors arrays hashtables sequence-parser
html.parser.utils kernel namespaces sequences html.parser.utils kernel namespaces sequences
unicode.case unicode.categories combinators.short-circuit unicode.case unicode.categories combinators.short-circuit
quoting fry ; quoting fry ;

View File

@ -1,104 +0,0 @@
USING: tools.test html.parser.state ascii kernel accessors ;
IN: html.parser.state.tests
[ "hello" ]
[ "hello" [ take-rest ] state-parse ] unit-test
[ "hi" " how are you?" ]
[
"hi how are you?"
[ [ [ current blank? ] take-until ] [ take-rest ] bi ] state-parse
] unit-test
[ "foo" ";bar" ]
[
"foo;bar" [
[ CHAR: ; take-until-object ] [ take-rest ] bi
] state-parse
] unit-test
[ "foo " " bar" ]
[
"foo and bar" [
[ "and" take-until-sequence ] [ take-rest ] bi
] state-parse
] unit-test
[ 6 ]
[
" foo " [ skip-whitespace n>> ] state-parse
] unit-test
[ { 1 2 } ]
[ { 1 2 3 } <state-parser> [ current 3 = ] take-until ] unit-test
[ { 1 2 } ]
[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test
[ "ab" ]
[ "abcd" <state-parser> "ab" take-sequence ] unit-test
[ f ]
[ "abcd" <state-parser> "lol" take-sequence ] unit-test
[ "ab" ]
[
"abcd" <state-parser>
[ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
] unit-test
[ "" ]
[ "abcd" <state-parser> "" take-sequence ] unit-test
[ "cd" ]
[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
[ f ]
[
"\"abc\" asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
] unit-test
[ "abc\\\"def" ]
[
"\"abc\\\"def\" asdf" <state-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "asdf" ]
[
"\"abc\" asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ skip-whitespace "asdf" take-sequence ] bi
] unit-test
[ f ]
[
"\"abc asdf" <state-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "\"abc" ]
[
"\"abc asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ "\"abc" take-sequence ] bi
] unit-test
[ "c" ]
[ "c" <state-parser> take-token ] unit-test
[ f ]
[ "" <state-parser> take-token ] unit-test
[ "abcd e \\\"f g" ]
[ "\"abcd e \\\"f g\"" <state-parser> CHAR: \ CHAR: " take-token* ] unit-test
[ "" ]
[ "" <state-parser> take-rest ] unit-test
[ "" ]
[ "abc" <state-parser> dup "abc" take-sequence drop take-rest ] unit-test
[ f ]
[ "abc" <state-parser> "abcdefg" take-sequence ] unit-test

View File

@ -1,127 +0,0 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular
unicode.case unicode.categories locals combinators.short-circuit
make combinators io splitting ;
IN: html.parser.state
TUPLE: state-parser sequence n ;
: <state-parser> ( sequence -- state-parser )
state-parser new
swap >>sequence
0 >>n ;
: offset ( state-parser offset -- char/f )
swap
[ n>> + ] [ sequence>> ?nth ] bi ; inline
: current ( state-parser -- char/f ) 0 offset ; inline
: previous ( state-parser -- char/f ) -1 offset ; inline
: peek-next ( state-parser -- char/f ) 1 offset ; inline
: advance ( state-parser -- state-parser )
[ 1 + ] change-n ; inline
: advance* ( state-parser -- )
advance drop ; inline
: get+increment ( state-parser -- char/f )
[ current ] [ advance drop ] bi ; inline
:: skip-until ( state-parser quot: ( obj -- ? ) -- )
state-parser current [
state-parser quot call [ state-parser advance quot skip-until ] unless
] when ; inline recursive
: state-parse-end? ( state-parser -- ? ) current not ;
: take-until ( state-parser quot: ( obj -- ? ) -- sequence/f )
over state-parse-end? [
2drop f
] [
[ drop n>> ]
[ skip-until ]
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
] if ; inline
: take-while ( state-parser quot: ( obj -- ? ) -- sequence/f )
[ not ] compose take-until ; inline
: <safe-slice> ( from to seq -- slice/f )
3dup {
[ 2drop 0 < ]
[ [ drop ] 2dip length > ]
[ drop > ]
} 3|| [ 3drop f ] [ slice boa ] if ; inline
:: take-sequence ( state-parser sequence -- obj/f )
state-parser [ n>> dup sequence length + ] [ sequence>> ] bi
<safe-slice> sequence sequence= [
sequence
state-parser [ sequence length + ] change-n drop
] [
f
] if ;
:: take-until-sequence ( state-parser sequence -- sequence' )
sequence length <growing-circular> :> growing
state-parser
[
current growing push-growing-circular
sequence growing sequence=
] take-until :> found
found dup length
growing length 1- - head
state-parser advance drop ;
: skip-whitespace ( state-parser -- state-parser )
[ [ current blank? not ] take-until drop ] keep ;
: take-rest-slice ( state-parser -- sequence/f )
[ sequence>> ] [ n>> ] bi
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
: take-rest ( state-parser -- sequence )
[ take-rest-slice ] [ sequence>> like ] bi ;
: take-until-object ( state-parser obj -- sequence )
'[ current _ = ] take-until ;
: state-parse ( sequence quot -- )
[ <state-parser> ] dip call ; inline
:: take-quoted-string ( state-parser escape-char quote-char -- string )
state-parser n>> :> start-n
state-parser advance
[
{
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
[ current quote-char = not ]
} 1||
] take-while :> string
state-parser current quote-char = [
state-parser advance* string
] [
start-n state-parser (>>n) f
] if ;
: (take-token) ( state-parser -- string )
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
:: take-token* ( state-parser escape-char quote-char -- string/f )
state-parser skip-whitespace
dup current {
{ quote-char [ escape-char quote-char take-quoted-string ] }
{ f [ drop f ] }
[ drop (take-token) ]
} case ;
: take-token ( state-parser -- string/f )
CHAR: \ CHAR: " take-token* ;
: write-full ( state-parser -- ) sequence>> write ;
: write-rest ( state-parser -- ) take-rest write ;

View File

@ -2,8 +2,8 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint hashtables.private io kernel math namespaces prettyprint
quotations sequences splitting html.parser.state strings quotations sequences splitting strings quoting
combinators.short-circuit quoting ; combinators.short-circuit ;
IN: html.parser.utils IN: html.parser.utils
: trim1 ( seq ch -- newseq ) : trim1 ( seq ch -- newseq )

View File

@ -0,0 +1,104 @@
USING: tools.test sequence-parser ascii kernel accessors ;
IN: sequence-parser.tests
[ "hello" ]
[ "hello" [ take-rest ] parse-sequence ] unit-test
[ "hi" " how are you?" ]
[
"hi how are you?"
[ [ [ current blank? ] take-until ] [ take-rest ] bi ] parse-sequence
] unit-test
[ "foo" ";bar" ]
[
"foo;bar" [
[ CHAR: ; take-until-object ] [ take-rest ] bi
] parse-sequence
] unit-test
[ "foo " " bar" ]
[
"foo and bar" [
[ "and" take-until-sequence ] [ take-rest ] bi
] parse-sequence
] unit-test
[ 6 ]
[
" foo " [ skip-whitespace n>> ] parse-sequence
] unit-test
[ { 1 2 } ]
[ { 1 2 3 } <sequence-parser> [ current 3 = ] take-until ] unit-test
[ { 1 2 } ]
[ { 1 2 3 4 } <sequence-parser> { 3 4 } take-until-sequence ] unit-test
[ "ab" ]
[ "abcd" <sequence-parser> "ab" take-sequence ] unit-test
[ f ]
[ "abcd" <sequence-parser> "lol" take-sequence ] unit-test
[ "ab" ]
[
"abcd" <sequence-parser>
[ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
] unit-test
[ "" ]
[ "abcd" <sequence-parser> "" take-sequence ] unit-test
[ "cd" ]
[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
[ f ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
] unit-test
[ "abc\\\"def" ]
[
"\"abc\\\"def\" asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "asdf" ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ skip-whitespace "asdf" take-sequence ] bi
] unit-test
[ f ]
[
"\"abc asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "\"abc" ]
[
"\"abc asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ "\"abc" take-sequence ] bi
] unit-test
[ "c" ]
[ "c" <sequence-parser> take-token ] unit-test
[ f ]
[ "" <sequence-parser> take-token ] unit-test
[ "abcd e \\\"f g" ]
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
[ "" ]
[ "" <sequence-parser> take-rest ] unit-test
[ "" ]
[ "abc" <sequence-parser> dup "abc" take-sequence drop take-rest ] unit-test
[ f ]
[ "abc" <sequence-parser> "abcdefg" take-sequence ] unit-test

View File

@ -0,0 +1,126 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular
unicode.case unicode.categories locals combinators.short-circuit
make combinators io splitting ;
IN: sequence-parser
TUPLE: sequence-parser sequence n ;
: <sequence-parser> ( sequence -- sequence-parser )
sequence-parser new
swap >>sequence
0 >>n ;
: offset ( sequence-parser offset -- char/f )
swap
[ n>> + ] [ sequence>> ?nth ] bi ; inline
: current ( sequence-parser -- char/f ) 0 offset ; inline
: previous ( sequence-parser -- char/f ) -1 offset ; inline
: peek-next ( sequence-parser -- char/f ) 1 offset ; inline
: advance ( sequence-parser -- sequence-parser )
[ 1 + ] change-n ; inline
: advance* ( sequence-parser -- )
advance drop ; inline
: get+increment ( sequence-parser -- char/f )
[ current ] [ advance drop ] bi ; inline
:: skip-until ( sequence-parser quot: ( obj -- ? ) -- )
sequence-parser current [
sequence-parser quot call [ sequence-parser advance quot skip-until ] unless
] when ; inline recursive
: sequence-parse-end? ( sequence-parser -- ? ) current not ;
: take-until ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
over sequence-parse-end? [
2drop f
] [
[ drop n>> ]
[ skip-until ]
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
] if ; inline
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
[ not ] compose take-until ; inline
: <safe-slice> ( from to seq -- slice/f )
3dup {
[ 2drop 0 < ]
[ [ drop ] 2dip length > ]
[ drop > ]
} 3|| [ 3drop f ] [ slice boa ] if ; inline
:: take-sequence ( sequence-parser sequence -- obj/f )
sequence-parser [ n>> dup sequence length + ] [ sequence>> ] bi
<safe-slice> sequence sequence= [
sequence
sequence-parser [ sequence length + ] change-n drop
] [
f
] if ;
:: take-until-sequence ( sequence-parser sequence -- sequence' )
sequence length <growing-circular> :> growing
sequence-parser
[
current growing push-growing-circular
sequence growing sequence=
] take-until :> found
found dup length
growing length 1- - head
sequence-parser advance drop ;
: skip-whitespace ( sequence-parser -- sequence-parser )
[ [ current blank? not ] take-until drop ] keep ;
: take-rest-slice ( sequence-parser -- sequence/f )
[ sequence>> ] [ n>> ] bi
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
: take-rest ( sequence-parser -- sequence )
[ take-rest-slice ] [ sequence>> like ] bi ;
: take-until-object ( sequence-parser obj -- sequence )
'[ current _ = ] take-until ;
: parse-sequence ( sequence quot -- )
[ <sequence-parser> ] dip call ; inline
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
sequence-parser n>> :> start-n
sequence-parser advance
[
{
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
[ current quote-char = not ]
} 1||
] take-while :> string
sequence-parser current quote-char = [
sequence-parser advance* string
] [
start-n sequence-parser (>>n) f
] if ;
: (take-token) ( sequence-parser -- string )
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
sequence-parser skip-whitespace
dup current {
{ quote-char [ escape-char quote-char take-quoted-string ] }
{ f [ drop f ] }
[ drop (take-token) ]
} case ;
: take-token ( sequence-parser -- string/f )
CHAR: \ CHAR: " take-token* ;
: write-full ( sequence-parser -- ) sequence>> write ;
: write-rest ( sequence-parser -- ) take-rest write ;