From 85fb4aab89d2d9940ba5e71f292f640629c7cb11 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 31 Mar 2009 04:43:26 -0500 Subject: [PATCH 1/7] syndication: fix help lint --- basis/syndication/syndication-docs.factor | 2 +- basis/syndication/syndication.factor | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/syndication/syndication-docs.factor b/basis/syndication/syndication-docs.factor index 47bdc3bb36..bc9612f55c 100644 --- a/basis/syndication/syndication-docs.factor +++ b/basis/syndication/syndication-docs.factor @@ -36,7 +36,7 @@ HELP: download-feed { $description "Downloads a feed from a URL using the " { $link "http.client" } "." } ; HELP: parse-feed -{ $values { "sequence" "a string or a byte array" } { "feed" feed } } +{ $values { "seq" "a string or a byte array" } { "feed" feed } } { $description "Parses a feed." } ; HELP: xml>feed diff --git a/basis/syndication/syndication.factor b/basis/syndication/syndication.factor index 3bfc95fe3a..e30cd6826c 100755 --- a/basis/syndication/syndication.factor +++ b/basis/syndication/syndication.factor @@ -106,7 +106,7 @@ TUPLE: entry title url description date ; { "feed" [ atom1.0 ] } } case ; -GENERIC: parse-feed ( string -- feed ) +GENERIC: parse-feed ( seq -- feed ) M: string parse-feed [ string>xml xml>feed ] with-html-entities ; From 1dfa621f4db3ccb5dafbe10c69f9e2e17869612b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 31 Mar 2009 08:03:27 -0500 Subject: [PATCH 2/7] Tweak some code to reduce deployed image size --- basis/cocoa/application/application.factor | 2 +- basis/core-foundation/strings/strings.factor | 4 ++-- basis/io/encodings/ascii/ascii.factor | 6 ++---- basis/io/encodings/iana/iana.factor | 5 ++++- basis/tools/deploy/shaker/shaker.factor | 1 + 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/basis/cocoa/application/application.factor b/basis/cocoa/application/application.factor index 9437051dad..8b33986fc2 100644 --- a/basis/cocoa/application/application.factor +++ b/basis/cocoa/application/application.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien alien.syntax io kernel namespaces core-foundation core-foundation.strings cocoa.messages cocoa cocoa.classes -cocoa.runtime sequences threads init summary kernel.private +cocoa.runtime sequences init summary kernel.private assocs ; IN: cocoa.application diff --git a/basis/core-foundation/strings/strings.factor b/basis/core-foundation/strings/strings.factor index 21f3d7efd4..413709d142 100644 --- a/basis/core-foundation/strings/strings.factor +++ b/basis/core-foundation/strings/strings.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien.syntax alien.strings io.encodings.string kernel sequences byte-arrays io.encodings.utf8 math core-foundation -core-foundation.arrays destructors unicode.data ; +core-foundation.arrays destructors ; IN: core-foundation.strings TYPEDEF: void* CFStringRef @@ -62,7 +62,7 @@ FUNCTION: CFStringRef CFStringCreateWithCString ( : prepare-CFString ( string -- byte-array ) [ dup HEX: 10ffff > - [ drop CHAR: replacement-character ] when + [ drop HEX: fffd ] when ] map utf8 encode ; : ( string -- alien ) diff --git a/basis/io/encodings/ascii/ascii.factor b/basis/io/encodings/ascii/ascii.factor index deb1a7121f..1654cb8b83 100644 --- a/basis/io/encodings/ascii/ascii.factor +++ b/basis/io/encodings/ascii/ascii.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: io io.encodings kernel math io.encodings.private io.encodings.iana ; +USING: io io.encodings kernel math io.encodings.private ; IN: io.encodings.ascii n-table [ initial-e>n ] initialize [ n>e-table get-global set-at ] with each ] [ "Bad encoding registration" throw ] if* ] [ swap e>n-table get-global set-at ] 2bi ; + +ascii "ANSI_X3.4-1968" register-encoding diff --git a/basis/tools/deploy/shaker/shaker.factor b/basis/tools/deploy/shaker/shaker.factor index 55433299ad..8ee0393091 100755 --- a/basis/tools/deploy/shaker/shaker.factor +++ b/basis/tools/deploy/shaker/shaker.factor @@ -157,6 +157,7 @@ IN: tools.deploy.shaker "specializer" "step-into" "step-into?" + "superclass" "transform-n" "transform-quot" "tuple-dispatch-generic" From 39b53817b948b69e8d61525a03fb20f78b970946 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 31 Mar 2009 08:15:27 -0500 Subject: [PATCH 3/7] Small size reduction for deployed images --- basis/compiler/codegen/codegen.factor | 2 +- vm/callstack.c | 2 +- vm/code_block.c | 11 +++++------ vm/code_block.h | 5 ++++- vm/debug.c | 11 +++++++---- vm/quotations.c | 3 ++- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 7df80c6b6e..65e70bd042 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -53,7 +53,7 @@ SYMBOL: labels V{ } clone literal-table set V{ } clone calls set compiling-word set - compiled-stack-traces? compiling-word get f ? add-literal ; + compiled-stack-traces? [ compiling-word get add-literal ] when ; : generate ( mr -- asm ) [ diff --git a/vm/callstack.c b/vm/callstack.c index d44a889756..b7e6b946bb 100755 --- a/vm/callstack.c +++ b/vm/callstack.c @@ -103,7 +103,7 @@ CELL frame_type(F_STACK_FRAME *frame) CELL frame_executing(F_STACK_FRAME *frame) { F_CODE_BLOCK *compiled = frame_code(frame); - if(compiled->literals == F) + if(compiled->literals == F || !stack_traces_p()) return F; else { diff --git a/vm/code_block.c b/vm/code_block.c index c6ecb2f431..8dda8bc16e 100644 --- a/vm/code_block.c +++ b/vm/code_block.c @@ -11,7 +11,7 @@ void iterate_relocations(F_CODE_BLOCK *compiled, RELOCATION_ITERATOR iter) { F_BYTE_ARRAY *relocation = untag_object(compiled->relocation); - CELL index = 1; + CELL index = stack_traces_p() ? 1 : 0; F_REL *rel = (F_REL *)(relocation + 1); F_REL *rel_end = (F_REL *)((char *)rel + byte_array_capacity(relocation)); @@ -368,11 +368,6 @@ void deposit_integers(CELL here, F_ARRAY *array, CELL format) } } -bool stack_traces_p(void) -{ - return to_boolean(userenv[STACK_TRACES_ENV]); -} - CELL compiled_code_format(void) { return untag_fixnum_fast(userenv[JIT_CODE_FORMAT]); @@ -429,6 +424,10 @@ F_CODE_BLOCK *add_code_block( UNREGISTER_ROOT(relocation); UNREGISTER_ROOT(literals); + /* slight space optimization */ + if(type_of(literals) == ARRAY_TYPE && array_capacity(untag_object(literals)) == 0) + literals = F; + /* compiled header */ compiled->block.type = type; compiled->block.last_scan = NURSERY; diff --git a/vm/code_block.h b/vm/code_block.h index b00e4be8b6..cb8ebf5e19 100644 --- a/vm/code_block.h +++ b/vm/code_block.h @@ -75,7 +75,10 @@ void relocate_code_block(F_CODE_BLOCK *relocating); CELL compiled_code_format(void); -bool stack_traces_p(void); +INLINE bool stack_traces_p(void) +{ + return userenv[STACK_TRACES_ENV] != F; +} F_CODE_BLOCK *add_code_block( CELL type, diff --git a/vm/debug.c b/vm/debug.c index adae1cdd36..6f7e883785 100755 --- a/vm/debug.c +++ b/vm/debug.c @@ -311,7 +311,7 @@ void find_data_references(CELL look_for_) /* Dump all code blocks for debugging */ void dump_code_heap(void) { - CELL size = 0; + CELL reloc_size = 0, literal_size = 0; F_BLOCK *scan = first_block(&code_heap); @@ -324,11 +324,13 @@ void dump_code_heap(void) status = "free"; break; case B_ALLOCATED: - size += object_size(((F_CODE_BLOCK *)scan)->relocation); + reloc_size += object_size(((F_CODE_BLOCK *)scan)->relocation); + literal_size += object_size(((F_CODE_BLOCK *)scan)->literals); status = "allocated"; break; case B_MARKED: - size += object_size(((F_CODE_BLOCK *)scan)->relocation); + reloc_size += object_size(((F_CODE_BLOCK *)scan)->relocation); + literal_size += object_size(((F_CODE_BLOCK *)scan)->literals); status = "marked"; break; default: @@ -343,7 +345,8 @@ void dump_code_heap(void) scan = next_block(&code_heap,scan); } - print_cell(size); print_string(" bytes of relocation data\n"); + print_cell(reloc_size); print_string(" bytes of relocation data\n"); + print_cell(literal_size); print_string(" bytes of literal data\n"); } void factorbug(void) diff --git a/vm/quotations.c b/vm/quotations.c index 86e47745b7..e18e6b6098 100755 --- a/vm/quotations.c +++ b/vm/quotations.c @@ -180,7 +180,8 @@ void jit_compile(CELL quot, bool relocate) GROWABLE_ARRAY(literals); REGISTER_ROOT(literals); - GROWABLE_ARRAY_ADD(literals,stack_traces_p() ? quot : F); + if(stack_traces_p()) + GROWABLE_ARRAY_ADD(literals,quot); bool stack_frame = jit_stack_frame_p(untag_object(array)); From 932d44cab8d62f2add4187ac4c3d88f98009f00e Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 31 Mar 2009 08:16:04 -0500 Subject: [PATCH 4/7] Small speedup for code using H{ } clone and with-scope --- .../compiler/tree/propagation/known-words/known-words.factor | 2 +- basis/hints/hints.factor | 2 +- core/hashtables/hashtables.factor | 4 ++-- core/namespaces/namespaces.factor | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/basis/compiler/tree/propagation/known-words/known-words.factor b/basis/compiler/tree/propagation/known-words/known-words.factor index ecfd415579..1b5d383353 100644 --- a/basis/compiler/tree/propagation/known-words/known-words.factor +++ b/basis/compiler/tree/propagation/known-words/known-words.factor @@ -312,7 +312,7 @@ generic-comparison-ops [ \ clone [ in-d>> first value-info literal>> { { V{ } [ [ drop { } 0 vector boa ] ] } - { H{ } [ [ drop hashtable new ] ] } + { H{ } [ [ drop 0 ] ] } [ drop f ] } case ] "custom-inlining" set-word-prop diff --git a/basis/hints/hints.factor b/basis/hints/hints.factor index 597367c353..804ef035f4 100644 --- a/basis/hints/hints.factor +++ b/basis/hints/hints.factor @@ -119,6 +119,6 @@ SYNTAX: HINTS: \ >be { { bignum fixnum } { fixnum fixnum } } "specializer" set-word-prop -\ hashtable \ at* method { { fixnum hashtable } { word hashtable } } "specializer" set-word-prop +\ hashtable \ at* method { { fixnum object } { word object } } "specializer" set-word-prop \ hashtable \ set-at method { { object fixnum object } { object word object } } "specializer" set-word-prop diff --git a/core/hashtables/hashtables.factor b/core/hashtables/hashtables.factor index 8aa13a5f5e..f95a7a7e67 100644 --- a/core/hashtables/hashtables.factor +++ b/core/hashtables/hashtables.factor @@ -79,7 +79,7 @@ TUPLE: hashtable : grow-hash ( hash -- ) [ [ >alist ] [ assoc-size 1+ ] bi ] keep [ reset-hash ] keep - swap (rehash) ; inline + swap (rehash) ; : ?grow-hash ( hash -- ) dup hash-large? [ @@ -95,7 +95,7 @@ TUPLE: hashtable PRIVATE> : ( n -- hash ) - hashtable new [ reset-hash ] keep ; + hashtable new [ reset-hash ] keep ; inline M: hashtable at* ( key hash -- value ? ) key@ [ 3 fixnum+fast slot t ] [ 2drop f f ] if ; diff --git a/core/namespaces/namespaces.factor b/core/namespaces/namespaces.factor index 623e2ddcda..b0e764c94d 100644 --- a/core/namespaces/namespaces.factor +++ b/core/namespaces/namespaces.factor @@ -30,6 +30,6 @@ PRIVATE> : bind ( ns quot -- ) swap >n call ndrop ; inline : counter ( variable -- n ) global [ 0 or 1+ dup ] change-at ; : make-assoc ( quot exemplar -- hash ) 20 swap new-assoc [ swap bind ] keep ; inline -: with-scope ( quot -- ) H{ } clone swap bind ; inline +: with-scope ( quot -- ) 5 swap bind ; inline : with-variable ( value key quot -- ) [ associate ] dip bind ; inline : initialize ( variable quot -- ) [ global ] dip [ unless* ] curry change-at ; inline \ No newline at end of file From 65cb08c3550962672a03d14ae76ae23da6d224c4 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Tue, 31 Mar 2009 11:12:05 -0500 Subject: [PATCH 5/7] fix help-lint for syndication --- basis/syndication/syndication.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/syndication/syndication.factor b/basis/syndication/syndication.factor index 3bfc95fe3a..75c1824c78 100755 --- a/basis/syndication/syndication.factor +++ b/basis/syndication/syndication.factor @@ -106,7 +106,7 @@ TUPLE: entry title url description date ; { "feed" [ atom1.0 ] } } case ; -GENERIC: parse-feed ( string -- feed ) +GENERIC: parse-feed ( sequence -- feed ) M: string parse-feed [ string>xml xml>feed ] with-html-entities ; From a07c17598efaaffee33c146464573d184042c53c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Tue, 31 Mar 2009 16:04:39 -0500 Subject: [PATCH 6/7] redo state parser to avoid dynamic variables --- extra/html/parser/state/state-tests.factor | 34 +++++++--- extra/html/parser/state/state.factor | 73 ++++++++++++++-------- 2 files changed, 72 insertions(+), 35 deletions(-) diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index da70d0fa12..f676649aa8 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -1,14 +1,30 @@ -USING: tools.test html.parser.state ascii kernel ; +USING: tools.test html.parser.state ascii kernel accessors ; IN: html.parser.state.tests -: take-rest ( -- string ) - [ f ] take-until ; +[ "hello" ] +[ "hello" [ take-rest ] string-parse ] unit-test -: take-char ( -- string ) - [ get-char = ] curry take-until ; +[ "hi" " how are you?" ] +[ + "hi how are you?" + [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse +] unit-test + +[ "foo" ";bar" ] +[ + "foo;bar" [ + [ CHAR: ; take-until-char ] [ take-rest ] bi + ] string-parse +] unit-test -[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test -[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test -[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test [ "foo " " bar" ] -[ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test +[ + "foo and bar" [ + [ "and" take-until-string ] [ take-rest ] bi + ] string-parse +] unit-test + +[ 6 ] +[ + " foo " [ skip-whitespace i>> ] string-parse +] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index 1b3f188a78..c69fd76af5 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -1,41 +1,62 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: namespaces math kernel sequences accessors fry circular ; +USING: namespaces math kernel sequences accessors fry circular +unicode.case unicode.categories locals ; IN: html.parser.state -TUPLE: state string i ; +TUPLE: state-parser string i ; -: get-i ( -- i ) state get i>> ; inline +: ( string -- state-parser ) + state-parser new + swap >>string + 0 >>i ; -: get-char ( -- char ) - state get [ i>> ] [ string>> ] bi ?nth ; inline +: (get-char) ( i state -- char/f ) + string>> ?nth ; inline -: get-next ( -- char ) - state get [ i>> 1+ ] [ string>> ] bi ?nth ; inline +: get-char ( state -- char/f ) + [ i>> ] keep (get-char) ; inline -: next ( -- ) - state get [ 1+ ] change-i drop ; inline +: get-next ( state -- char/f ) + [ i>> 1+ ] keep (get-char) ; inline + +: next ( state -- state ) + [ 1+ ] change-i ; inline + +: get+increment ( state -- char/f ) + [ get-char ] [ next drop ] bi ; inline : string-parse ( string quot -- ) - [ 0 state boa state ] dip with-variable ; inline + [ ] dip call ; inline -: short* ( n seq -- n' seq ) - over [ nip dup length swap ] unless ; inline +:: skip-until ( state quot: ( obj -- ? ) -- ) + state get-char [ + quot call [ state next quot skip-until ] unless + ] when* ; inline recursive -: skip-until ( quot: ( -- ? ) -- ) - get-char [ - [ call ] keep swap - [ drop ] [ next skip-until ] if - ] [ drop ] if ; inline recursive +: take-until ( state quot: ( obj -- ? ) -- string ) + [ drop i>> ] + [ skip-until ] + [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline -: take-until ( quot: ( -- ? ) -- ) - get-i [ skip-until ] dip get-i - state get string>> subseq ; inline +:: take-until-string ( state-parser string -- string' ) + string length :> growing + state-parser + [ + growing push-growing-circular + string growing sequence= + ] take-until :> found + found dup length + growing length 1- - head + state-parser next drop ; + +: skip-whitespace ( state -- state ) + [ [ blank? not ] take-until drop ] keep ; -: string-matches? ( string circular -- ? ) - get-char over push-growing-circular sequence= ; inline +: take-rest ( state -- string ) + [ drop f ] take-until ; inline -: take-string ( match -- string ) - dup length - [ 2dup string-matches? ] take-until nip - dup length rot length 1- - head next ; inline +: take-until-char ( state ch -- string ) + '[ _ = ] take-until ; + +: string-parse-end? ( state -- ? ) get-next not ; From ebddd32677f92efda0715e9b1817288a4dc3d447 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Tue, 31 Mar 2009 16:05:11 -0500 Subject: [PATCH 7/7] remove duplication, refactor html.parser to use new state parser --- extra/html/parser/parser.factor | 145 ++++++++++----------- extra/html/parser/utils/utils-tests.factor | 9 +- extra/html/parser/utils/utils.factor | 16 +-- 3 files changed, 69 insertions(+), 101 deletions(-) diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index 60e5ddbf54..677737618b 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -1,10 +1,12 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays html.parser.utils hashtables io kernel -namespaces make prettyprint quotations sequences splitting -html.parser.state strings unicode.categories unicode.case ; +USING: accessors arrays hashtables html.parser.state +html.parser.utils kernel make namespaces sequences +unicode.case unicode.categories combinators.short-circuit +quoting ; IN: html.parser + TUPLE: tag name attributes text closing? ; SINGLETON: text @@ -28,113 +30,100 @@ SYMBOL: tagstack : make-tag ( string attribs -- tag ) [ [ closing-tag? ] keep "/" trim1 ] dip rot ; -: make-text-tag ( string -- tag ) +: new-tag ( string type -- tag ) tag new - text >>name - swap >>text ; + swap >>name + swap >>text ; inline -: make-comment-tag ( string -- tag ) - tag new - comment >>name - swap >>text ; +: make-text-tag ( string -- tag ) text new-tag ; inline -: make-dtd-tag ( string -- tag ) - tag new - dtd >>name - swap >>text ; +: make-comment-tag ( string -- tag ) comment new-tag ; inline -: read-whitespace ( -- string ) - [ get-char blank? not ] take-until ; +: make-dtd-tag ( string -- tag ) dtd new-tag ; inline -: read-whitespace* ( -- ) read-whitespace drop ; +: read-single-quote ( state-parser -- string ) + [ [ CHAR: ' = ] take-until ] [ next drop ] bi ; -: read-token ( -- string ) - read-whitespace* - [ get-char blank? ] take-until ; +: read-double-quote ( state-parser -- string ) + [ [ CHAR: " = ] take-until ] [ next drop ] bi ; -: read-single-quote ( -- string ) - [ get-char CHAR: ' = ] take-until ; +: read-quote ( state-parser -- string ) + dup get+increment CHAR: ' = + [ read-single-quote ] [ read-double-quote ] if ; -: read-double-quote ( -- string ) - [ get-char CHAR: " = ] take-until ; +: read-key ( state-parser -- string ) + skip-whitespace + [ { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ; -: read-quote ( -- string ) - get-char next CHAR: ' = - [ read-single-quote ] [ read-double-quote ] if next ; +: read-= ( state-parser -- ) + skip-whitespace + [ [ CHAR: = = ] take-until drop ] [ next drop ] bi ; -: read-key ( -- string ) - read-whitespace* - [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ; +: read-token ( state-parser -- string ) + [ blank? ] take-until ; -: read-= ( -- ) - read-whitespace* - [ get-char CHAR: = = ] take-until drop next ; - -: read-value ( -- string ) - read-whitespace* - get-char quote? [ read-quote ] [ read-token ] if +: read-value ( state-parser -- string ) + skip-whitespace + dup get-char quote? [ read-quote ] [ read-token ] if [ blank? ] trim ; -: read-comment ( -- ) - "-->" take-string make-comment-tag push-tag ; +: read-comment ( state-parser -- ) + "-->" take-until-string make-comment-tag push-tag ; -: read-dtd ( -- ) - ">" take-string make-dtd-tag push-tag ; +: read-dtd ( state-parser -- ) + ">" take-until-string make-dtd-tag push-tag ; -: read-bang ( -- ) - next get-char CHAR: - = get-next CHAR: - = and [ +: read-bang ( state-parser -- ) + next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [ next next read-comment ] [ read-dtd ] if ; -: read-tag ( -- string ) - [ get-char CHAR: > = get-char CHAR: < = or ] take-until - get-char CHAR: < = [ next ] unless ; +: read-tag ( state-parser -- string ) + [ [ "><" member? ] take-until ] + [ dup get-char CHAR: < = [ next ] unless drop ] bi ; -: read-< ( -- string ) - next get-char CHAR: ! = [ - read-bang f +: read-until-< ( state-parser -- string ) + [ CHAR: < = ] take-until ; + +: parse-text ( state-parser -- ) + read-until-< [ make-text-tag push-tag ] unless-empty ; + +: (parse-attributes) ( state-parser -- ) + skip-whitespace + dup string-parse-end? [ + drop ] [ - read-tag + [ + [ read-key >lower ] [ read-= ] [ read-value ] tri + 2array , + ] keep (parse-attributes) ] if ; -: read-until-< ( -- string ) - [ get-char CHAR: < = ] take-until ; - -: parse-text ( -- ) - read-until-< [ - make-text-tag push-tag - ] unless-empty ; - -: (parse-attributes) ( -- ) - read-whitespace* - string-parse-end? [ - read-key >lower read-= read-value - 2array , (parse-attributes) - ] unless ; - -: parse-attributes ( -- hashtable ) +: parse-attributes ( state-parser -- hashtable ) [ (parse-attributes) ] { } make >hashtable ; : (parse-tag) ( string -- string' hashtable ) [ - read-token >lower - parse-attributes + [ read-token >lower ] [ parse-attributes ] bi ] string-parse ; -: parse-tag ( -- ) - read-< [ - (parse-tag) make-tag push-tag - ] unless-empty ; +: read-< ( state-parser -- string/f ) + next dup get-char [ + CHAR: ! = [ read-bang f ] [ read-tag ] if + ] [ + drop f + ] if* ; -: (parse-html) ( -- ) - get-next [ - parse-text - parse-tag - (parse-html) - ] when ; +: parse-tag ( state-parser -- ) + read-< [ (parse-tag) make-tag push-tag ] unless-empty ; + +: (parse-html) ( state-parser -- ) + dup get-next [ + [ parse-text ] [ parse-tag ] [ (parse-html) ] tri + ] [ drop ] if ; : tag-parse ( quot -- vector ) V{ } clone tagstack [ string-parse ] with-variable ; inline diff --git a/extra/html/parser/utils/utils-tests.factor b/extra/html/parser/utils/utils-tests.factor index 6d8e3bc05f..ec6780687d 100644 --- a/extra/html/parser/utils/utils-tests.factor +++ b/extra/html/parser/utils/utils-tests.factor @@ -1,20 +1,13 @@ USING: assocs combinators continuations hashtables hashtables.private io kernel math namespaces prettyprint quotations sequences splitting -strings tools.test ; -USING: html.parser.utils ; +strings tools.test html.parser.utils quoting ; IN: html.parser.utils.tests [ "'Rome'" ] [ "Rome" single-quote ] unit-test [ "\"Roma\"" ] [ "Roma" double-quote ] unit-test [ "'Firenze'" ] [ "Firenze" quote ] unit-test [ "\"Caesar's\"" ] [ "Caesar's" quote ] unit-test -[ f ] [ "" quoted? ] unit-test -[ t ] [ "''" quoted? ] unit-test -[ t ] [ "\"\"" quoted? ] unit-test -[ t ] [ "\"Circus Maximus\"" quoted? ] unit-test -[ t ] [ "'Circus Maximus'" quoted? ] unit-test -[ f ] [ "Circus Maximus" quoted? ] unit-test [ "'Italy'" ] [ "Italy" ?quote ] unit-test [ "'Italy'" ] [ "'Italy'" ?quote ] unit-test [ "\"Italy\"" ] [ "\"Italy\"" ?quote ] unit-test diff --git a/extra/html/parser/utils/utils.factor b/extra/html/parser/utils/utils.factor index c913b9d306..7abd2fcdf7 100644 --- a/extra/html/parser/utils/utils.factor +++ b/extra/html/parser/utils/utils.factor @@ -3,16 +3,12 @@ USING: assocs circular combinators continuations hashtables hashtables.private io kernel math namespaces prettyprint quotations sequences splitting html.parser.state strings -combinators.short-circuit ; +combinators.short-circuit quoting ; IN: html.parser.utils -: string-parse-end? ( -- ? ) get-next not ; - : trim1 ( seq ch -- newseq ) [ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ; -: quote? ( ch -- ? ) "'\"" member? ; - : single-quote ( str -- newstr ) "'" dup surround ; : double-quote ( str -- newstr ) "\"" dup surround ; @@ -21,14 +17,4 @@ IN: html.parser.utils CHAR: ' over member? [ double-quote ] [ single-quote ] if ; -: quoted? ( str -- ? ) - { - [ length 1 > ] - [ first quote? ] - [ [ first ] [ peek ] bi = ] - } 1&& ; - : ?quote ( str -- newstr ) dup quoted? [ quote ] unless ; - -: unquote ( str -- newstr ) - dup quoted? [ but-last-slice rest-slice >string ] when ;