Merge branch 'master' into smalltalk

db4
Slava Pestov 2009-04-01 00:00:20 -05:00
commit 86cf94260c
18 changed files with 240 additions and 181 deletions

View File

@ -10,12 +10,4 @@ IN: bootstrap.ui
{ [ os unix? ] [ "x11" ] } { [ os unix? ] [ "x11" ] }
} cond } cond
] unless* "ui.backend." prepend require ] unless* "ui.backend." prepend require
"ui-text-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require
] when ] when

View File

@ -3,68 +3,72 @@
USING: tools.test namespaces documents documents.elements multiline ; USING: tools.test namespaces documents documents.elements multiline ;
IN: document.elements.tests IN: document.elements.tests
<document> "doc" set SYMBOL: doc
"123\nabc" "doc" get set-doc-string <document> doc set
"123\nabcé" doc get set-doc-string
! char-elt ! char-elt
[ { 0 0 } ] [ { 0 0 } "doc" get char-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 0 } doc get char-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 1 } "doc" get char-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 1 } doc get char-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 0 } "doc" get char-elt prev-elt ] unit-test [ { 0 3 } ] [ { 1 0 } doc get char-elt prev-elt ] unit-test
[ { 1 3 } ] [ { 1 5 } doc get char-elt prev-elt ] unit-test
[ { 1 3 } ] [ { 1 3 } "doc" get char-elt next-elt ] unit-test [ { 1 5 } ] [ { 1 5 } doc get char-elt next-elt ] unit-test
[ { 0 2 } ] [ { 0 1 } "doc" get char-elt next-elt ] unit-test [ { 0 2 } ] [ { 0 1 } doc get char-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 3 } "doc" get char-elt next-elt ] unit-test [ { 1 0 } ] [ { 0 3 } doc get char-elt next-elt ] unit-test
[ { 1 5 } ] [ { 1 3 } doc get char-elt next-elt ] unit-test
! word-elt ! word-elt
<document> "doc" set <document> doc set
"Hello world\nanother line" "doc" get set-doc-string "Hello world\nanother line" doc get set-doc-string
[ { 0 0 } ] [ { 0 0 } "doc" get word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 0 } doc get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } "doc" get word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 2 } doc get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } "doc" get word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 5 } doc get word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 6 } "doc" get word-elt prev-elt ] unit-test [ { 0 5 } ] [ { 0 6 } doc get word-elt prev-elt ] unit-test
[ { 0 6 } ] [ { 0 8 } "doc" get word-elt prev-elt ] unit-test [ { 0 6 } ] [ { 0 8 } doc get word-elt prev-elt ] unit-test
[ { 0 11 } ] [ { 1 0 } "doc" get word-elt prev-elt ] unit-test [ { 0 11 } ] [ { 1 0 } doc get word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 0 } doc get word-elt next-elt ] unit-test
[ { 0 6 } ] [ { 0 5 } doc get word-elt next-elt ] unit-test
[ { 0 11 } ] [ { 0 6 } doc get word-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 11 } doc get word-elt next-elt ] unit-test
[ { 0 5 } ] [ { 0 0 } "doc" get word-elt next-elt ] unit-test
[ { 0 6 } ] [ { 0 5 } "doc" get word-elt next-elt ] unit-test
[ { 0 11 } ] [ { 0 6 } "doc" get word-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 11 } "doc" get word-elt next-elt ] unit-test
! one-word-elt ! one-word-elt
[ { 0 0 } ] [ { 0 0 } "doc" get one-word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 0 } doc get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } "doc" get one-word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 2 } doc get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } "doc" get one-word-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 5 } doc get one-word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 2 } "doc" get one-word-elt next-elt ] unit-test [ { 0 5 } ] [ { 0 2 } doc get one-word-elt next-elt ] unit-test
[ { 0 5 } ] [ { 0 5 } "doc" get one-word-elt next-elt ] unit-test [ { 0 5 } ] [ { 0 5 } doc get one-word-elt next-elt ] unit-test
! line-elt ! line-elt
<document> "doc" set <document> doc set
"Hello\nworld, how are\nyou?" "doc" get set-doc-string "Hello\nworld, how are\nyou?" doc get set-doc-string
[ { 0 0 } ] [ { 0 3 } "doc" get line-elt prev-elt ] unit-test [ { 0 0 } ] [ { 0 3 } doc get line-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 3 } "doc" get line-elt prev-elt ] unit-test [ { 0 3 } ] [ { 1 3 } doc get line-elt prev-elt ] unit-test
[ { 2 4 } ] [ { 2 1 } "doc" get line-elt next-elt ] unit-test [ { 2 4 } ] [ { 2 1 } doc get line-elt next-elt ] unit-test
! one-line-elt ! one-line-elt
[ { 1 0 } ] [ { 1 3 } "doc" get one-line-elt prev-elt ] unit-test [ { 1 0 } ] [ { 1 3 } doc get one-line-elt prev-elt ] unit-test
[ { 1 14 } ] [ { 1 3 } "doc" get one-line-elt next-elt ] unit-test [ { 1 14 } ] [ { 1 3 } doc get one-line-elt next-elt ] unit-test
! page-elt ! page-elt
<document> "doc" set <document> doc set
<" First line <" First line
Second line Second line
Third line Third line
Fourth line Fourth line
Fifth line Fifth line
Sixth line"> "doc" get set-doc-string Sixth line"> doc get set-doc-string
[ { 0 0 } ] [ { 3 3 } "doc" get 4 <page-elt> prev-elt ] unit-test [ { 0 0 } ] [ { 3 3 } doc get 4 <page-elt> prev-elt ] unit-test
[ { 1 2 } ] [ { 5 2 } "doc" get 4 <page-elt> prev-elt ] unit-test [ { 1 2 } ] [ { 5 2 } doc get 4 <page-elt> prev-elt ] unit-test
[ { 4 3 } ] [ { 0 3 } "doc" get 4 <page-elt> next-elt ] unit-test [ { 4 3 } ] [ { 0 3 } doc get 4 <page-elt> next-elt ] unit-test
[ { 5 10 } ] [ { 4 2 } "doc" get 4 <page-elt> next-elt ] unit-test [ { 5 10 } ] [ { 4 2 } doc get 4 <page-elt> next-elt ] unit-test
! doc-elt ! doc-elt
[ { 0 0 } ] [ { 3 4 } "doc" get doc-elt prev-elt ] unit-test [ { 0 0 } ] [ { 3 4 } doc get doc-elt prev-elt ] unit-test
[ { 5 10 } ] [ { 3 4 } "doc" get doc-elt next-elt ] unit-test [ { 5 10 } ] [ { 3 4 } doc get doc-elt next-elt ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2006, 2009 Slava Pestov. ! Copyright (C) 2006, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: arrays combinators documents fry kernel math sequences USING: arrays combinators documents fry kernel math sequences
unicode.categories accessors ; accessors unicode.categories unicode.breaks combinators.short-circuit ;
IN: documents.elements IN: documents.elements
GENERIC: prev-elt ( loc document elt -- newloc ) GENERIC: prev-elt ( loc document elt -- newloc )
@ -20,27 +20,32 @@ SINGLETON: char-elt
<PRIVATE <PRIVATE
: (prev-char) ( loc document quot -- loc ) : prev ( loc document quot: ( loc document -- loc ) -- loc )
{ {
{ [ pick { 0 0 } = ] [ 2drop ] } { [ pick { 0 0 } = ] [ 2drop ] }
{ [ pick second zero? ] [ drop [ first 1- ] dip line-end ] } { [ pick second zero? ] [ drop [ first 1- ] dip line-end ] }
[ call ] [ call ]
} cond ; inline } cond ; inline
: (next-char) ( loc document quot -- loc ) : next ( loc document quot: ( loc document -- loc ) -- loc )
{ {
{ [ 2over doc-end = ] [ 2drop ] } { [ 2over doc-end = ] [ 2drop ] }
{ [ 2over line-end? ] [ 2drop first 1+ 0 2array ] } { [ 2over line-end? ] [ 2drop first 1+ 0 2array ] }
[ call ] [ call ]
} cond ; inline } cond ; inline
: modify-col ( loc document quot: ( col str -- col' ) -- loc )
pick [
[ [ first2 swap ] dip doc-line ] dip call
] dip =col ; inline
PRIVATE> PRIVATE>
M: char-elt prev-elt M: char-elt prev-elt
drop [ drop -1 +col ] (prev-char) ; drop [ [ last-grapheme-from ] modify-col ] prev ;
M: char-elt next-elt M: char-elt next-elt
drop [ drop 1 +col ] (next-char) ; drop [ [ first-grapheme-from ] modify-col ] next ;
SINGLETON: one-char-elt SINGLETON: one-char-elt
@ -50,21 +55,16 @@ M: one-char-elt next-elt 2drop ;
<PRIVATE <PRIVATE
: (word-elt) ( loc document quot -- loc ) : blank-at? ( n seq -- n seq ? )
pick [
[ [ first2 swap ] dip doc-line ] dip call
] dip =col ; inline
: ((word-elt)) ( n seq -- n seq ? )
2dup ?nth blank? ; 2dup ?nth blank? ;
: break-detector ( ? -- quot ) : break-detector ( ? -- quot )
'[ blank? _ xor ] ; inline '[ blank? _ xor ] ; inline
: (prev-word) ( col str ? -- col ) : prev-word ( col str ? -- col )
break-detector find-last-from drop ?1+ ; break-detector find-last-from drop ?1+ ;
: (next-word) ( col str ? -- col ) : next-word ( col str ? -- col )
[ break-detector find-from drop ] [ drop length ] 2bi or ; [ break-detector find-from drop ] [ drop length ] 2bi or ;
PRIVATE> PRIVATE>
@ -73,23 +73,23 @@ SINGLETON: one-word-elt
M: one-word-elt prev-elt M: one-word-elt prev-elt
drop drop
[ [ 1- ] dip f (prev-word) ] (word-elt) ; [ [ 1- ] dip f prev-word ] modify-col ;
M: one-word-elt next-elt M: one-word-elt next-elt
drop drop
[ f (next-word) ] (word-elt) ; [ f next-word ] modify-col ;
SINGLETON: word-elt SINGLETON: word-elt
M: word-elt prev-elt M: word-elt prev-elt
drop drop
[ [ [ 1- ] dip ((word-elt)) (prev-word) ] (word-elt) ] [ [ [ 1- ] dip blank-at? prev-word ] modify-col ]
(prev-char) ; prev ;
M: word-elt next-elt M: word-elt next-elt
drop drop
[ [ ((word-elt)) (next-word) ] (word-elt) ] [ [ blank-at? next-word ] modify-col ]
(next-char) ; next ;
SINGLETON: one-line-elt SINGLETON: one-line-elt
@ -118,4 +118,4 @@ SINGLETON: doc-elt
M: doc-elt prev-elt 3drop { 0 0 } ; M: doc-elt prev-elt 3drop { 0 0 } ;
M: doc-elt next-elt drop nip doc-end ; M: doc-elt next-elt drop nip doc-end ;

View File

@ -155,18 +155,21 @@ TUPLE: peg-head rule-id involved-set eval-set ;
dup pos>> pos set ans>> dup pos>> pos set ans>>
; inline ; inline
:: (setup-lr) ( r l s -- ) :: (setup-lr) ( l s -- )
s head>> l head>> eq? [ s [
l head>> s (>>head) s left-recursion? [ s throw ] unless
l head>> [ s rule-id>> suffix ] change-involved-set drop s head>> l head>> eq? [
r l s next>> (setup-lr) l head>> s (>>head)
] unless ; l head>> [ s rule-id>> suffix ] change-involved-set drop
l s next>> (setup-lr)
] unless
] when ;
:: setup-lr ( r l -- ) :: setup-lr ( r l -- )
l head>> [ l head>> [
r rule-id V{ } clone V{ } clone peg-head boa l (>>head) r rule-id V{ } clone V{ } clone peg-head boa l (>>head)
] unless ] unless
r l lrstack get (setup-lr) ; l lrstack get (setup-lr) ;
:: lr-answer ( r p m -- ast ) :: lr-answer ( r p m -- ast )
[let* | [let* |
@ -216,8 +219,10 @@ TUPLE: peg-head rule-id involved-set eval-set ;
lrstack get next>> lrstack set lrstack get next>> lrstack set
pos get m (>>pos) pos get m (>>pos)
lr head>> [ lr head>> [
ans lr (>>seed) m ans>> left-recursion? [
r p m lr-answer ans lr (>>seed)
r p m lr-answer
] [ ans ] if
] [ ] [
ans m (>>ans) ans m (>>ans)
ans ans

View File

@ -3,8 +3,7 @@
USING: accessors arrays assocs continuations kernel math models USING: accessors arrays assocs continuations kernel math models
namespaces opengl sequences io combinators combinators.short-circuit namespaces opengl sequences io combinators combinators.short-circuit
fry math.vectors math.rectangles cache ui.gadgets ui.gestures fry math.vectors math.rectangles cache ui.gadgets ui.gestures
ui.render ui.text ui.text.private ui.backend ui.gadgets.tracks ui.render ui.backend ui.gadgets.tracks ui.commands ;
ui.commands ;
IN: ui.gadgets.worlds IN: ui.gadgets.worlds
TUPLE: world < track TUPLE: world < track
@ -53,7 +52,6 @@ M: world request-focus-on ( child gadget -- )
swap >>status swap >>status
swap >>title swap >>title
swap 1 track-add swap 1 track-add
dup init-text-rendering
dup request-focus ; dup request-focus ;
: <world> ( gadget title status -- world ) : <world> ( gadget title status -- world )
@ -74,15 +72,20 @@ M: world remove-gadget
2dup layers>> memq? 2dup layers>> memq?
[ layers>> delq ] [ call-next-method ] if ; [ layers>> delq ] [ call-next-method ] if ;
SYMBOL: flush-layout-cache-hook
flush-layout-cache-hook [ [ ] ] initialize
: (draw-world) ( world -- ) : (draw-world) ( world -- )
dup handle>> [ dup handle>> [
{ {
[ init-gl ] [ init-gl ]
[ draw-gadget ] [ draw-gadget ]
[ finish-text-rendering ] [ text-handle>> [ purge-cache ] when* ]
[ images>> [ purge-cache ] when* ] [ images>> [ purge-cache ] when* ]
} cleave } cleave
] with-gl-context ; ] with-gl-context
flush-layout-cache-hook get call( -- ) ;
: draw-world? ( world -- ? ) : draw-world? ( world -- ? )
#! We don't draw deactivated worlds, or those with 0 size. #! We don't draw deactivated worlds, or those with 0 size.

View File

@ -18,12 +18,11 @@ M: core-text-renderer string-dim
[ cached-line dim>> ] [ cached-line dim>> ]
if-empty ; if-empty ;
M: core-text-renderer finish-text-rendering M: core-text-renderer flush-layout-cache
text-handle>> purge-cache
cached-lines get purge-cache ; cached-lines get purge-cache ;
: rendered-line ( font string -- texture ) : rendered-line ( font string -- texture )
world get text-handle>> world get world-text-handle
[ cached-line [ image>> ] [ loc>> ] bi <texture> ] [ cached-line [ image>> ] [ loc>> ] bi <texture> ]
2cache ; 2cache ;

View File

@ -14,12 +14,11 @@ M: pango-renderer string-dim
[ " " string-dim { 0 1 } v* ] [ " " string-dim { 0 1 } v* ]
[ cached-layout logical-rect>> dim>> [ >integer ] map ] if-empty ; [ cached-layout logical-rect>> dim>> [ >integer ] map ] if-empty ;
M: pango-renderer finish-text-rendering M: pango-renderer flush-layout-cache
text-handle>> purge-cache
cached-layouts get purge-cache ; cached-layouts get purge-cache ;
: rendered-layout ( font string -- texture ) : rendered-layout ( font string -- texture )
world get text-handle>> world get world-text-handle
[ cached-layout [ image>> ] [ text-position vneg ] bi <texture> ] [ cached-layout [ image>> ] [ text-position vneg ] bi <texture> ]
2cache ; 2cache ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov. ! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays sequences math math.order opengl opengl.gl USING: kernel arrays sequences math math.order opengl opengl.gl
strings fonts colors accessors ; strings fonts colors accessors namespaces ui.gadgets.worlds ;
IN: ui.text IN: ui.text
<PRIVATE <PRIVATE
@ -10,9 +10,13 @@ SYMBOL: font-renderer
HOOK: init-text-rendering font-renderer ( world -- ) HOOK: init-text-rendering font-renderer ( world -- )
HOOK: finish-text-rendering font-renderer ( world -- ) : world-text-handle ( world -- handle )
dup text-handle>> [ dup init-text-rendering ] unless
text-handle>> ;
M: object finish-text-rendering drop ; HOOK: flush-layout-cache font-renderer ( -- )
[ flush-layout-cache ] flush-layout-cache-hook set-global
HOOK: string-dim font-renderer ( font string -- dim ) HOOK: string-dim font-renderer ( font string -- dim )
@ -68,4 +72,14 @@ M: array draw-text
[ draw-string ] [ draw-string ]
[ [ 0.0 ] 2dip string-height 0.0 glTranslated ] 2bi [ [ 0.0 ] 2dip string-height 0.0 glTranslated ] 2bi
] with each ] with each
] do-matrix ; ] do-matrix ;
USING: vocabs.loader namespaces system combinators ;
"ui-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require

View File

@ -4,8 +4,7 @@ USING: arrays assocs io kernel math models namespaces make dlists
deques sequences threads sequences words continuations init deques sequences threads sequences words continuations init
combinators hashtables concurrency.flags sets accessors calendar fry combinators hashtables concurrency.flags sets accessors calendar fry
destructors ui.gadgets ui.gadgets.private ui.gadgets.worlds destructors ui.gadgets ui.gadgets.private ui.gadgets.worlds
ui.gadgets.tracks ui.gestures ui.backend ui.render ui.text ui.gadgets.tracks ui.gestures ui.backend ui.render ;
ui.text.private ;
IN: ui IN: ui
<PRIVATE <PRIVATE
@ -63,7 +62,7 @@ M: world graft*
: (ungraft-world) ( world -- ) : (ungraft-world) ( world -- )
{ {
[ handle>> select-gl-context ] [ handle>> select-gl-context ]
[ text-handle>> dispose ] [ text-handle>> [ dispose ] when* ]
[ images>> [ dispose ] when* ] [ images>> [ dispose ] when* ]
[ hand-clicked close-global ] [ hand-clicked close-global ]
[ hand-gadget close-global ] [ hand-gadget close-global ]
@ -95,8 +94,7 @@ M: world ungraft*
: restore-world ( world -- ) : restore-world ( world -- )
{ {
[ reset-world ] [ reset-world ]
[ init-text-rendering ] [ f >>text-handle f >>images drop ]
[ f >>images drop ]
[ restore-gadget ] [ restore-gadget ]
} cleave ; } cleave ;

View File

@ -9,6 +9,9 @@ IN: unicode.breaks.tests
[ 3 ] [ "\u001112\u001161\u0011abA\u000300a" [ 3 ] [ "\u001112\u001161\u0011abA\u000300a"
dup last-grapheme head last-grapheme ] unit-test dup last-grapheme head last-grapheme ] unit-test
[ 3 ] [ 2 "hello" first-grapheme-from ] unit-test
[ 1 ] [ 2 "hello" last-grapheme-from ] unit-test
: grapheme-break-test ( -- filename ) : grapheme-break-test ( -- filename )
"vocab:unicode/breaks/GraphemeBreakTest.txt" ; "vocab:unicode/breaks/GraphemeBreakTest.txt" ;

View File

@ -101,6 +101,16 @@ PRIVATE>
[ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
nip swap length or 1+ ; nip swap length or 1+ ;
: first-grapheme-from ( start str -- i )
over tail-slice first-grapheme + ;
: last-grapheme ( str -- i )
unclip-last-slice grapheme-class swap
[ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
: last-grapheme-from ( end str -- i )
swap head-slice last-grapheme ;
<PRIVATE <PRIVATE
: >pieces ( str quot: ( str -- i ) -- graphemes ) : >pieces ( str quot: ( str -- i ) -- graphemes )
@ -114,10 +124,6 @@ PRIVATE>
: string-reverse ( str -- rts ) : string-reverse ( str -- rts )
>graphemes reverse concat ; >graphemes reverse concat ;
: last-grapheme ( str -- i )
unclip-last-slice grapheme-class swap
[ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
<PRIVATE <PRIVATE
graphemes init-table table graphemes init-table table

View File

@ -15,6 +15,7 @@ ERROR: bad-effect ;
scan { scan {
{ "(" [ ")" parse-effect ] } { "(" [ ")" parse-effect ] }
{ f [ ")" unexpected-eof ] } { f [ ")" unexpected-eof ] }
[ bad-effect ]
} case 2array } case 2array
] when ] when
] if ] if
@ -31,4 +32,4 @@ ERROR: bad-effect ;
"(" expect ")" parse-effect ; "(" expect ")" parse-effect ;
: parse-call( ( accum word -- accum ) : parse-call( ( accum word -- accum )
[ ")" parse-effect ] dip 2array over push-all ; [ ")" parse-effect ] dip 2array over push-all ;

View File

@ -97,7 +97,7 @@ ERROR: bad-slot-value value class ;
"writing" associate ; "writing" associate ;
: define-writer-generic ( name -- ) : define-writer-generic ( name -- )
writer-word (( object value -- )) define-simple-generic ; writer-word (( value object -- )) define-simple-generic ;
: define-writer ( class slot-spec -- ) : define-writer ( class slot-spec -- )
[ nip name>> define-writer-generic ] [ [ nip name>> define-writer-generic ] [

View File

@ -68,10 +68,10 @@ SYMBOL: tagstack
[ blank? ] trim ; [ blank? ] trim ;
: read-comment ( state-parser -- ) : read-comment ( state-parser -- )
"-->" take-until-string make-comment-tag push-tag ; "-->" take-until-sequence make-comment-tag push-tag ;
: read-dtd ( state-parser -- ) : read-dtd ( state-parser -- )
">" take-until-string make-dtd-tag push-tag ; ">" take-until-sequence make-dtd-tag push-tag ;
: read-bang ( state-parser -- ) : read-bang ( state-parser -- )
next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [ next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
@ -93,7 +93,7 @@ SYMBOL: tagstack
: (parse-attributes) ( state-parser -- ) : (parse-attributes) ( state-parser -- )
skip-whitespace skip-whitespace
dup string-parse-end? [ dup state-parse-end? [
drop drop
] [ ] [
[ [
@ -108,7 +108,7 @@ SYMBOL: tagstack
: (parse-tag) ( string -- string' hashtable ) : (parse-tag) ( string -- string' hashtable )
[ [
[ read-token >lower ] [ parse-attributes ] bi [ read-token >lower ] [ parse-attributes ] bi
] string-parse ; ] state-parse ;
: read-< ( state-parser -- string/f ) : read-< ( state-parser -- string/f )
next dup get-char [ next dup get-char [
@ -126,7 +126,7 @@ SYMBOL: tagstack
] [ drop ] if ; ] [ drop ] if ;
: tag-parse ( quot -- vector ) : tag-parse ( quot -- vector )
V{ } clone tagstack [ string-parse ] with-variable ; inline V{ } clone tagstack [ state-parse ] with-variable ; inline
: parse-html ( string -- vector ) : parse-html ( string -- vector )
[ (parse-html) tagstack get ] tag-parse ; [ (parse-html) tagstack get ] tag-parse ;

View File

@ -2,29 +2,35 @@ USING: tools.test html.parser.state ascii kernel accessors ;
IN: html.parser.state.tests IN: html.parser.state.tests
[ "hello" ] [ "hello" ]
[ "hello" [ take-rest ] string-parse ] unit-test [ "hello" [ take-rest ] state-parse ] unit-test
[ "hi" " how are you?" ] [ "hi" " how are you?" ]
[ [
"hi how are you?" "hi how are you?"
[ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse [ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse
] unit-test ] unit-test
[ "foo" ";bar" ] [ "foo" ";bar" ]
[ [
"foo;bar" [ "foo;bar" [
[ CHAR: ; take-until-char ] [ take-rest ] bi [ CHAR: ; take-until-object ] [ take-rest ] bi
] string-parse ] state-parse
] unit-test ] unit-test
[ "foo " " bar" ] [ "foo " " bar" ]
[ [
"foo and bar" [ "foo and bar" [
[ "and" take-until-string ] [ take-rest ] bi [ "and" take-until-sequence ] [ take-rest ] bi
] string-parse ] state-parse
] unit-test ] unit-test
[ 6 ] [ 6 ]
[ [
" foo " [ skip-whitespace i>> ] string-parse " foo " [ skip-whitespace n>> ] state-parse
] unit-test ] unit-test
[ { 1 2 } ]
[ { 1 2 3 } <state-parser> [ 3 = ] take-until ] unit-test
[ { 1 2 } ]
[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test

View File

@ -2,31 +2,32 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular USING: namespaces math kernel sequences accessors fry circular
unicode.case unicode.categories locals ; unicode.case unicode.categories locals ;
IN: html.parser.state IN: html.parser.state
TUPLE: state-parser string i ; TUPLE: state-parser sequence n ;
: <state-parser> ( string -- state-parser ) : <state-parser> ( sequence -- state-parser )
state-parser new state-parser new
swap >>string swap >>sequence
0 >>i ; 0 >>n ;
: (get-char) ( i state -- char/f ) : (get-char) ( n state -- char/f )
string>> ?nth ; inline sequence>> ?nth ; inline
: get-char ( state -- char/f ) : get-char ( state -- char/f )
[ i>> ] keep (get-char) ; inline [ n>> ] keep (get-char) ; inline
: get-next ( state -- char/f ) : get-next ( state -- char/f )
[ i>> 1+ ] keep (get-char) ; inline [ n>> 1 + ] keep (get-char) ; inline
: next ( state -- state ) : next ( state -- state )
[ 1+ ] change-i ; inline [ 1 + ] change-n ; inline
: get+increment ( state -- char/f ) : get+increment ( state -- char/f )
[ get-char ] [ next drop ] bi ; inline [ get-char ] [ next drop ] bi ; inline
: string-parse ( string quot -- ) : state-parse ( sequence quot -- )
[ <state-parser> ] dip call ; inline [ <state-parser> ] dip call ; inline
:: skip-until ( state quot: ( obj -- ? ) -- ) :: skip-until ( state quot: ( obj -- ? ) -- )
@ -34,17 +35,23 @@ TUPLE: state-parser string i ;
quot call [ state next quot skip-until ] unless quot call [ state next quot skip-until ] unless
] when* ; inline recursive ] when* ; inline recursive
: take-until ( state quot: ( obj -- ? ) -- string ) : state-parse-end? ( state -- ? ) get-next not ;
[ drop i>> ]
[ skip-until ]
[ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline
:: take-until-string ( state-parser string -- string' ) : take-until ( state quot: ( obj -- ? ) -- sequence/f )
string length <growing-circular> :> growing over state-parse-end? [
2drop f
] [
[ drop n>> ]
[ skip-until ]
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
] if ; inline
:: take-until-sequence ( state-parser sequence -- sequence' )
sequence length <growing-circular> :> growing
state-parser state-parser
[ [
growing push-growing-circular growing push-growing-circular
string growing sequence= sequence growing sequence=
] take-until :> found ] take-until :> found
found dup length found dup length
growing length 1- - head growing length 1- - head
@ -53,10 +60,8 @@ TUPLE: state-parser string i ;
: skip-whitespace ( state -- state ) : skip-whitespace ( state -- state )
[ [ blank? not ] take-until drop ] keep ; [ [ blank? not ] take-until drop ] keep ;
: take-rest ( state -- string ) : take-rest ( state -- sequence )
[ drop f ] take-until ; inline [ drop f ] take-until ; inline
: take-until-char ( state ch -- string ) : take-until-object ( state obj -- sequence )
'[ _ = ] take-until ; '[ _ = ] take-until ;
: string-parse-end? ( state -- ? ) get-next not ;

View File

@ -16,11 +16,6 @@ HELP: run-spider
{ "spider" spider } } { "spider" spider } }
{ $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ; { $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ;
HELP: slurp-heap-while
{ $values
{ "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } }
{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ;
ARTICLE: "spider-tutorial" "Spider tutorial" ARTICLE: "spider-tutorial" "Spider tutorial"
"To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider." "To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider."
{ $code <" "http://concatenative.org" <spider> "> } { $code <" "http://concatenative.org" <spider> "> }

View File

@ -3,8 +3,8 @@
USING: accessors fry html.parser html.parser.analyzer USING: accessors fry html.parser html.parser.analyzer
http.client kernel tools.time sets assocs sequences http.client kernel tools.time sets assocs sequences
concurrency.combinators io threads namespaces math multiline concurrency.combinators io threads namespaces math multiline
heaps math.parser inspector urls assoc-heaps logging math.parser inspector urls logging combinators.short-circuit
combinators.short-circuit continuations calendar prettyprint ; continuations calendar prettyprint dlists deques locals ;
IN: spider IN: spider
TUPLE: spider base count max-count sleep max-depth initial-links TUPLE: spider base count max-count sleep max-depth initial-links
@ -13,12 +13,33 @@ filters spidered todo nonmatching quiet ;
TUPLE: spider-result url depth headers fetch-time parsed-html TUPLE: spider-result url depth headers fetch-time parsed-html
links processing-time timestamp ; links processing-time timestamp ;
TUPLE: todo-url url depth ;
: <todo-url> ( url depth -- todo-url )
todo-url new
swap >>depth
swap >>url ;
TUPLE: unique-deque assoc deque ;
: <unique-deque> ( -- unique-deque )
H{ } clone <dlist> unique-deque boa ;
: push-url ( url depth unique-deque -- )
[ <todo-url> ] dip
[ [ [ t ] dip url>> ] [ assoc>> ] bi* set-at ]
[ deque>> push-back ] 2bi ;
: pop-url ( unique-deque -- todo-url ) deque>> pop-front ;
: peek-url ( unique-deque -- todo-url ) deque>> peek-front ;
: <spider> ( base -- spider ) : <spider> ( base -- spider )
>url >url
spider new spider new
over >>base over >>base
swap 0 <unique-min-heap> [ heap-push ] keep >>todo swap 0 <unique-deque> [ push-url ] keep >>todo
<unique-min-heap> >>nonmatching <unique-deque> >>nonmatching
0 >>max-depth 0 >>max-depth
0 >>count 0 >>count
1/0. >>max-count 1/0. >>max-count
@ -27,10 +48,10 @@ links processing-time timestamp ;
<PRIVATE <PRIVATE
: apply-filters ( links spider -- links' ) : apply-filters ( links spider -- links' )
filters>> [ '[ _ 1&& ] filter ] when* ; filters>> [ '[ [ _ 1&& ] filter ] call( seq -- seq' ) ] when* ;
: push-links ( links level assoc-heap -- ) : push-links ( links level unique-deque -- )
'[ _ _ heap-push ] each ; '[ _ _ push-url ] each ;
: add-todo ( links level spider -- ) : add-todo ( links level spider -- )
todo>> push-links ; todo>> push-links ;
@ -38,64 +59,72 @@ links processing-time timestamp ;
: add-nonmatching ( links level spider -- ) : add-nonmatching ( links level spider -- )
nonmatching>> push-links ; nonmatching>> push-links ;
: filter-base ( spider spider-result -- base-links nonmatching-links ) : filter-base-links ( spider spider-result -- base-links nonmatching-links )
[ base>> host>> ] [ links>> prune ] bi* [ base>> host>> ] [ links>> prune ] bi*
[ host>> = ] with partition ; [ host>> = ] with partition ;
: add-spidered ( spider spider-result -- ) : add-spidered ( spider spider-result -- )
[ [ 1+ ] change-count ] dip [ [ 1+ ] change-count ] dip
2dup [ spidered>> ] [ dup url>> ] bi* rot set-at 2dup [ spidered>> ] [ dup url>> ] bi* rot set-at
[ filter-base ] 2keep [ filter-base-links ] 2keep
depth>> 1+ swap depth>> 1+ swap
[ add-nonmatching ] [ add-nonmatching ]
[ tuck [ apply-filters ] 2dip add-todo ] 2bi ; [ tuck [ apply-filters ] 2dip add-todo ] 2bi ;
: normalize-hrefs ( links -- links' ) : normalize-hrefs ( links spider -- links' )
[ >url ] map [ [ >url ] map ] dip
spider get base>> swap [ derive-url ] with map ; base>> swap [ derive-url ] with map ;
: print-spidering ( url depth -- ) : print-spidering ( url depth -- )
"depth: " write number>string write "depth: " write number>string write
", spidering: " write . yield ; ", spidering: " write . yield ;
: (spider-page) ( url depth -- spider-result ) :: new-spidered-result ( spider url depth -- spider-result )
f pick spider get spidered>> set-at f url spider spidered>> set-at
over '[ _ http-get ] benchmark swap [ url http-get ] benchmark :> fetch-time :> html :> headers
[ parse-html dup find-hrefs normalize-hrefs ] benchmark [
html parse-html [ ] [ find-hrefs spider normalize-hrefs ] bi
] benchmark :> processing-time :> links :> parsed-html
url depth headers fetch-time parsed-html links processing-time
now spider-result boa ; now spider-result boa ;
: spider-page ( url depth -- ) :: spider-page ( spider url depth -- )
spider get quiet>> [ 2dup print-spidering ] unless spider quiet>> [ url depth print-spidering ] unless
(spider-page) spider url depth new-spidered-result :> spidered-result
spider get [ quiet>> [ dup describe ] unless ] spider quiet>> [ spidered-result describe ] unless
[ swap add-spidered ] bi ; spider spidered-result add-spidered ;
\ spider-page ERROR add-error-logging \ spider-page ERROR add-error-logging
: spider-sleep ( -- ) : spider-sleep ( spider -- )
spider get sleep>> [ sleep ] when* ; sleep>> [ sleep ] when* ;
: queue-initial-links ( spider -- spider ) :: queue-initial-links ( spider -- spider )
[ initial-links>> normalize-hrefs 0 ] keep spider initial-links>> spider normalize-hrefs 0 spider add-todo spider ;
[ add-todo ] keep ;
: slurp-heap-while ( heap quot1 quot2: ( value key -- ) -- ) : spider-page? ( spider -- ? )
pick heap-empty? [ 3drop ] [ {
[ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ] [ todo>> deque>> deque-empty? not ]
[ roll [ slurp-heap-while ] [ 3drop ] if ] 3bi [ [ todo>> peek-url depth>> ] [ max-depth>> ] bi < ]
] if ; inline recursive [ [ count>> ] [ max-count>> ] bi < ]
} 1&& ;
: setup-next-url ( spider -- spider url depth )
dup todo>> pop-url [ url>> ] [ depth>> ] bi ;
: spider-next-page ( spider -- )
setup-next-url spider-page ;
PRIVATE> PRIVATE>
: run-spider-loop ( spider -- )
dup spider-page? [
[ spider-next-page ] [ run-spider-loop ] bi
] [
drop
] if ;
: run-spider ( spider -- spider ) : run-spider ( spider -- spider )
"spider" [ "spider" [
dup spider [ queue-initial-links [ run-spider-loop ] keep
queue-initial-links
[ todo>> ] [ max-depth>> ] bi
'[
_ <= spider get
[ count>> ] [ max-count>> ] bi < and
] [ spider-page spider-sleep ] slurp-heap-while
spider get
] with-variable
] with-logging ; ] with-logging ;