Merge branch 'master' into smalltalk

db4
Slava Pestov 2009-04-01 00:00:20 -05:00
commit 86cf94260c
18 changed files with 240 additions and 181 deletions

View File

@ -10,12 +10,4 @@ IN: bootstrap.ui
{ [ os unix? ] [ "x11" ] }
} cond
] unless* "ui.backend." prepend require
"ui-text-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require
] when

View File

@ -3,68 +3,72 @@
USING: tools.test namespaces documents documents.elements multiline ;
IN: document.elements.tests
<document> "doc" set
"123\nabc" "doc" get set-doc-string
SYMBOL: doc
<document> doc set
"123\nabcé" doc get set-doc-string
! char-elt
[ { 0 0 } ] [ { 0 0 } "doc" get char-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 1 } "doc" get char-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 0 } "doc" get char-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 0 } doc get char-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 1 } doc get char-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 0 } doc get char-elt prev-elt ] unit-test
[ { 1 3 } ] [ { 1 5 } doc get char-elt prev-elt ] unit-test
[ { 1 3 } ] [ { 1 3 } "doc" get char-elt next-elt ] unit-test
[ { 0 2 } ] [ { 0 1 } "doc" get char-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 3 } "doc" get char-elt next-elt ] unit-test
[ { 1 5 } ] [ { 1 5 } doc get char-elt next-elt ] unit-test
[ { 0 2 } ] [ { 0 1 } doc get char-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 3 } doc get char-elt next-elt ] unit-test
[ { 1 5 } ] [ { 1 3 } doc get char-elt next-elt ] unit-test
! word-elt
<document> "doc" set
"Hello world\nanother line" "doc" get set-doc-string
<document> doc set
"Hello world\nanother line" doc get set-doc-string
[ { 0 0 } ] [ { 0 0 } "doc" get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } "doc" get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } "doc" get word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 6 } "doc" get word-elt prev-elt ] unit-test
[ { 0 6 } ] [ { 0 8 } "doc" get word-elt prev-elt ] unit-test
[ { 0 11 } ] [ { 1 0 } "doc" get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 0 } doc get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } doc get word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } doc get word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 6 } doc get word-elt prev-elt ] unit-test
[ { 0 6 } ] [ { 0 8 } doc get word-elt prev-elt ] unit-test
[ { 0 11 } ] [ { 1 0 } doc get word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 0 } doc get word-elt next-elt ] unit-test
[ { 0 6 } ] [ { 0 5 } doc get word-elt next-elt ] unit-test
[ { 0 11 } ] [ { 0 6 } doc get word-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 11 } doc get word-elt next-elt ] unit-test
[ { 0 5 } ] [ { 0 0 } "doc" get word-elt next-elt ] unit-test
[ { 0 6 } ] [ { 0 5 } "doc" get word-elt next-elt ] unit-test
[ { 0 11 } ] [ { 0 6 } "doc" get word-elt next-elt ] unit-test
[ { 1 0 } ] [ { 0 11 } "doc" get word-elt next-elt ] unit-test
! one-word-elt
[ { 0 0 } ] [ { 0 0 } "doc" get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } "doc" get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } "doc" get one-word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 2 } "doc" get one-word-elt next-elt ] unit-test
[ { 0 5 } ] [ { 0 5 } "doc" get one-word-elt next-elt ] unit-test
[ { 0 0 } ] [ { 0 0 } doc get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 2 } doc get one-word-elt prev-elt ] unit-test
[ { 0 0 } ] [ { 0 5 } doc get one-word-elt prev-elt ] unit-test
[ { 0 5 } ] [ { 0 2 } doc get one-word-elt next-elt ] unit-test
[ { 0 5 } ] [ { 0 5 } doc get one-word-elt next-elt ] unit-test
! line-elt
<document> "doc" set
"Hello\nworld, how are\nyou?" "doc" get set-doc-string
<document> doc set
"Hello\nworld, how are\nyou?" doc get set-doc-string
[ { 0 0 } ] [ { 0 3 } "doc" get line-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 3 } "doc" get line-elt prev-elt ] unit-test
[ { 2 4 } ] [ { 2 1 } "doc" get line-elt next-elt ] unit-test
[ { 0 0 } ] [ { 0 3 } doc get line-elt prev-elt ] unit-test
[ { 0 3 } ] [ { 1 3 } doc get line-elt prev-elt ] unit-test
[ { 2 4 } ] [ { 2 1 } doc get line-elt next-elt ] unit-test
! one-line-elt
[ { 1 0 } ] [ { 1 3 } "doc" get one-line-elt prev-elt ] unit-test
[ { 1 14 } ] [ { 1 3 } "doc" get one-line-elt next-elt ] unit-test
[ { 1 0 } ] [ { 1 3 } doc get one-line-elt prev-elt ] unit-test
[ { 1 14 } ] [ { 1 3 } doc get one-line-elt next-elt ] unit-test
! page-elt
<document> "doc" set
<document> doc set
<" First line
Second line
Third line
Fourth line
Fifth line
Sixth line"> "doc" get set-doc-string
Sixth line"> doc get set-doc-string
[ { 0 0 } ] [ { 3 3 } "doc" get 4 <page-elt> prev-elt ] unit-test
[ { 1 2 } ] [ { 5 2 } "doc" get 4 <page-elt> prev-elt ] unit-test
[ { 0 0 } ] [ { 3 3 } doc get 4 <page-elt> prev-elt ] unit-test
[ { 1 2 } ] [ { 5 2 } doc get 4 <page-elt> prev-elt ] unit-test
[ { 4 3 } ] [ { 0 3 } "doc" get 4 <page-elt> next-elt ] unit-test
[ { 5 10 } ] [ { 4 2 } "doc" get 4 <page-elt> next-elt ] unit-test
[ { 4 3 } ] [ { 0 3 } doc get 4 <page-elt> next-elt ] unit-test
[ { 5 10 } ] [ { 4 2 } doc get 4 <page-elt> next-elt ] unit-test
! doc-elt
[ { 0 0 } ] [ { 3 4 } "doc" get doc-elt prev-elt ] unit-test
[ { 5 10 } ] [ { 3 4 } "doc" get doc-elt next-elt ] unit-test
[ { 0 0 } ] [ { 3 4 } doc get doc-elt prev-elt ] unit-test
[ { 5 10 } ] [ { 3 4 } doc get doc-elt next-elt ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2006, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays combinators documents fry kernel math sequences
unicode.categories accessors ;
accessors unicode.categories unicode.breaks combinators.short-circuit ;
IN: documents.elements
GENERIC: prev-elt ( loc document elt -- newloc )
@ -20,27 +20,32 @@ SINGLETON: char-elt
<PRIVATE
: (prev-char) ( loc document quot -- loc )
: prev ( loc document quot: ( loc document -- loc ) -- loc )
{
{ [ pick { 0 0 } = ] [ 2drop ] }
{ [ pick second zero? ] [ drop [ first 1- ] dip line-end ] }
[ call ]
} cond ; inline
: (next-char) ( loc document quot -- loc )
: next ( loc document quot: ( loc document -- loc ) -- loc )
{
{ [ 2over doc-end = ] [ 2drop ] }
{ [ 2over line-end? ] [ 2drop first 1+ 0 2array ] }
[ call ]
} cond ; inline
: modify-col ( loc document quot: ( col str -- col' ) -- loc )
pick [
[ [ first2 swap ] dip doc-line ] dip call
] dip =col ; inline
PRIVATE>
M: char-elt prev-elt
drop [ drop -1 +col ] (prev-char) ;
drop [ [ last-grapheme-from ] modify-col ] prev ;
M: char-elt next-elt
drop [ drop 1 +col ] (next-char) ;
drop [ [ first-grapheme-from ] modify-col ] next ;
SINGLETON: one-char-elt
@ -50,21 +55,16 @@ M: one-char-elt next-elt 2drop ;
<PRIVATE
: (word-elt) ( loc document quot -- loc )
pick [
[ [ first2 swap ] dip doc-line ] dip call
] dip =col ; inline
: ((word-elt)) ( n seq -- n seq ? )
: blank-at? ( n seq -- n seq ? )
2dup ?nth blank? ;
: break-detector ( ? -- quot )
'[ blank? _ xor ] ; inline
: (prev-word) ( col str ? -- col )
: prev-word ( col str ? -- col )
break-detector find-last-from drop ?1+ ;
: (next-word) ( col str ? -- col )
: next-word ( col str ? -- col )
[ break-detector find-from drop ] [ drop length ] 2bi or ;
PRIVATE>
@ -73,23 +73,23 @@ SINGLETON: one-word-elt
M: one-word-elt prev-elt
drop
[ [ 1- ] dip f (prev-word) ] (word-elt) ;
[ [ 1- ] dip f prev-word ] modify-col ;
M: one-word-elt next-elt
drop
[ f (next-word) ] (word-elt) ;
[ f next-word ] modify-col ;
SINGLETON: word-elt
M: word-elt prev-elt
drop
[ [ [ 1- ] dip ((word-elt)) (prev-word) ] (word-elt) ]
(prev-char) ;
[ [ [ 1- ] dip blank-at? prev-word ] modify-col ]
prev ;
M: word-elt next-elt
drop
[ [ ((word-elt)) (next-word) ] (word-elt) ]
(next-char) ;
[ [ blank-at? next-word ] modify-col ]
next ;
SINGLETON: one-line-elt

View File

@ -155,18 +155,21 @@ TUPLE: peg-head rule-id involved-set eval-set ;
dup pos>> pos set ans>>
; inline
:: (setup-lr) ( r l s -- )
s head>> l head>> eq? [
l head>> s (>>head)
l head>> [ s rule-id>> suffix ] change-involved-set drop
r l s next>> (setup-lr)
] unless ;
:: (setup-lr) ( l s -- )
s [
s left-recursion? [ s throw ] unless
s head>> l head>> eq? [
l head>> s (>>head)
l head>> [ s rule-id>> suffix ] change-involved-set drop
l s next>> (setup-lr)
] unless
] when ;
:: setup-lr ( r l -- )
l head>> [
r rule-id V{ } clone V{ } clone peg-head boa l (>>head)
] unless
r l lrstack get (setup-lr) ;
l lrstack get (setup-lr) ;
:: lr-answer ( r p m -- ast )
[let* |
@ -216,8 +219,10 @@ TUPLE: peg-head rule-id involved-set eval-set ;
lrstack get next>> lrstack set
pos get m (>>pos)
lr head>> [
ans lr (>>seed)
r p m lr-answer
m ans>> left-recursion? [
ans lr (>>seed)
r p m lr-answer
] [ ans ] if
] [
ans m (>>ans)
ans

View File

@ -3,8 +3,7 @@
USING: accessors arrays assocs continuations kernel math models
namespaces opengl sequences io combinators combinators.short-circuit
fry math.vectors math.rectangles cache ui.gadgets ui.gestures
ui.render ui.text ui.text.private ui.backend ui.gadgets.tracks
ui.commands ;
ui.render ui.backend ui.gadgets.tracks ui.commands ;
IN: ui.gadgets.worlds
TUPLE: world < track
@ -53,7 +52,6 @@ M: world request-focus-on ( child gadget -- )
swap >>status
swap >>title
swap 1 track-add
dup init-text-rendering
dup request-focus ;
: <world> ( gadget title status -- world )
@ -74,15 +72,20 @@ M: world remove-gadget
2dup layers>> memq?
[ layers>> delq ] [ call-next-method ] if ;
SYMBOL: flush-layout-cache-hook
flush-layout-cache-hook [ [ ] ] initialize
: (draw-world) ( world -- )
dup handle>> [
{
[ init-gl ]
[ draw-gadget ]
[ finish-text-rendering ]
[ text-handle>> [ purge-cache ] when* ]
[ images>> [ purge-cache ] when* ]
} cleave
] with-gl-context ;
] with-gl-context
flush-layout-cache-hook get call( -- ) ;
: draw-world? ( world -- ? )
#! We don't draw deactivated worlds, or those with 0 size.

View File

@ -18,12 +18,11 @@ M: core-text-renderer string-dim
[ cached-line dim>> ]
if-empty ;
M: core-text-renderer finish-text-rendering
text-handle>> purge-cache
M: core-text-renderer flush-layout-cache
cached-lines get purge-cache ;
: rendered-line ( font string -- texture )
world get text-handle>>
world get world-text-handle
[ cached-line [ image>> ] [ loc>> ] bi <texture> ]
2cache ;

View File

@ -14,12 +14,11 @@ M: pango-renderer string-dim
[ " " string-dim { 0 1 } v* ]
[ cached-layout logical-rect>> dim>> [ >integer ] map ] if-empty ;
M: pango-renderer finish-text-rendering
text-handle>> purge-cache
M: pango-renderer flush-layout-cache
cached-layouts get purge-cache ;
: rendered-layout ( font string -- texture )
world get text-handle>>
world get world-text-handle
[ cached-layout [ image>> ] [ text-position vneg ] bi <texture> ]
2cache ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays sequences math math.order opengl opengl.gl
strings fonts colors accessors ;
strings fonts colors accessors namespaces ui.gadgets.worlds ;
IN: ui.text
<PRIVATE
@ -10,9 +10,13 @@ SYMBOL: font-renderer
HOOK: init-text-rendering font-renderer ( world -- )
HOOK: finish-text-rendering font-renderer ( world -- )
: world-text-handle ( world -- handle )
dup text-handle>> [ dup init-text-rendering ] unless
text-handle>> ;
M: object finish-text-rendering drop ;
HOOK: flush-layout-cache font-renderer ( -- )
[ flush-layout-cache ] flush-layout-cache-hook set-global
HOOK: string-dim font-renderer ( font string -- dim )
@ -69,3 +73,13 @@ M: array draw-text
[ [ 0.0 ] 2dip string-height 0.0 glTranslated ] 2bi
] with each
] do-matrix ;
USING: vocabs.loader namespaces system combinators ;
"ui-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require

View File

@ -4,8 +4,7 @@ USING: arrays assocs io kernel math models namespaces make dlists
deques sequences threads sequences words continuations init
combinators hashtables concurrency.flags sets accessors calendar fry
destructors ui.gadgets ui.gadgets.private ui.gadgets.worlds
ui.gadgets.tracks ui.gestures ui.backend ui.render ui.text
ui.text.private ;
ui.gadgets.tracks ui.gestures ui.backend ui.render ;
IN: ui
<PRIVATE
@ -63,7 +62,7 @@ M: world graft*
: (ungraft-world) ( world -- )
{
[ handle>> select-gl-context ]
[ text-handle>> dispose ]
[ text-handle>> [ dispose ] when* ]
[ images>> [ dispose ] when* ]
[ hand-clicked close-global ]
[ hand-gadget close-global ]
@ -95,8 +94,7 @@ M: world ungraft*
: restore-world ( world -- )
{
[ reset-world ]
[ init-text-rendering ]
[ f >>images drop ]
[ f >>text-handle f >>images drop ]
[ restore-gadget ]
} cleave ;

View File

@ -9,6 +9,9 @@ IN: unicode.breaks.tests
[ 3 ] [ "\u001112\u001161\u0011abA\u000300a"
dup last-grapheme head last-grapheme ] unit-test
[ 3 ] [ 2 "hello" first-grapheme-from ] unit-test
[ 1 ] [ 2 "hello" last-grapheme-from ] unit-test
: grapheme-break-test ( -- filename )
"vocab:unicode/breaks/GraphemeBreakTest.txt" ;

View File

@ -101,6 +101,16 @@ PRIVATE>
[ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
nip swap length or 1+ ;
: first-grapheme-from ( start str -- i )
over tail-slice first-grapheme + ;
: last-grapheme ( str -- i )
unclip-last-slice grapheme-class swap
[ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
: last-grapheme-from ( end str -- i )
swap head-slice last-grapheme ;
<PRIVATE
: >pieces ( str quot: ( str -- i ) -- graphemes )
@ -114,10 +124,6 @@ PRIVATE>
: string-reverse ( str -- rts )
>graphemes reverse concat ;
: last-grapheme ( str -- i )
unclip-last-slice grapheme-class swap
[ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
<PRIVATE
graphemes init-table table

View File

@ -15,6 +15,7 @@ ERROR: bad-effect ;
scan {
{ "(" [ ")" parse-effect ] }
{ f [ ")" unexpected-eof ] }
[ bad-effect ]
} case 2array
] when
] if

View File

@ -97,7 +97,7 @@ ERROR: bad-slot-value value class ;
"writing" associate ;
: define-writer-generic ( name -- )
writer-word (( object value -- )) define-simple-generic ;
writer-word (( value object -- )) define-simple-generic ;
: define-writer ( class slot-spec -- )
[ nip name>> define-writer-generic ] [

View File

@ -68,10 +68,10 @@ SYMBOL: tagstack
[ blank? ] trim ;
: read-comment ( state-parser -- )
"-->" take-until-string make-comment-tag push-tag ;
"-->" take-until-sequence make-comment-tag push-tag ;
: read-dtd ( state-parser -- )
">" take-until-string make-dtd-tag push-tag ;
">" take-until-sequence make-dtd-tag push-tag ;
: read-bang ( state-parser -- )
next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
@ -93,7 +93,7 @@ SYMBOL: tagstack
: (parse-attributes) ( state-parser -- )
skip-whitespace
dup string-parse-end? [
dup state-parse-end? [
drop
] [
[
@ -108,7 +108,7 @@ SYMBOL: tagstack
: (parse-tag) ( string -- string' hashtable )
[
[ read-token >lower ] [ parse-attributes ] bi
] string-parse ;
] state-parse ;
: read-< ( state-parser -- string/f )
next dup get-char [
@ -126,7 +126,7 @@ SYMBOL: tagstack
] [ drop ] if ;
: tag-parse ( quot -- vector )
V{ } clone tagstack [ string-parse ] with-variable ; inline
V{ } clone tagstack [ state-parse ] with-variable ; inline
: parse-html ( string -- vector )
[ (parse-html) tagstack get ] tag-parse ;

View File

@ -2,29 +2,35 @@ USING: tools.test html.parser.state ascii kernel accessors ;
IN: html.parser.state.tests
[ "hello" ]
[ "hello" [ take-rest ] string-parse ] unit-test
[ "hello" [ take-rest ] state-parse ] unit-test
[ "hi" " how are you?" ]
[
"hi how are you?"
[ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse
[ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse
] unit-test
[ "foo" ";bar" ]
[
"foo;bar" [
[ CHAR: ; take-until-char ] [ take-rest ] bi
] string-parse
[ CHAR: ; take-until-object ] [ take-rest ] bi
] state-parse
] unit-test
[ "foo " " bar" ]
[
"foo and bar" [
[ "and" take-until-string ] [ take-rest ] bi
] string-parse
[ "and" take-until-sequence ] [ take-rest ] bi
] state-parse
] unit-test
[ 6 ]
[
" foo " [ skip-whitespace i>> ] string-parse
" foo " [ skip-whitespace n>> ] state-parse
] unit-test
[ { 1 2 } ]
[ { 1 2 3 } <state-parser> [ 3 = ] take-until ] unit-test
[ { 1 2 } ]
[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test

View File

@ -2,31 +2,32 @@
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular
unicode.case unicode.categories locals ;
IN: html.parser.state
TUPLE: state-parser string i ;
TUPLE: state-parser sequence n ;
: <state-parser> ( string -- state-parser )
: <state-parser> ( sequence -- state-parser )
state-parser new
swap >>string
0 >>i ;
swap >>sequence
0 >>n ;
: (get-char) ( i state -- char/f )
string>> ?nth ; inline
: (get-char) ( n state -- char/f )
sequence>> ?nth ; inline
: get-char ( state -- char/f )
[ i>> ] keep (get-char) ; inline
[ n>> ] keep (get-char) ; inline
: get-next ( state -- char/f )
[ i>> 1+ ] keep (get-char) ; inline
[ n>> 1 + ] keep (get-char) ; inline
: next ( state -- state )
[ 1+ ] change-i ; inline
[ 1 + ] change-n ; inline
: get+increment ( state -- char/f )
[ get-char ] [ next drop ] bi ; inline
: string-parse ( string quot -- )
: state-parse ( sequence quot -- )
[ <state-parser> ] dip call ; inline
:: skip-until ( state quot: ( obj -- ? ) -- )
@ -34,17 +35,23 @@ TUPLE: state-parser string i ;
quot call [ state next quot skip-until ] unless
] when* ; inline recursive
: take-until ( state quot: ( obj -- ? ) -- string )
[ drop i>> ]
[ skip-until ]
[ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline
: state-parse-end? ( state -- ? ) get-next not ;
:: take-until-string ( state-parser string -- string' )
string length <growing-circular> :> growing
: take-until ( state quot: ( obj -- ? ) -- sequence/f )
over state-parse-end? [
2drop f
] [
[ drop n>> ]
[ skip-until ]
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
] if ; inline
:: take-until-sequence ( state-parser sequence -- sequence' )
sequence length <growing-circular> :> growing
state-parser
[
growing push-growing-circular
string growing sequence=
sequence growing sequence=
] take-until :> found
found dup length
growing length 1- - head
@ -53,10 +60,8 @@ TUPLE: state-parser string i ;
: skip-whitespace ( state -- state )
[ [ blank? not ] take-until drop ] keep ;
: take-rest ( state -- string )
: take-rest ( state -- sequence )
[ drop f ] take-until ; inline
: take-until-char ( state ch -- string )
: take-until-object ( state obj -- sequence )
'[ _ = ] take-until ;
: string-parse-end? ( state -- ? ) get-next not ;

View File

@ -16,11 +16,6 @@ HELP: run-spider
{ "spider" spider } }
{ $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ;
HELP: slurp-heap-while
{ $values
{ "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } }
{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ;
ARTICLE: "spider-tutorial" "Spider tutorial"
"To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider."
{ $code <" "http://concatenative.org" <spider> "> }

View File

@ -3,8 +3,8 @@
USING: accessors fry html.parser html.parser.analyzer
http.client kernel tools.time sets assocs sequences
concurrency.combinators io threads namespaces math multiline
heaps math.parser inspector urls assoc-heaps logging
combinators.short-circuit continuations calendar prettyprint ;
math.parser inspector urls logging combinators.short-circuit
continuations calendar prettyprint dlists deques locals ;
IN: spider
TUPLE: spider base count max-count sleep max-depth initial-links
@ -13,12 +13,33 @@ filters spidered todo nonmatching quiet ;
TUPLE: spider-result url depth headers fetch-time parsed-html
links processing-time timestamp ;
TUPLE: todo-url url depth ;
: <todo-url> ( url depth -- todo-url )
todo-url new
swap >>depth
swap >>url ;
TUPLE: unique-deque assoc deque ;
: <unique-deque> ( -- unique-deque )
H{ } clone <dlist> unique-deque boa ;
: push-url ( url depth unique-deque -- )
[ <todo-url> ] dip
[ [ [ t ] dip url>> ] [ assoc>> ] bi* set-at ]
[ deque>> push-back ] 2bi ;
: pop-url ( unique-deque -- todo-url ) deque>> pop-front ;
: peek-url ( unique-deque -- todo-url ) deque>> peek-front ;
: <spider> ( base -- spider )
>url
spider new
over >>base
swap 0 <unique-min-heap> [ heap-push ] keep >>todo
<unique-min-heap> >>nonmatching
swap 0 <unique-deque> [ push-url ] keep >>todo
<unique-deque> >>nonmatching
0 >>max-depth
0 >>count
1/0. >>max-count
@ -27,10 +48,10 @@ links processing-time timestamp ;
<PRIVATE
: apply-filters ( links spider -- links' )
filters>> [ '[ _ 1&& ] filter ] when* ;
filters>> [ '[ [ _ 1&& ] filter ] call( seq -- seq' ) ] when* ;
: push-links ( links level assoc-heap -- )
'[ _ _ heap-push ] each ;
: push-links ( links level unique-deque -- )
'[ _ _ push-url ] each ;
: add-todo ( links level spider -- )
todo>> push-links ;
@ -38,64 +59,72 @@ links processing-time timestamp ;
: add-nonmatching ( links level spider -- )
nonmatching>> push-links ;
: filter-base ( spider spider-result -- base-links nonmatching-links )
: filter-base-links ( spider spider-result -- base-links nonmatching-links )
[ base>> host>> ] [ links>> prune ] bi*
[ host>> = ] with partition ;
: add-spidered ( spider spider-result -- )
[ [ 1+ ] change-count ] dip
2dup [ spidered>> ] [ dup url>> ] bi* rot set-at
[ filter-base ] 2keep
[ filter-base-links ] 2keep
depth>> 1+ swap
[ add-nonmatching ]
[ tuck [ apply-filters ] 2dip add-todo ] 2bi ;
: normalize-hrefs ( links -- links' )
[ >url ] map
spider get base>> swap [ derive-url ] with map ;
: normalize-hrefs ( links spider -- links' )
[ [ >url ] map ] dip
base>> swap [ derive-url ] with map ;
: print-spidering ( url depth -- )
"depth: " write number>string write
", spidering: " write . yield ;
: (spider-page) ( url depth -- spider-result )
f pick spider get spidered>> set-at
over '[ _ http-get ] benchmark swap
[ parse-html dup find-hrefs normalize-hrefs ] benchmark
:: new-spidered-result ( spider url depth -- spider-result )
f url spider spidered>> set-at
[ url http-get ] benchmark :> fetch-time :> html :> headers
[
html parse-html [ ] [ find-hrefs spider normalize-hrefs ] bi
] benchmark :> processing-time :> links :> parsed-html
url depth headers fetch-time parsed-html links processing-time
now spider-result boa ;
: spider-page ( url depth -- )
spider get quiet>> [ 2dup print-spidering ] unless
(spider-page)
spider get [ quiet>> [ dup describe ] unless ]
[ swap add-spidered ] bi ;
:: spider-page ( spider url depth -- )
spider quiet>> [ url depth print-spidering ] unless
spider url depth new-spidered-result :> spidered-result
spider quiet>> [ spidered-result describe ] unless
spider spidered-result add-spidered ;
\ spider-page ERROR add-error-logging
: spider-sleep ( -- )
spider get sleep>> [ sleep ] when* ;
: spider-sleep ( spider -- )
sleep>> [ sleep ] when* ;
: queue-initial-links ( spider -- spider )
[ initial-links>> normalize-hrefs 0 ] keep
[ add-todo ] keep ;
:: queue-initial-links ( spider -- spider )
spider initial-links>> spider normalize-hrefs 0 spider add-todo spider ;
: slurp-heap-while ( heap quot1 quot2: ( value key -- ) -- )
pick heap-empty? [ 3drop ] [
[ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ]
[ roll [ slurp-heap-while ] [ 3drop ] if ] 3bi
] if ; inline recursive
: spider-page? ( spider -- ? )
{
[ todo>> deque>> deque-empty? not ]
[ [ todo>> peek-url depth>> ] [ max-depth>> ] bi < ]
[ [ count>> ] [ max-count>> ] bi < ]
} 1&& ;
: setup-next-url ( spider -- spider url depth )
dup todo>> pop-url [ url>> ] [ depth>> ] bi ;
: spider-next-page ( spider -- )
setup-next-url spider-page ;
PRIVATE>
: run-spider-loop ( spider -- )
dup spider-page? [
[ spider-next-page ] [ run-spider-loop ] bi
] [
drop
] if ;
: run-spider ( spider -- spider )
"spider" [
dup spider [
queue-initial-links
[ todo>> ] [ max-depth>> ] bi
'[
_ <= spider get
[ count>> ] [ max-count>> ] bi < and
] [ spider-page spider-sleep ] slurp-heap-while
spider get
] with-variable
queue-initial-links [ run-spider-loop ] keep
] with-logging ;