diff --git a/basis/bootstrap/ui/ui.factor b/basis/bootstrap/ui/ui.factor index 4f7f82a067..271a99c223 100755 --- a/basis/bootstrap/ui/ui.factor +++ b/basis/bootstrap/ui/ui.factor @@ -10,12 +10,4 @@ IN: bootstrap.ui { [ os unix? ] [ "x11" ] } } cond ] unless* "ui.backend." prepend require - - "ui-text-backend" get [ - { - { [ os macosx? ] [ "core-text" ] } - { [ os windows? ] [ "pango" ] } - { [ os unix? ] [ "pango" ] } - } cond - ] unless* "ui.text." prepend require ] when diff --git a/basis/math/blas/ffi/ffi.factor b/basis/math/blas/ffi/ffi.factor index 0603a91370..bc98f72d8b 100644 --- a/basis/math/blas/ffi/ffi.factor +++ b/basis/math/blas/ffi/ffi.factor @@ -11,7 +11,6 @@ IN: math.blas.ffi [ os [ freebsd? ] [ linux? cpu x86.32? and ] bi or ] [ "libblas.so" gfortran-abi add-fortran-library ] } - { [ os [ freebsd? ] [ linux? ] bi or ] [ "libblas.so" gfortran-abi add-fortran-library ] } [ "libblas.so" f2c-abi add-fortran-library ] } cond >> diff --git a/basis/peg/peg.factor b/basis/peg/peg.factor index ce34beb725..dda36432e7 100644 --- a/basis/peg/peg.factor +++ b/basis/peg/peg.factor @@ -155,18 +155,21 @@ TUPLE: peg-head rule-id involved-set eval-set ; dup pos>> pos set ans>> ; inline -:: (setup-lr) ( r l s -- ) - s head>> l head>> eq? [ - l head>> s (>>head) - l head>> [ s rule-id>> suffix ] change-involved-set drop - r l s next>> (setup-lr) - ] unless ; +:: (setup-lr) ( l s -- ) + s [ + s left-recursion? [ s throw ] unless + s head>> l head>> eq? [ + l head>> s (>>head) + l head>> [ s rule-id>> suffix ] change-involved-set drop + l s next>> (setup-lr) + ] unless + ] when ; :: setup-lr ( r l -- ) l head>> [ r rule-id V{ } clone V{ } clone peg-head boa l (>>head) ] unless - r l lrstack get (setup-lr) ; + l lrstack get (setup-lr) ; :: lr-answer ( r p m -- ast ) [let* | @@ -216,8 +219,10 @@ TUPLE: peg-head rule-id involved-set eval-set ; lrstack get next>> lrstack set pos get m (>>pos) lr head>> [ - ans lr (>>seed) - r p m lr-answer + m ans>> left-recursion? [ + ans lr (>>seed) + r p m lr-answer + ] [ ans ] if ] [ ans m (>>ans) ans diff --git a/basis/ui/gadgets/worlds/worlds.factor b/basis/ui/gadgets/worlds/worlds.factor index 163dbff514..655c9ba49d 100644 --- a/basis/ui/gadgets/worlds/worlds.factor +++ b/basis/ui/gadgets/worlds/worlds.factor @@ -3,8 +3,7 @@ USING: accessors arrays assocs continuations kernel math models namespaces opengl sequences io combinators combinators.short-circuit fry math.vectors math.rectangles cache ui.gadgets ui.gestures -ui.render ui.text ui.text.private ui.backend ui.gadgets.tracks -ui.commands ; +ui.render ui.backend ui.gadgets.tracks ui.commands ; IN: ui.gadgets.worlds TUPLE: world < track @@ -53,7 +52,6 @@ M: world request-focus-on ( child gadget -- ) swap >>status swap >>title swap 1 track-add - dup init-text-rendering dup request-focus ; : <world> ( gadget title status -- world ) @@ -74,15 +72,20 @@ M: world remove-gadget 2dup layers>> memq? [ layers>> delq ] [ call-next-method ] if ; +SYMBOL: flush-layout-cache-hook + +flush-layout-cache-hook [ [ ] ] initialize + : (draw-world) ( world -- ) dup handle>> [ { [ init-gl ] [ draw-gadget ] - [ finish-text-rendering ] + [ text-handle>> [ purge-cache ] when* ] [ images>> [ purge-cache ] when* ] } cleave - ] with-gl-context ; + ] with-gl-context + flush-layout-cache-hook get call( -- ) ; : draw-world? ( world -- ? ) #! We don't draw deactivated worlds, or those with 0 size. diff --git a/basis/ui/text/core-text/core-text.factor b/basis/ui/text/core-text/core-text.factor index 785a9366cb..3704189e48 100644 --- a/basis/ui/text/core-text/core-text.factor +++ b/basis/ui/text/core-text/core-text.factor @@ -18,12 +18,11 @@ M: core-text-renderer string-dim [ cached-line dim>> ] if-empty ; -M: core-text-renderer finish-text-rendering - text-handle>> purge-cache +M: core-text-renderer flush-layout-cache cached-lines get purge-cache ; : rendered-line ( font string -- texture ) - world get text-handle>> + world get world-text-handle [ cached-line [ image>> ] [ loc>> ] bi <texture> ] 2cache ; diff --git a/basis/ui/text/pango/pango.factor b/basis/ui/text/pango/pango.factor index 8b644be469..017a4b2cf2 100755 --- a/basis/ui/text/pango/pango.factor +++ b/basis/ui/text/pango/pango.factor @@ -14,12 +14,11 @@ M: pango-renderer string-dim [ " " string-dim { 0 1 } v* ] [ cached-layout logical-rect>> dim>> [ >integer ] map ] if-empty ; -M: pango-renderer finish-text-rendering - text-handle>> purge-cache +M: pango-renderer flush-layout-cache cached-layouts get purge-cache ; : rendered-layout ( font string -- texture ) - world get text-handle>> + world get world-text-handle [ cached-layout [ image>> ] [ text-position vneg ] bi <texture> ] 2cache ; diff --git a/basis/ui/text/text.factor b/basis/ui/text/text.factor index d0766e9ee6..ebf4b9cce0 100644 --- a/basis/ui/text/text.factor +++ b/basis/ui/text/text.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays sequences math math.order opengl opengl.gl -strings fonts colors accessors ; +strings fonts colors accessors namespaces ui.gadgets.worlds ; IN: ui.text <PRIVATE @@ -10,9 +10,13 @@ SYMBOL: font-renderer HOOK: init-text-rendering font-renderer ( world -- ) -HOOK: finish-text-rendering font-renderer ( world -- ) +: world-text-handle ( world -- handle ) + dup text-handle>> [ dup init-text-rendering ] unless + text-handle>> ; -M: object finish-text-rendering drop ; +HOOK: flush-layout-cache font-renderer ( -- ) + +[ flush-layout-cache ] flush-layout-cache-hook set-global HOOK: string-dim font-renderer ( font string -- dim ) @@ -68,4 +72,14 @@ M: array draw-text [ draw-string ] [ [ 0.0 ] 2dip string-height 0.0 glTranslated ] 2bi ] with each - ] do-matrix ; \ No newline at end of file + ] do-matrix ; + +USING: vocabs.loader namespaces system combinators ; + +"ui-backend" get [ + { + { [ os macosx? ] [ "core-text" ] } + { [ os windows? ] [ "pango" ] } + { [ os unix? ] [ "pango" ] } + } cond +] unless* "ui.text." prepend require \ No newline at end of file diff --git a/basis/ui/ui.factor b/basis/ui/ui.factor index 8ce8f57cf0..bf17e455f8 100644 --- a/basis/ui/ui.factor +++ b/basis/ui/ui.factor @@ -4,8 +4,7 @@ USING: arrays assocs io kernel math models namespaces make dlists deques sequences threads sequences words continuations init combinators hashtables concurrency.flags sets accessors calendar fry destructors ui.gadgets ui.gadgets.private ui.gadgets.worlds -ui.gadgets.tracks ui.gestures ui.backend ui.render ui.text -ui.text.private ; +ui.gadgets.tracks ui.gestures ui.backend ui.render ; IN: ui <PRIVATE @@ -63,7 +62,7 @@ M: world graft* : (ungraft-world) ( world -- ) { [ handle>> select-gl-context ] - [ text-handle>> dispose ] + [ text-handle>> [ dispose ] when* ] [ images>> [ dispose ] when* ] [ hand-clicked close-global ] [ hand-gadget close-global ] @@ -95,8 +94,7 @@ M: world ungraft* : restore-world ( world -- ) { [ reset-world ] - [ init-text-rendering ] - [ f >>images drop ] + [ f >>text-handle f >>images drop ] [ restore-gadget ] } cleave ; diff --git a/core/effects/parser/parser.factor b/core/effects/parser/parser.factor index b9cb0ddcc9..c8ed6da2aa 100644 --- a/core/effects/parser/parser.factor +++ b/core/effects/parser/parser.factor @@ -15,6 +15,7 @@ ERROR: bad-effect ; scan { { "(" [ ")" parse-effect ] } { f [ ")" unexpected-eof ] } + [ bad-effect ] } case 2array ] when ] if @@ -31,4 +32,4 @@ ERROR: bad-effect ; "(" expect ")" parse-effect ; : parse-call( ( accum word -- accum ) - [ ")" parse-effect ] dip 2array over push-all ; \ No newline at end of file + [ ")" parse-effect ] dip 2array over push-all ; diff --git a/core/slots/slots.factor b/core/slots/slots.factor index 46fd325fa5..a353f50947 100755 --- a/core/slots/slots.factor +++ b/core/slots/slots.factor @@ -97,7 +97,7 @@ ERROR: bad-slot-value value class ; "writing" associate ; : define-writer-generic ( name -- ) - writer-word (( object value -- )) define-simple-generic ; + writer-word (( value object -- )) define-simple-generic ; : define-writer ( class slot-spec -- ) [ nip name>> define-writer-generic ] [ diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index 677737618b..94ef59bdfd 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -68,10 +68,10 @@ SYMBOL: tagstack [ blank? ] trim ; : read-comment ( state-parser -- ) - "-->" take-until-string make-comment-tag push-tag ; + "-->" take-until-sequence make-comment-tag push-tag ; : read-dtd ( state-parser -- ) - ">" take-until-string make-dtd-tag push-tag ; + ">" take-until-sequence make-dtd-tag push-tag ; : read-bang ( state-parser -- ) next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [ @@ -93,7 +93,7 @@ SYMBOL: tagstack : (parse-attributes) ( state-parser -- ) skip-whitespace - dup string-parse-end? [ + dup state-parse-end? [ drop ] [ [ @@ -108,7 +108,7 @@ SYMBOL: tagstack : (parse-tag) ( string -- string' hashtable ) [ [ read-token >lower ] [ parse-attributes ] bi - ] string-parse ; + ] state-parse ; : read-< ( state-parser -- string/f ) next dup get-char [ @@ -126,7 +126,7 @@ SYMBOL: tagstack ] [ drop ] if ; : tag-parse ( quot -- vector ) - V{ } clone tagstack [ string-parse ] with-variable ; inline + V{ } clone tagstack [ state-parse ] with-variable ; inline : parse-html ( string -- vector ) [ (parse-html) tagstack get ] tag-parse ; diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index f676649aa8..f9862e1e69 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -2,29 +2,35 @@ USING: tools.test html.parser.state ascii kernel accessors ; IN: html.parser.state.tests [ "hello" ] -[ "hello" [ take-rest ] string-parse ] unit-test +[ "hello" [ take-rest ] state-parse ] unit-test [ "hi" " how are you?" ] [ "hi how are you?" - [ [ [ blank? ] take-until ] [ take-rest ] bi ] string-parse + [ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse ] unit-test [ "foo" ";bar" ] [ "foo;bar" [ - [ CHAR: ; take-until-char ] [ take-rest ] bi - ] string-parse + [ CHAR: ; take-until-object ] [ take-rest ] bi + ] state-parse ] unit-test [ "foo " " bar" ] [ "foo and bar" [ - [ "and" take-until-string ] [ take-rest ] bi - ] string-parse + [ "and" take-until-sequence ] [ take-rest ] bi + ] state-parse ] unit-test [ 6 ] [ - " foo " [ skip-whitespace i>> ] string-parse + " foo " [ skip-whitespace n>> ] state-parse ] unit-test + +[ { 1 2 } ] +[ { 1 2 3 } <state-parser> [ 3 = ] take-until ] unit-test + +[ { 1 2 } ] +[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index c69fd76af5..2369b1d750 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -2,31 +2,32 @@ ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular unicode.case unicode.categories locals ; + IN: html.parser.state -TUPLE: state-parser string i ; +TUPLE: state-parser sequence n ; -: <state-parser> ( string -- state-parser ) +: <state-parser> ( sequence -- state-parser ) state-parser new - swap >>string - 0 >>i ; + swap >>sequence + 0 >>n ; -: (get-char) ( i state -- char/f ) - string>> ?nth ; inline +: (get-char) ( n state -- char/f ) + sequence>> ?nth ; inline : get-char ( state -- char/f ) - [ i>> ] keep (get-char) ; inline + [ n>> ] keep (get-char) ; inline : get-next ( state -- char/f ) - [ i>> 1+ ] keep (get-char) ; inline + [ n>> 1 + ] keep (get-char) ; inline : next ( state -- state ) - [ 1+ ] change-i ; inline + [ 1 + ] change-n ; inline : get+increment ( state -- char/f ) [ get-char ] [ next drop ] bi ; inline -: string-parse ( string quot -- ) +: state-parse ( sequence quot -- ) [ <state-parser> ] dip call ; inline :: skip-until ( state quot: ( obj -- ? ) -- ) @@ -34,17 +35,23 @@ TUPLE: state-parser string i ; quot call [ state next quot skip-until ] unless ] when* ; inline recursive -: take-until ( state quot: ( obj -- ? ) -- string ) - [ drop i>> ] - [ skip-until ] - [ drop [ i>> ] [ string>> ] bi ] 2tri subseq ; inline +: state-parse-end? ( state -- ? ) get-next not ; -:: take-until-string ( state-parser string -- string' ) - string length <growing-circular> :> growing +: take-until ( state quot: ( obj -- ? ) -- sequence/f ) + over state-parse-end? [ + 2drop f + ] [ + [ drop n>> ] + [ skip-until ] + [ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq + ] if ; inline + +:: take-until-sequence ( state-parser sequence -- sequence' ) + sequence length <growing-circular> :> growing state-parser [ growing push-growing-circular - string growing sequence= + sequence growing sequence= ] take-until :> found found dup length growing length 1- - head @@ -53,10 +60,8 @@ TUPLE: state-parser string i ; : skip-whitespace ( state -- state ) [ [ blank? not ] take-until drop ] keep ; -: take-rest ( state -- string ) +: take-rest ( state -- sequence ) [ drop f ] take-until ; inline -: take-until-char ( state ch -- string ) +: take-until-object ( state obj -- sequence ) '[ _ = ] take-until ; - -: string-parse-end? ( state -- ? ) get-next not ; diff --git a/extra/smalltalk/ast/ast.factor b/extra/smalltalk/ast/ast.factor new file mode 100644 index 0000000000..fc415aa361 --- /dev/null +++ b/extra/smalltalk/ast/ast.factor @@ -0,0 +1,53 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: strings arrays memoize kernel sequences accessors combinators ; +IN: smalltalk.ast + +SINGLETONS: nil self super ; + +TUPLE: ast-comment { string string } ; +TUPLE: ast-block { arguments array } { temporaries array } { body array } ; +TUPLE: ast-message-send receiver { selector string } { arguments array } ; +TUPLE: ast-message { selector string } { arguments array } ; +TUPLE: ast-cascade receiver { messages array } ; +TUPLE: ast-name { name string } ; +TUPLE: ast-return value ; +TUPLE: ast-assignment { name ast-name } value ; +TUPLE: ast-local-variables { names array } ; +TUPLE: ast-method { name string } { body ast-block } ; +TUPLE: ast-class { name string } { superclass string } { ivars array } { methods array } ; +TUPLE: ast-foreign { class string } { name string } ; +TUPLE: ast-sequence { temporaries array } { body array } ; + +! We treat a sequence of statements like a block in a few places to +! simplify handling of top-level forms +M: ast-sequence arguments>> drop { } ; + +: unclip-temporaries ( statements -- temporaries statements' ) + { + { [ dup empty? ] [ { } ] } + { [ dup first ast-local-variables? not ] [ { } ] } + [ unclip names>> ] + } cond swap ; + +: <ast-block> ( arguments body -- block ) + unclip-temporaries ast-block boa ; + +: <ast-sequence> ( body -- block ) + unclip-temporaries ast-sequence boa ; + +! The parser parses normal message sends as cascades with one message, but +! we represent them differently in the AST to simplify generated code in +! the common case +: <ast-cascade> ( receiver messages -- ast ) + dup length 1 = + [ first [ selector>> ] [ arguments>> ] bi ast-message-send boa ] + [ ast-cascade boa ] + if ; + +! Methods return self by default +: <ast-method> ( class arguments body -- method ) + self suffix <ast-block> ast-method boa ; + +TUPLE: symbol { name string } ; +MEMO: intern ( name -- symbol ) symbol boa ; \ No newline at end of file diff --git a/extra/smalltalk/ast/authors.txt b/extra/smalltalk/ast/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/ast/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/authors.txt b/extra/smalltalk/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/classes/authors.txt b/extra/smalltalk/classes/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/classes/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/classes/classes.factor b/extra/smalltalk/classes/classes.factor new file mode 100644 index 0000000000..1798aad961 --- /dev/null +++ b/extra/smalltalk/classes/classes.factor @@ -0,0 +1,25 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel namespaces assocs accessors words sequences classes.tuple ; +IN: smalltalk.classes + +SYMBOL: classes + +classes [ H{ } clone ] initialize + +: create-class ( class -- class ) + "smalltalk.classes" create ; + +ERROR: no-class name ; + +: lookup-class ( class -- class ) + classes get ?at [ ] [ no-class ] if ; + +: define-class ( class superclass ivars -- class-word ) + [ create-class ] [ lookup-class ] [ ] tri* + [ define-tuple-class ] [ 2drop dup dup name>> classes get set-at ] 3bi ; + +: define-foreign ( class name -- ) + classes get set-at ; + +tuple "Object" define-foreign \ No newline at end of file diff --git a/extra/smalltalk/compiler/assignment/assignment.factor b/extra/smalltalk/compiler/assignment/assignment.factor new file mode 100644 index 0000000000..3a0a769f86 --- /dev/null +++ b/extra/smalltalk/compiler/assignment/assignment.factor @@ -0,0 +1,36 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays kernel sequences sets smalltalk.ast ; +IN: smalltalk.compiler.assignment + +GENERIC: assigned-locals ( ast -- seq ) + +M: ast-return assigned-locals value>> assigned-locals ; + +M: ast-block assigned-locals + [ body>> assigned-locals ] [ arguments>> ] bi diff ; + +M: ast-message-send assigned-locals + [ receiver>> assigned-locals ] + [ arguments>> assigned-locals ] + bi append ; + +M: ast-cascade assigned-locals + [ receiver>> assigned-locals ] + [ messages>> assigned-locals ] + bi append ; + +M: ast-message assigned-locals + arguments>> assigned-locals ; + +M: ast-assignment assigned-locals + [ name>> dup ast-name? [ name>> 1array ] [ drop { } ] if ] + [ value>> assigned-locals ] bi append ; + +M: ast-sequence assigned-locals + body>> assigned-locals ; + +M: array assigned-locals + [ assigned-locals ] map concat ; + +M: object assigned-locals drop f ; \ No newline at end of file diff --git a/extra/smalltalk/compiler/assignment/authors.txt b/extra/smalltalk/compiler/assignment/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/compiler/assignment/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/compiler/authors.txt b/extra/smalltalk/compiler/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/compiler/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/compiler/compiler-tests.factor b/extra/smalltalk/compiler/compiler-tests.factor new file mode 100644 index 0000000000..81b38f2c14 --- /dev/null +++ b/extra/smalltalk/compiler/compiler-tests.factor @@ -0,0 +1,87 @@ +USING: smalltalk.compiler tools.test prettyprint smalltalk.ast +smalltalk.compiler.lexenv stack-checker locals.rewrite.closures +kernel accessors compiler.units sequences arrays ; +IN: smalltalk.compiler.tests + +: test-compilation ( ast -- quot ) + [ + 1array ast-sequence new swap >>body + compile-smalltalk [ call ] append + ] with-compilation-unit ; + +: test-inference ( ast -- in# out# ) + test-compilation infer [ in>> ] [ out>> ] bi ; + +[ 2 1 ] [ + T{ ast-block f + { "a" "b" } + { + T{ ast-message-send f + T{ ast-name f "a" } + "+" + { T{ ast-name f "b" } } + } + } + } test-inference +] unit-test + +[ 3 1 ] [ + T{ ast-block f + { "a" "b" "c" } + { + T{ ast-assignment f + T{ ast-name f "a" } + T{ ast-message-send f + T{ ast-name f "c" } + "+" + { T{ ast-name f "b" } } + } + } + T{ ast-message-send f + T{ ast-name f "b" } + "blah:" + { 123.456 } + } + T{ ast-return f T{ ast-name f "c" } } + } + } test-inference +] unit-test + +[ 0 1 ] [ + T{ ast-block f + { } + { } + { + T{ ast-message-send + { receiver 1 } + { selector "to:do:" } + { arguments + { + 10 + T{ ast-block + { arguments { "i" } } + { body + { + T{ ast-message-send + { receiver + T{ ast-name { name "i" } } + } + { selector "print" } + } + } + } + } + } + } + } + } + } test-inference +] unit-test + +[ "a" ] [ + T{ ast-block f + { } + { } + { { T{ ast-block { body { "a" } } } } } + } test-compilation call first call +] unit-test \ No newline at end of file diff --git a/extra/smalltalk/compiler/compiler.factor b/extra/smalltalk/compiler/compiler.factor new file mode 100644 index 0000000000..2eeee30692 --- /dev/null +++ b/extra/smalltalk/compiler/compiler.factor @@ -0,0 +1,157 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs combinators.short-circuit +continuations fry kernel namespaces quotations sequences sets +generalizations slots locals.types splitting math +locals.rewrite.closures generic words combinators locals smalltalk.ast +smalltalk.compiler.lexenv smalltalk.compiler.assignment +smalltalk.compiler.return smalltalk.selectors smalltalk.classes ; +IN: smalltalk.compiler + +GENERIC: compile-ast ( lexenv ast -- quot ) + +M: object compile-ast nip 1quotation ; + +M: self compile-ast drop self>> 1quotation ; + +ERROR: unbound-local name ; + +M: ast-name compile-ast name>> swap lookup-reader ; + +: compile-arguments ( lexenv ast -- quot ) + arguments>> [ compile-ast ] with map [ ] join ; + +: compile-new ( lexenv ast -- quot ) + [ receiver>> compile-ast ] + [ compile-arguments ] 2bi + [ new ] 3append ; + +: compile-ifTrue:ifFalse: ( lexenv ast -- quot ) + [ receiver>> compile-ast ] + [ compile-arguments ] 2bi + [ if ] 3append ; + +M: ast-message-send compile-ast + dup selector>> { + { "ifTrue:ifFalse:" [ compile-ifTrue:ifFalse: ] } + { "new" [ compile-new ] } + [ + drop + [ compile-arguments ] + [ receiver>> compile-ast ] + [ nip selector>> selector>generic ] + 2tri [ append ] dip suffix + ] + } case ; + +M: ast-cascade compile-ast + [ receiver>> compile-ast ] + [ + messages>> [ + [ compile-arguments \ dip ] + [ selector>> selector>generic ] bi + [ ] 3sequence + ] with map + unclip-last [ [ [ drop ] append ] map ] dip suffix + cleave>quot + ] 2bi append ; + +M: ast-return compile-ast + [ value>> compile-ast ] [ drop return>> 1quotation ] 2bi + [ continue-with ] 3append ; + +: (compile-sequence) ( lexenv asts -- quot ) + [ drop [ nil ] ] [ + [ compile-ast ] with map [ drop ] join + ] if-empty ; + +: block-lexenv ( block -- lexenv ) + [ [ arguments>> ] [ temporaries>> ] bi append ] + [ body>> [ assigned-locals ] map concat unique ] bi + '[ + dup dup _ key? + [ <local-reader> ] + [ <local> ] + if + ] H{ } map>assoc + dup + [ nip local-reader? ] assoc-filter + [ <local-writer> ] assoc-map + <lexenv> swap >>local-writers swap >>local-readers ; + +: lookup-block-vars ( vars lexenv -- seq ) + local-readers>> '[ _ at ] map ; + +: make-temporaries ( block lexenv -- quot ) + [ temporaries>> ] dip lookup-block-vars + [ <def> [ f ] swap suffix ] map [ ] join ; + +:: compile-sequence ( lexenv block -- vars quot ) + lexenv block block-lexenv lexenv-union :> lexenv + block arguments>> lexenv lookup-block-vars + lexenv block body>> (compile-sequence) block lexenv make-temporaries prepend ; + +M: ast-sequence compile-ast + compile-sequence nip ; + +GENERIC: contains-blocks? ( obj -- ? ) + +M: ast-block contains-blocks? drop t ; + +M: object contains-blocks? drop f ; + +M: array contains-blocks? [ contains-blocks? ] any? ; + +M: array compile-ast + dup contains-blocks? [ + [ [ compile-ast ] with map [ ] join ] [ length ] bi + '[ @ _ narray ] + ] [ call-next-method ] if ; + +GENERIC: compile-assignment ( lexenv name -- quot ) + +M: ast-name compile-assignment name>> swap lookup-writer ; + +M: ast-assignment compile-ast + [ value>> compile-ast [ dup ] ] [ name>> compile-assignment ] 2bi 3append ; + +M: ast-block compile-ast + compile-sequence <lambda> '[ _ ] ; + +:: (compile-method-body) ( lexenv block -- lambda ) + lexenv block compile-sequence + [ lexenv self>> suffix ] dip <lambda> ; + +: compile-method-body ( lexenv block -- quot ) + [ [ (compile-method-body) ] [ arguments>> length 1+ ] bi ] 2keep + make-return ; + +: compile-method ( lexenv ast-method -- ) + [ [ class>> ] [ name>> selector>generic ] bi* create-method ] + [ body>> compile-method-body ] + 2bi define ; + +: <class-lexenv> ( class -- lexenv ) + <lexenv> swap >>class "self" <local> >>self "^" <local> >>return ; + +M: ast-class compile-ast + nip + [ + [ name>> ] [ superclass>> ] [ ivars>> ] tri + define-class <class-lexenv> + ] + [ methods>> ] bi + [ compile-method ] with each + [ nil ] ; + +ERROR: no-word name ; + +M: ast-foreign compile-ast + nip + [ class>> dup ":" split1 lookup [ ] [ no-word ] ?if ] + [ name>> ] bi define-foreign + [ nil ] ; + +: compile-smalltalk ( statement -- quot ) + [ empty-lexenv ] dip [ compile-sequence nip 0 ] + 2keep make-return ; \ No newline at end of file diff --git a/extra/smalltalk/compiler/lexenv/authors.txt b/extra/smalltalk/compiler/lexenv/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/compiler/lexenv/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/compiler/lexenv/lexenv-tests.factor b/extra/smalltalk/compiler/lexenv/lexenv-tests.factor new file mode 100644 index 0000000000..8f171f3eed --- /dev/null +++ b/extra/smalltalk/compiler/lexenv/lexenv-tests.factor @@ -0,0 +1,24 @@ +USING: smalltalk.compiler.lexenv tools.test kernel namespaces accessors ; +IN: smalltalk.compiler.lexenv.tests + +TUPLE: some-class x y z ; + +SYMBOL: fake-self + +SYMBOL: fake-local + +<lexenv> + some-class >>class + fake-self >>self + H{ { "mumble" fake-local } } >>local-readers + H{ { "jumble" fake-local } } >>local-writers +lexenv set + +[ [ fake-local ] ] [ "mumble" lexenv get lookup-reader ] unit-test +[ [ fake-self x>> ] ] [ "x" lexenv get lookup-reader ] unit-test +[ [ \ tuple ] ] [ "Object" lexenv get lookup-reader ] unit-test + +[ [ fake-local ] ] [ "jumble" lexenv get lookup-writer ] unit-test +[ [ fake-self (>>y) ] ] [ "y" lexenv get lookup-writer ] unit-test + +[ "blahblah" lexenv get lookup-writer ] must-fail \ No newline at end of file diff --git a/extra/smalltalk/compiler/lexenv/lexenv.factor b/extra/smalltalk/compiler/lexenv/lexenv.factor new file mode 100644 index 0000000000..cd06314fd9 --- /dev/null +++ b/extra/smalltalk/compiler/lexenv/lexenv.factor @@ -0,0 +1,67 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: assocs kernel accessors quotations slots words +sequences namespaces combinators combinators.short-circuit +summary smalltalk.classes ; +IN: smalltalk.compiler.lexenv + +! local-readers: assoc string => word +! local-writers: assoc string => word +! self: word or f for top-level forms +! class: class word or f for top-level forms +! method: generic word or f for top-level forms +TUPLE: lexenv local-readers local-writers self return class method ; + +: <lexenv> ( -- lexenv ) lexenv new ; inline + +CONSTANT: empty-lexenv T{ lexenv } + +: lexenv-union ( lexenv1 lexenv2 -- lexenv ) + [ <lexenv> ] 2dip { + [ [ local-readers>> ] bi@ assoc-union >>local-readers ] + [ [ local-writers>> ] bi@ assoc-union >>local-writers ] + [ [ self>> ] either? >>self ] + [ [ return>> ] either? >>return ] + [ [ class>> ] either? >>class ] + [ [ method>> ] either? >>method ] + } 2cleave ; + +: local-reader ( name lexenv -- local ) + local-readers>> at dup [ 1quotation ] when ; + +: ivar-reader ( name lexenv -- quot/f ) + dup class>> [ + [ class>> "slots" word-prop slot-named ] [ self>> ] bi + swap dup [ name>> reader-word [ ] 2sequence ] [ 2drop f ] if + ] [ 2drop f ] if ; + +: class-name ( name -- quot/f ) + classes get at dup [ [ ] curry ] when ; + +ERROR: bad-identifier name ; + +M: bad-identifier summary drop "Unknown identifier" ; + +: lookup-reader ( name lexenv -- reader-quot ) + { + [ local-reader ] + [ ivar-reader ] + [ drop class-name ] + [ drop bad-identifier ] + } 2|| ; + +: local-writer ( name lexenv -- local ) + local-writers>> at dup [ 1quotation ] when ; + +: ivar-writer ( name lexenv -- quot/f ) + dup class>> [ + [ class>> "slots" word-prop slot-named ] [ self>> ] bi + swap dup [ name>> writer-word [ ] 2sequence ] [ 2drop f ] if + ] [ 2drop f ] if ; + +: lookup-writer ( name lexenv -- writer-quot ) + { + [ local-writer ] + [ ivar-writer ] + [ drop bad-identifier ] + } 2|| ; \ No newline at end of file diff --git a/extra/smalltalk/compiler/return/authors.txt b/extra/smalltalk/compiler/return/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/compiler/return/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/compiler/return/return-tests.factor b/extra/smalltalk/compiler/return/return-tests.factor new file mode 100644 index 0000000000..15a3406ffc --- /dev/null +++ b/extra/smalltalk/compiler/return/return-tests.factor @@ -0,0 +1,3 @@ +USING: smalltalk.parser smalltalk.compiler.return tools.test ; + +[ t ] [ "(i <= 1) ifTrue: [^1] ifFalse: [^((Fib new i:(i-1)) compute + (Fib new i:(i-2)) compute)]" parse-smalltalk need-return-continuation? ] unit-test \ No newline at end of file diff --git a/extra/smalltalk/compiler/return/return.factor b/extra/smalltalk/compiler/return/return.factor new file mode 100644 index 0000000000..8c36bdac64 --- /dev/null +++ b/extra/smalltalk/compiler/return/return.factor @@ -0,0 +1,45 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays combinators.short-circuit continuations +fry generalizations kernel locals locals.types locals.rewrite.closures +namespaces make sequences smalltalk.ast ; +IN: smalltalk.compiler.return + +SYMBOL: return-continuation + +GENERIC: need-return-continuation? ( ast -- ? ) + +M: ast-return need-return-continuation? drop t ; + +M: ast-block need-return-continuation? body>> need-return-continuation? ; + +M: ast-message-send need-return-continuation? + { + [ receiver>> need-return-continuation? ] + [ arguments>> need-return-continuation? ] + } 1|| ; + +M: ast-cascade need-return-continuation? + { + [ receiver>> need-return-continuation? ] + [ messages>> need-return-continuation? ] + } 1|| ; + +M: ast-message need-return-continuation? + arguments>> need-return-continuation? ; + +M: ast-assignment need-return-continuation? + value>> need-return-continuation? ; + +M: ast-sequence need-return-continuation? + body>> need-return-continuation? ; + +M: array need-return-continuation? [ need-return-continuation? ] any? ; + +M: object need-return-continuation? drop f ; + +:: make-return ( quot n lexenv block -- quot ) + block need-return-continuation? [ + quot clone [ lexenv return>> <def> '[ _ ] prepend ] change-body + n '[ _ _ ncurry callcc1 ] + ] [ quot ] if rewrite-closures first ; \ No newline at end of file diff --git a/extra/smalltalk/eval/authors.txt b/extra/smalltalk/eval/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/eval/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/eval/eval-tests.factor b/extra/smalltalk/eval/eval-tests.factor new file mode 100644 index 0000000000..95366d65b9 --- /dev/null +++ b/extra/smalltalk/eval/eval-tests.factor @@ -0,0 +1,11 @@ +IN: smalltalk.eval.tests +USING: smalltalk.eval tools.test io.streams.string kernel ; + +[ 3 ] [ "1+2" eval-smalltalk ] unit-test +[ "HAI" ] [ "(1<10) ifTrue:['HAI'] ifFalse:['BAI']" eval-smalltalk ] unit-test +[ 7 ] [ "1+2+3;+4" eval-smalltalk ] unit-test +[ 6 "5\n6\n" ] [ [ "[:x|x print] value: 5; value: 6" eval-smalltalk ] with-string-writer ] unit-test +[ 5 ] [ "|x| x:=5. x" eval-smalltalk ] unit-test +[ 11 ] [ "[:i| |x| x:=5. i+x] value: 6" eval-smalltalk ] unit-test +[ t ] [ "class Blah [method foo [5]]. Blah new foo" eval-smalltalk tuple? ] unit-test +[ 196418 ] [ "vocab:smalltalk/eval/fib.st" eval-smalltalk-file ] unit-test \ No newline at end of file diff --git a/extra/smalltalk/eval/eval.factor b/extra/smalltalk/eval/eval.factor new file mode 100644 index 0000000000..56841beafd --- /dev/null +++ b/extra/smalltalk/eval/eval.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: io.files io.encodings.utf8 +compiler.units smalltalk.parser smalltalk.compiler +smalltalk.library ; +IN: smalltalk.eval + +: eval-smalltalk ( string -- result ) + [ parse-smalltalk compile-smalltalk ] with-compilation-unit + call( -- result ) ; + +: eval-smalltalk-file ( path -- result ) + utf8 file-contents eval-smalltalk ; diff --git a/extra/smalltalk/eval/fib.st b/extra/smalltalk/eval/fib.st new file mode 100644 index 0000000000..41ab8f56cc --- /dev/null +++ b/extra/smalltalk/eval/fib.st @@ -0,0 +1,11 @@ +class Fib [ + |i| + method i: newI [i:=newI]. + method compute [ + (i <= 1) + ifTrue: [^1] + ifFalse: [^((Fib new i:(i-1)) compute + (Fib new i:(i-2)) compute)] + ]. +]. + +[(Fib new i: 26) compute] time \ No newline at end of file diff --git a/extra/smalltalk/library/authors.txt b/extra/smalltalk/library/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/library/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/library/library.factor b/extra/smalltalk/library/library.factor new file mode 100644 index 0000000000..28acf98dff --- /dev/null +++ b/extra/smalltalk/library/library.factor @@ -0,0 +1,101 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel present io math sequences assocs math.ranges +math.order fry tools.time locals smalltalk.selectors +smalltalk.ast smalltalk.classes ; +IN: smalltalk.library + +SELECTOR: print +SELECTOR: asString + +M: object selector-print dup present print ; +M: object selector-asString present ; + +SELECTOR: print: +SELECTOR: nextPutAll: +SELECTOR: tab +SELECTOR: nl + +M: object selector-print: [ present ] dip stream-print nil ; +M: object selector-nextPutAll: selector-print: ; +M: object selector-tab " " swap selector-print: ; +M: object selector-nl stream-nl nil ; + +SELECTOR: + +SELECTOR: - +SELECTOR: * +SELECTOR: / +SELECTOR: < +SELECTOR: > +SELECTOR: <= +SELECTOR: >= +SELECTOR: = + +M: object selector-+ swap + ; +M: object selector-- swap - ; +M: object selector-* swap * ; +M: object selector-/ swap / ; +M: object selector-< swap < ; +M: object selector-> swap > ; +M: object selector-<= swap <= ; +M: object selector->= swap >= ; +M: object selector-= swap = ; + +SELECTOR: min: +SELECTOR: max: + +M: object selector-min: min ; +M: object selector-max: max ; + +SELECTOR: ifTrue: +SELECTOR: ifFalse: +SELECTOR: ifTrue:ifFalse: + +M: object selector-ifTrue: [ call( -- result ) ] [ drop nil ] if ; +M: object selector-ifFalse: [ drop nil ] [ call( -- result ) ] if ; +M: object selector-ifTrue:ifFalse: [ drop call( -- result ) ] [ nip call( -- result ) ] if ; + +SELECTOR: isNil + +M: object selector-isNil nil eq? ; + +SELECTOR: at: +SELECTOR: at:put: + +M: sequence selector-at: nth ; +M: sequence selector-at:put: ( key value receiver -- receiver ) [ swapd set-nth ] keep ; + +M: assoc selector-at: at ; +M: assoc selector-at:put: ( key value receiver -- receiver ) [ swapd set-at ] keep ; + +SELECTOR: do: + +M:: object selector-do: ( quot receiver -- nil ) + receiver [ quot call( elt -- result ) drop ] each nil ; + +SELECTOR: to: +SELECTOR: to:do: + +M: object selector-to: swap [a,b] ; +M:: object selector-to:do: ( to quot from -- nil ) + from to [a,b] [ quot call( i -- result ) drop ] each nil ; + +SELECTOR: value +SELECTOR: value: +SELECTOR: value:value: +SELECTOR: value:value:value: +SELECTOR: value:value:value:value: + +M: object selector-value call( -- result ) ; +M: object selector-value: call( input -- result ) ; +M: object selector-value:value: call( input input -- result ) ; +M: object selector-value:value:value: call( input input input -- result ) ; +M: object selector-value:value:value:value: call( input input input input -- result ) ; + +SELECTOR: new + +M: object selector-new new ; + +SELECTOR: time + +M: object selector-time '[ _ call( -- result ) ] time ; \ No newline at end of file diff --git a/extra/smalltalk/listener/authors.txt b/extra/smalltalk/listener/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/listener/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/listener/listener.factor b/extra/smalltalk/listener/listener.factor new file mode 100644 index 0000000000..dc84fd90fb --- /dev/null +++ b/extra/smalltalk/listener/listener.factor @@ -0,0 +1,18 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel prettyprint io io.styles colors.constants compiler.units +fry debugger sequences locals.rewrite.closures smalltalk.ast +smalltalk.eval smalltalk.printer smalltalk.listener ; +IN: smalltalk.listener + +: eval-interactively ( string -- ) + '[ + _ eval-smalltalk + dup nil? [ drop ] [ "Result: " write smalltalk>string print ] if + ] try ; + +: smalltalk-listener ( -- ) + "Smalltalk>" { { background COLOR: light-blue } } format bl flush readln + [ eval-interactively smalltalk-listener ] when* ; + +MAIN: smalltalk-listener \ No newline at end of file diff --git a/extra/smalltalk/parser/authors.txt b/extra/smalltalk/parser/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/parser/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/parser/parser-tests.factor b/extra/smalltalk/parser/parser-tests.factor new file mode 100644 index 0000000000..9027290e6a --- /dev/null +++ b/extra/smalltalk/parser/parser-tests.factor @@ -0,0 +1,300 @@ +IN: smalltalk.parser.tests +USING: smalltalk.parser smalltalk.ast +peg.ebnf tools.test accessors +io.files io.encodings.ascii kernel ; + +EBNF: test-Character +test = <foreign parse-smalltalk Character> +;EBNF + +[ CHAR: a ] [ "a" test-Character ] unit-test + +EBNF: test-Comment +test = <foreign parse-smalltalk Comment> +;EBNF + +[ T{ ast-comment f "Hello, this is a comment." } ] +[ "\"Hello, this is a comment.\"" test-Comment ] +unit-test + +[ T{ ast-comment f "Hello, \"this\" is a comment." } ] +[ "\"Hello, \"\"this\"\" is a comment.\"" test-Comment ] +unit-test + +EBNF: test-Identifier +test = <foreign parse-smalltalk Identifier> +;EBNF + +[ "OrderedCollection" ] [ "OrderedCollection" test-Identifier ] unit-test + +EBNF: test-Literal +test = <foreign parse-smalltalk Literal> +;EBNF + +[ nil ] [ "nil" test-Literal ] unit-test +[ 123 ] [ "123" test-Literal ] unit-test +[ HEX: deadbeef ] [ "16rdeadbeef" test-Literal ] unit-test +[ -123 ] [ "-123" test-Literal ] unit-test +[ 1.2 ] [ "1.2" test-Literal ] unit-test +[ -1.24 ] [ "-1.24" test-Literal ] unit-test +[ 12.4e7 ] [ "12.4e7" test-Literal ] unit-test +[ 12.4e-7 ] [ "12.4e-7" test-Literal ] unit-test +[ -12.4e7 ] [ "-12.4e7" test-Literal ] unit-test +[ CHAR: x ] [ "$x" test-Literal ] unit-test +[ "Hello, world" ] [ "'Hello, world'" test-Literal ] unit-test +[ "Hello, 'funny' world" ] [ "'Hello, ''funny'' world'" test-Literal ] unit-test +[ T{ symbol f "foo" } ] [ "#foo" test-Literal ] unit-test +[ T{ symbol f "+" } ] [ "#+" test-Literal ] unit-test +[ T{ symbol f "at:put:" } ] [ "#at:put:" test-Literal ] unit-test +[ T{ symbol f "Hello world" } ] [ "#'Hello world'" test-Literal ] unit-test +[ B{ 1 2 3 4 } ] [ "#[1 2 3 4]" test-Literal ] unit-test +[ { nil t f } ] [ "#(nil true false)" test-Literal ] unit-test +[ { nil { t f } } ] [ "#(nil (true false))" test-Literal ] unit-test +[ T{ ast-block f { } { } { } } ] [ "[]" test-Literal ] unit-test +[ T{ ast-block f { "x" } { } { T{ ast-return f T{ ast-name f "x" } } } } ] [ "[ :x|^x]" test-Literal ] unit-test +[ T{ ast-block f { } { } { T{ ast-return f self } } } ] [ "[^self]" test-Literal ] unit-test + +[ + T{ ast-block + { arguments { "i" } } + { body + { + T{ ast-message-send + { receiver T{ ast-name { name "i" } } } + { selector "print" } + } + } + } + } +] +[ "[ :i | i print ]" test-Literal ] unit-test + +[ + T{ ast-block + { body { 5 self } } + } +] +[ "[5. self]" test-Literal ] unit-test + +EBNF: test-FormalBlockArgumentDeclarationList +test = <foreign parse-smalltalk FormalBlockArgumentDeclarationList> +;EBNF + +[ V{ "x" "y" "elt" } ] [ ":x :y :elt" test-FormalBlockArgumentDeclarationList ] unit-test + +EBNF: test-Operand +test = <foreign parse-smalltalk Operand> +;EBNF + +[ { 123 15.6 { t f } } ] [ "#(123 15.6 (true false))" test-Operand ] unit-test +[ T{ ast-name f "x" } ] [ "x" test-Operand ] unit-test + +EBNF: test-Expression +test = <foreign parse-smalltalk Expression> +;EBNF + +[ self ] [ "self" test-Expression ] unit-test +[ { 123 15.6 { t f } } ] [ "#(123 15.6 (true false))" test-Expression ] unit-test +[ T{ ast-name f "x" } ] [ "x" test-Expression ] unit-test +[ T{ ast-message-send f 5 "print" { } } ] [ "5 print" test-Expression ] unit-test +[ T{ ast-message-send f T{ ast-message-send f 5 "squared" { } } "print" { } } ] [ "5 squared print" test-Expression ] unit-test +[ T{ ast-message-send f 2 "+" { 2 } } ] [ "2+2" test-Expression ] unit-test + +[ + T{ ast-message-send f + T{ ast-message-send f 3 "factorial" { } } + "+" + { T{ ast-message-send f 4 "factorial" { } } } + } +] +[ "3 factorial + 4 factorial" test-Expression ] unit-test + +[ + T{ ast-message-send f + T{ ast-message-send f 3 "factorial" { } } + "+" + { T{ ast-message-send f 4 "factorial" { } } } + } +] +[ " 3 factorial + 4 factorial" test-Expression ] unit-test + +[ + T{ ast-message-send f + T{ ast-message-send f 3 "factorial" { } } + "+" + { T{ ast-message-send f 4 "factorial" { } } } + } +] +[ " 3 factorial + 4 factorial " test-Expression ] unit-test + +[ + T{ ast-message-send f + T{ ast-message-send f + T{ ast-message-send f 3 "factorial" { } } + "+" + { 4 } + } + "factorial" + { } + } +] +[ "(3 factorial + 4) factorial" test-Expression ] unit-test + +[ + T{ ast-message-send + { receiver + T{ ast-message-send + { receiver + T{ ast-message-send + { receiver 1 } + { selector "<" } + { arguments { 10 } } + } + } + { selector "ifTrue:ifFalse:" } + { arguments + { + T{ ast-block { body { "HI" } } } + T{ ast-block { body { "BYE" } } } + } + } + } + } + { selector "print" } + } +] +[ "((1 < 10) ifTrue: [ 'HI' ] ifFalse: [ 'BYE' ]) print" test-Expression ] unit-test + +[ + T{ ast-cascade + { receiver 12 } + { messages + { + T{ ast-message f "sqrt" } + T{ ast-message f "+" { 2 } } + } + } + } +] +[ "12 sqrt; + 2" test-Expression ] unit-test + +[ + T{ ast-cascade + { receiver T{ ast-message-send f 12 "sqrt" } } + { messages + { + T{ ast-message f "+" { 1 } } + T{ ast-message f "+" { 2 } } + } + } + } +] +[ "12 sqrt + 1; + 2" test-Expression ] unit-test + +[ + T{ ast-cascade + { receiver T{ ast-message-send f 12 "squared" } } + { messages + { + T{ ast-message f "to:" { 100 } } + T{ ast-message f "sqrt" } + } + } + } +] +[ "12 squared to: 100; sqrt" test-Expression ] unit-test + +[ + T{ ast-message-send f + T{ ast-message-send f 1 "+" { 2 } } + "*" + { 3 } + } +] +[ "1+2*3" test-Expression ] unit-test + +[ + T{ ast-message-send + { receiver + T{ ast-message-send + { receiver { T{ ast-block { body { "a" } } } } } + { selector "at:" } + { arguments { 0 } } + } + } + { selector "value" } + } +] +[ "(#(['a']) at: 0) value" test-Expression ] unit-test + +EBNF: test-FinalStatement +test = <foreign parse-smalltalk FinalStatement> +;EBNF + +[ T{ ast-name f "value" } ] [ "value" test-FinalStatement ] unit-test +[ T{ ast-return f T{ ast-name f "value" } } ] [ "^value" test-FinalStatement ] unit-test +[ T{ ast-assignment f T{ ast-name f "value" } 5 } ] [ "value:=5" test-FinalStatement ] unit-test + +EBNF: test-LocalVariableDeclarationList +test = <foreign parse-smalltalk LocalVariableDeclarationList> +;EBNF + +[ T{ ast-local-variables f { "i" "j" } } ] [ " | i j |" test-LocalVariableDeclarationList ] unit-test + + +[ T{ ast-message-send f T{ ast-name f "x" } "foo:bar:" { 1 2 } } ] +[ "x foo:1 bar:2" test-Expression ] unit-test + +[ + T{ ast-message-send + f + T{ ast-message-send f + T{ ast-message-send f 3 "factorial" { } } + "+" + { T{ ast-message-send f 4 "factorial" { } } } + } + "between:and:" + { 10 100 } + } +] +[ "3 factorial + 4 factorial between: 10 and: 100" test-Expression ] unit-test + +[ T{ ast-sequence f { } { 1 2 } } ] [ "1. 2" parse-smalltalk ] unit-test + +[ T{ ast-sequence f { } { 1 2 } } ] [ "1. 2." parse-smalltalk ] unit-test + +[ + T{ ast-sequence f { } + { + T{ ast-class + { name "Test" } + { superclass "Object" } + { ivars { "a" } } + } + } + } +] +[ "class Test [|a|]" parse-smalltalk ] unit-test + +[ + T{ ast-sequence f { } + { + T{ ast-class + { name "Test1" } + { superclass "Object" } + { ivars { "a" } } + } + + T{ ast-class + { name "Test2" } + { superclass "Test1" } + { ivars { "b" } } + } + } + } +] +[ "class Test1 [|a|]. class Test2 extends Test1 [|b|]" parse-smalltalk ] unit-test + +[ ] [ "class Foo []. Tests blah " parse-smalltalk drop ] unit-test + +[ ] [ "vocab:smalltalk/parser/test.st" ascii file-contents parse-smalltalk drop ] unit-test \ No newline at end of file diff --git a/extra/smalltalk/parser/parser.factor b/extra/smalltalk/parser/parser.factor new file mode 100644 index 0000000000..c7cafe94dd --- /dev/null +++ b/extra/smalltalk/parser/parser.factor @@ -0,0 +1,228 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: peg peg.ebnf smalltalk.ast sequences sequences.deep strings +math.parser kernel arrays byte-arrays math assocs accessors ; +IN: smalltalk.parser + +! :mode=text:noTabs=true: + +! Based on http://chronos-st.blogspot.com/2007/12/smalltalk-in-one-page.html + +ERROR: bad-number str ; + +: check-number ( str -- n ) + >string dup string>number [ ] [ bad-number ] ?if ; + +EBNF: parse-smalltalk + +Character = . +WhitespaceCharacter = (" " | "\t" | "\n" | "\r" ) +DecimalDigit = [0-9] +Letter = [A-Za-z] + +CommentCharacter = [^"] | '""' => [[ CHAR: " ]] +Comment = '"' (CommentCharacter)*:s '"' => [[ s >string ast-comment boa ]] + +OptionalWhiteSpace = (WhitespaceCharacter | Comment)* +Whitespace = (WhitespaceCharacter | Comment)+ + +LetterOrDigit = DecimalDigit | Letter +Identifier = (Letter | "_"):h (LetterOrDigit | "_")*:t => [[ { h t } flatten >string ]] +Reference = Identifier => [[ ast-name boa ]] + +ConstantReference = "nil" => [[ nil ]] + | "false" => [[ f ]] + | "true" => [[ t ]] +PseudoVariableReference = "self" => [[ self ]] + | "super" => [[ super ]] +ReservedIdentifier = PseudoVariableReference | ConstantReference + +BindableIdentifier = Identifier + +UnaryMessageSelector = Identifier + +Keyword = Identifier:i ":" => [[ i ":" append ]] + +KeywordMessageSelector = Keyword+ => [[ concat ]] +BinarySelectorChar = "~" | "!" | "@" | "%" | "&" | "*" | "-" | "+" + | "=" | "|" | "\" | "<" | ">" | "," | "?" | "/" +BinaryMessageSelector = BinarySelectorChar+ => [[ concat ]] + +OptionalMinus = ("-" => [[ CHAR: - ]])? +IntegerLiteral = (OptionalMinus:m UnsignedIntegerLiteral:i) => [[ i m [ neg ] when ]] +UnsignedIntegerLiteral = Radix:r "r" BaseNIntegerLiteral:b => [[ b >string r base> ]] + | DecimalIntegerLiteral => [[ check-number ]] +DecimalIntegerLiteral = DecimalDigit+ +Radix = DecimalIntegerLiteral => [[ check-number ]] +BaseNIntegerLiteral = LetterOrDigit+ +FloatingPointLiteral = (OptionalMinus + DecimalIntegerLiteral + ("." => [[ CHAR: . ]] DecimalIntegerLiteral Exponent? | Exponent)) + => [[ flatten check-number ]] +Exponent = "e" => [[ CHAR: e ]] (OptionalMinus DecimalIntegerLiteral)? + +CharacterLiteral = "$" Character:c => [[ c ]] + +StringLiteral = "'" (StringLiteralCharacter | "''" => [[ CHAR: ' ]])*:s "'" + => [[ s >string ]] +StringLiteralCharacter = [^'] + +SymbolInArrayLiteral = KeywordMessageSelector + | UnaryMessageSelector + | BinaryMessageSelector +SymbolLiteral = "#" (SymbolInArrayLiteral | StringLiteral):s => [[ s intern ]] + +ArrayLiteral = (ObjectArrayLiteral | ByteArrayLiteral) +ObjectArrayLiteral = "#" NestedObjectArrayLiteral:elts => [[ elts ]] +NestedObjectArrayLiteral = "(" OptionalWhiteSpace + (LiteralArrayElement:h + (Whitespace LiteralArrayElement:e => [[ e ]])*:t + => [[ t h prefix ]] + )?:elts OptionalWhiteSpace ")" => [[ elts >array ]] + +LiteralArrayElement = Literal + | NestedObjectArrayLiteral + | SymbolInArrayLiteral + | ConstantReference + +ByteArrayLiteral = "#[" OptionalWhiteSpace + (UnsignedIntegerLiteral:h + (Whitespace UnsignedIntegerLiteral:i => [[ i ]])*:t + => [[ t h prefix ]] + )?:elts OptionalWhiteSpace "]" => [[ elts >byte-array ]] + +FormalBlockArgumentDeclaration = ":" BindableIdentifier:i => [[ i ]] +FormalBlockArgumentDeclarationList = + FormalBlockArgumentDeclaration:h + (Whitespace FormalBlockArgumentDeclaration:v => [[ v ]])*:t + => [[ t h prefix ]] + +BlockLiteral = "[" + (OptionalWhiteSpace + FormalBlockArgumentDeclarationList:args + OptionalWhiteSpace + "|" + => [[ args ]] + )?:args + ExecutableCode:body + "]" => [[ args >array body <ast-block> ]] + +Literal = (ConstantReference + | FloatingPointLiteral + | IntegerLiteral + | CharacterLiteral + | StringLiteral + | ArrayLiteral + | SymbolLiteral + | BlockLiteral) + +NestedExpression = "(" Statement:s OptionalWhiteSpace ")" => [[ s ]] +Operand = Literal + | PseudoVariableReference + | Reference + | NestedExpression + +UnaryMessage = OptionalWhiteSpace + UnaryMessageSelector:s !(":") + => [[ s { } ast-message boa ]] + +BinaryMessage = OptionalWhiteSpace + BinaryMessageSelector:selector + OptionalWhiteSpace + (UnaryMessageSend | Operand):rhs + => [[ selector { rhs } ast-message boa ]] + +KeywordMessageSegment = Keyword:k OptionalWhiteSpace (BinaryMessageSend | UnaryMessageSend | Operand):arg => [[ { k arg } ]] +KeywordMessage = OptionalWhiteSpace + KeywordMessageSegment:h + (OptionalWhiteSpace KeywordMessageSegment:s => [[ s ]])*:t + => [[ t h prefix unzip [ concat ] dip ast-message boa ]] + +Message = BinaryMessage | UnaryMessage | KeywordMessage + +UnaryMessageSend = (UnaryMessageSend | Operand):lhs + UnaryMessage:h + (OptionalWhiteSpace ";" Message:m => [[ m ]])*:t + => [[ lhs t h prefix >array <ast-cascade> ]] + +BinaryMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs + BinaryMessage:h + (OptionalWhiteSpace ";" Message:m => [[ m ]])*:t + => [[ lhs t h prefix >array <ast-cascade> ]] + +KeywordMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs + KeywordMessage:h + (OptionalWhiteSpace ";" Message:m => [[ m ]])*:t + => [[ lhs t h prefix >array <ast-cascade> ]] + +Expression = OptionalWhiteSpace + (KeywordMessageSend | BinaryMessageSend | UnaryMessageSend | Operand):e + => [[ e ]] + +AssignmentOperation = OptionalWhiteSpace BindableIdentifier:i + OptionalWhiteSpace ":=" OptionalWhiteSpace => [[ i ast-name boa ]] +AssignmentStatement = AssignmentOperation:a Statement:s => [[ a s ast-assignment boa ]] +Statement = ClassDeclaration | ForeignClassDeclaration | AssignmentStatement | Expression + +MethodReturnOperator = OptionalWhiteSpace "^" +FinalStatement = (MethodReturnOperator Statement:s => [[ s ast-return boa ]]) + | Statement + +LocalVariableDeclarationList = OptionalWhiteSpace "|" OptionalWhiteSpace + (BindableIdentifier:h + (Whitespace BindableIdentifier:b => [[ b ]])*:t + => [[ t h prefix ]] + )?:b OptionalWhiteSpace "|" => [[ b >array ast-local-variables boa ]] + +EndStatement = "." + +ExecutableCode = (LocalVariableDeclarationList)?:locals + (Statement:s OptionalWhiteSpace EndStatement => [[ s ]])*:h + (FinalStatement:t (EndStatement)? => [[ t ]])?:t + OptionalWhiteSpace + => [[ h t [ suffix ] when* locals [ prefix ] when* >array ]] + +TopLevelForm = ExecutableCode => [[ <ast-sequence> ]] + +UnaryMethodHeader = UnaryMessageSelector:selector + => [[ { selector { } } ]] +BinaryMethodHeader = BinaryMessageSelector:selector OptionalWhiteSpace BindableIdentifier:identifier + => [[ { selector { identifier } } ]] +KeywordMethodHeaderSegment = Keyword:keyword + OptionalWhiteSpace + BindableIdentifier:identifier => [[ { keyword identifier } ]] +KeywordMethodHeader = KeywordMethodHeaderSegment:h (Whitespace KeywordMethodHeaderSegment:s => [[ s ]])*:t + => [[ t h prefix unzip [ concat ] dip 2array ]] +MethodHeader = KeywordMethodHeader + | BinaryMethodHeader + | UnaryMethodHeader +MethodDeclaration = OptionalWhiteSpace "method" OptionalWhiteSpace MethodHeader:header + OptionalWhiteSpace "[" + ExecutableCode:code + "]" + => [[ header first2 code <ast-method> ]] + +ClassDeclaration = OptionalWhiteSpace "class" OptionalWhiteSpace Identifier:name + OptionalWhiteSpace + ("extends" OptionalWhiteSpace Identifier:superclass OptionalWhiteSpace => [[ superclass ]])?:superclass + OptionalWhiteSpace "[" + (OptionalWhiteSpace LocalVariableDeclarationList:l => [[ l names>> ]])?:ivars + (MethodDeclaration:h + (OptionalWhiteSpace + EndStatement + OptionalWhiteSpace + MethodDeclaration:m => [[ m ]])*:t (EndStatement)? + => [[ t h prefix ]] + )?:methods + OptionalWhiteSpace "]" + => [[ name superclass "Object" or ivars >array methods >array ast-class boa ]] + +ForeignClassDeclaration = OptionalWhiteSpace "foreign" + OptionalWhiteSpace Identifier:name + OptionalWhiteSpace Literal:class + => [[ class name ast-foreign boa ]] +End = !(.) + +Program = TopLevelForm End + +;EBNF \ No newline at end of file diff --git a/extra/smalltalk/parser/test.st b/extra/smalltalk/parser/test.st new file mode 100644 index 0000000000..063f20882a --- /dev/null +++ b/extra/smalltalk/parser/test.st @@ -0,0 +1,65 @@ +class TreeNode extends Object [ + |left right item| + + method binarytrees: n to: output [ + | minDepth maxDepth stretchDepth check longLivedTree iterations | + minDepth := 4. + maxDepth := minDepth + 2 max: n. + stretchDepth := maxDepth + 1. + + check := (TreeNode bottomUpTree: 0 depth: stretchDepth) itemCheck. + output + nextPutAll: 'stretch tree of depth '; print: stretchDepth; tab; + nextPutAll: ' check: '; print: check; nl. + + longLivedTree := TreeNode bottomUpTree: 0 depth: maxDepth. + minDepth to: maxDepth by: 2 do: [:depth| + iterations := 1 bitShift: maxDepth - depth + minDepth. + + check := 0. + 1 to: iterations do: [:i| + check := check + (TreeNode bottomUpTree: i depth: depth) itemCheck. + check := check + (TreeNode bottomUpTree: -1*i depth: depth) itemCheck + ]. + output + print: (2*iterations); tab; + nextPutAll: ' trees of depth '; print: depth; tab; + nextPutAll: ' check: '; print: check; nl + ]. + + output + nextPutAll: 'long lived tree of depth '; print: maxDepth; tab; + nextPutAll: ' check: '; print: longLivedTree itemCheck; nl + ]. + + method binarytrees: arg [ + self binarytrees: arg to: self stdout. + ^'' + ]. + + method left: leftChild right: rightChild item: anItem [ + left := leftChild. + right := rightChild. + item := anItem + ]. + + method itemCheck [ + ^left isNil + ifTrue: [item] ifFalse: [item + (left itemCheck - right itemCheck)] + ]. + + method bottomUpTree: anItem depth: anInteger [ + ^(anInteger > 0) + ifTrue: [ + self + left: (self bottomUpTree: 2*anItem - 1 depth: anInteger - 1) + right: (self bottomUpTree: 2*anItem depth: anInteger - 1) + item: anItem + ] ifFalse: [self left: nil right: nil item: anItem] + ]. + + method left: leftChild right: rightChild item: anItem [ + ^(super new) left: leftChild right: rightChild item: anItem + ] +]. + diff --git a/extra/smalltalk/printer/authors.txt b/extra/smalltalk/printer/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/printer/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/printer/printer-tests.factor b/extra/smalltalk/printer/printer-tests.factor new file mode 100644 index 0000000000..e9f4bd9451 --- /dev/null +++ b/extra/smalltalk/printer/printer-tests.factor @@ -0,0 +1,4 @@ +IN: smalltalk.printer.tests +USING: smalltalk.printer tools.test ; + +[ "#((1 2) 'hi')" ] [ { { 1 2 } "hi" } smalltalk>string ] unit-test \ No newline at end of file diff --git a/extra/smalltalk/printer/printer.factor b/extra/smalltalk/printer/printer.factor new file mode 100644 index 0000000000..9b6aa11114 --- /dev/null +++ b/extra/smalltalk/printer/printer.factor @@ -0,0 +1,34 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays byte-arrays kernel make math +math.parser prettyprint sequences smalltalk.ast strings ; +IN: smalltalk.printer + +GENERIC: smalltalk>string ( object -- string ) + +M: real smalltalk>string number>string ; + +M: string smalltalk>string + [ + "'" % + [ dup CHAR: ' = [ dup , , ] [ , ] if ] each + "'" % + ] "" make ; + +GENERIC: array-element>string ( object -- string ) + +M: object array-element>string smalltalk>string ; + +M: array array-element>string + [ array-element>string ] map " " join "(" ")" surround ; + +M: array smalltalk>string + array-element>string "#" prepend ; + +M: byte-array smalltalk>string + [ number>string ] { } map-as " " join "#[" "]" surround ; + +M: symbol smalltalk>string + name>> smalltalk>string "#" prepend ; + +M: object smalltalk>string unparse-short ; \ No newline at end of file diff --git a/extra/smalltalk/selectors/authors.txt b/extra/smalltalk/selectors/authors.txt new file mode 100644 index 0000000000..d4f5d6b3ae --- /dev/null +++ b/extra/smalltalk/selectors/authors.txt @@ -0,0 +1 @@ +Slava Pestov \ No newline at end of file diff --git a/extra/smalltalk/selectors/selectors.factor b/extra/smalltalk/selectors/selectors.factor new file mode 100644 index 0000000000..2ea1e99afd --- /dev/null +++ b/extra/smalltalk/selectors/selectors.factor @@ -0,0 +1,28 @@ +! Copyright (C) 2009 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: combinators effects generic generic.standard +kernel sequences words lexer ; +IN: smalltalk.selectors + +SYMBOLS: unary binary keyword ; + +: selector-type ( selector -- type ) + { + { [ dup [ "~!@%&*-+=|\\<>,?/" member? ] all? ] [ binary ] } + { [ CHAR: : over member? ] [ keyword ] } + [ unary ] + } cond nip ; + +: selector>effect ( selector -- effect ) + dup selector-type { + { unary [ drop 0 ] } + { binary [ drop 1 ] } + { keyword [ [ CHAR: : = ] count ] } + } case "receiver" suffix { "result" } <effect> ; + +: selector>generic ( selector -- generic ) + [ "selector-" prepend "smalltalk.selectors" create dup ] + [ selector>effect ] + bi define-simple-generic ; + +SYNTAX: SELECTOR: scan selector>generic drop ; \ No newline at end of file diff --git a/extra/spider/spider-docs.factor b/extra/spider/spider-docs.factor index cdbd5e7e09..4ed00d39f6 100644 --- a/extra/spider/spider-docs.factor +++ b/extra/spider/spider-docs.factor @@ -16,11 +16,6 @@ HELP: run-spider { "spider" spider } } { $description "Runs a spider until completion. See the " { $subsection "spider-tutorial" } " for a complete description of the tuple slots that affect how thet spider works." } ; -HELP: slurp-heap-while -{ $values - { "heap" "a heap" } { "quot1" quotation } { "quot2" quotation } } -{ $description "Removes values from a heap that match the predicate quotation " { $snippet "quot1" } " and processes them with " { $snippet "quot2" } " until the predicate quotation no longer matches." } ; - ARTICLE: "spider-tutorial" "Spider tutorial" "To create a new spider, call the " { $link <spider> } " word with a link to the site you wish to spider." { $code <" "http://concatenative.org" <spider> "> } diff --git a/extra/spider/spider.factor b/extra/spider/spider.factor index bd5b2668be..d08276a9bb 100644 --- a/extra/spider/spider.factor +++ b/extra/spider/spider.factor @@ -3,8 +3,8 @@ USING: accessors fry html.parser html.parser.analyzer http.client kernel tools.time sets assocs sequences concurrency.combinators io threads namespaces math multiline -heaps math.parser inspector urls assoc-heaps logging -combinators.short-circuit continuations calendar prettyprint ; +math.parser inspector urls logging combinators.short-circuit +continuations calendar prettyprint dlists deques locals ; IN: spider TUPLE: spider base count max-count sleep max-depth initial-links @@ -13,12 +13,33 @@ filters spidered todo nonmatching quiet ; TUPLE: spider-result url depth headers fetch-time parsed-html links processing-time timestamp ; +TUPLE: todo-url url depth ; + +: <todo-url> ( url depth -- todo-url ) + todo-url new + swap >>depth + swap >>url ; + +TUPLE: unique-deque assoc deque ; + +: <unique-deque> ( -- unique-deque ) + H{ } clone <dlist> unique-deque boa ; + +: push-url ( url depth unique-deque -- ) + [ <todo-url> ] dip + [ [ [ t ] dip url>> ] [ assoc>> ] bi* set-at ] + [ deque>> push-back ] 2bi ; + +: pop-url ( unique-deque -- todo-url ) deque>> pop-front ; + +: peek-url ( unique-deque -- todo-url ) deque>> peek-front ; + : <spider> ( base -- spider ) >url spider new over >>base - swap 0 <unique-min-heap> [ heap-push ] keep >>todo - <unique-min-heap> >>nonmatching + swap 0 <unique-deque> [ push-url ] keep >>todo + <unique-deque> >>nonmatching 0 >>max-depth 0 >>count 1/0. >>max-count @@ -27,10 +48,10 @@ links processing-time timestamp ; <PRIVATE : apply-filters ( links spider -- links' ) - filters>> [ '[ _ 1&& ] filter ] when* ; + filters>> [ '[ [ _ 1&& ] filter ] call( seq -- seq' ) ] when* ; -: push-links ( links level assoc-heap -- ) - '[ _ _ heap-push ] each ; +: push-links ( links level unique-deque -- ) + '[ _ _ push-url ] each ; : add-todo ( links level spider -- ) todo>> push-links ; @@ -38,64 +59,72 @@ links processing-time timestamp ; : add-nonmatching ( links level spider -- ) nonmatching>> push-links ; -: filter-base ( spider spider-result -- base-links nonmatching-links ) +: filter-base-links ( spider spider-result -- base-links nonmatching-links ) [ base>> host>> ] [ links>> prune ] bi* [ host>> = ] with partition ; : add-spidered ( spider spider-result -- ) [ [ 1+ ] change-count ] dip 2dup [ spidered>> ] [ dup url>> ] bi* rot set-at - [ filter-base ] 2keep + [ filter-base-links ] 2keep depth>> 1+ swap [ add-nonmatching ] [ tuck [ apply-filters ] 2dip add-todo ] 2bi ; -: normalize-hrefs ( links -- links' ) - [ >url ] map - spider get base>> swap [ derive-url ] with map ; +: normalize-hrefs ( links spider -- links' ) + [ [ >url ] map ] dip + base>> swap [ derive-url ] with map ; : print-spidering ( url depth -- ) "depth: " write number>string write ", spidering: " write . yield ; -: (spider-page) ( url depth -- spider-result ) - f pick spider get spidered>> set-at - over '[ _ http-get ] benchmark swap - [ parse-html dup find-hrefs normalize-hrefs ] benchmark +:: new-spidered-result ( spider url depth -- spider-result ) + f url spider spidered>> set-at + [ url http-get ] benchmark :> fetch-time :> html :> headers + [ + html parse-html [ ] [ find-hrefs spider normalize-hrefs ] bi + ] benchmark :> processing-time :> links :> parsed-html + url depth headers fetch-time parsed-html links processing-time now spider-result boa ; -: spider-page ( url depth -- ) - spider get quiet>> [ 2dup print-spidering ] unless - (spider-page) - spider get [ quiet>> [ dup describe ] unless ] - [ swap add-spidered ] bi ; +:: spider-page ( spider url depth -- ) + spider quiet>> [ url depth print-spidering ] unless + spider url depth new-spidered-result :> spidered-result + spider quiet>> [ spidered-result describe ] unless + spider spidered-result add-spidered ; \ spider-page ERROR add-error-logging -: spider-sleep ( -- ) - spider get sleep>> [ sleep ] when* ; +: spider-sleep ( spider -- ) + sleep>> [ sleep ] when* ; -: queue-initial-links ( spider -- spider ) - [ initial-links>> normalize-hrefs 0 ] keep - [ add-todo ] keep ; +:: queue-initial-links ( spider -- spider ) + spider initial-links>> spider normalize-hrefs 0 spider add-todo spider ; -: slurp-heap-while ( heap quot1 quot2: ( value key -- ) -- ) - pick heap-empty? [ 3drop ] [ - [ [ heap-pop dup ] 2dip slip [ t ] compose [ 2drop f ] if ] - [ roll [ slurp-heap-while ] [ 3drop ] if ] 3bi - ] if ; inline recursive +: spider-page? ( spider -- ? ) + { + [ todo>> deque>> deque-empty? not ] + [ [ todo>> peek-url depth>> ] [ max-depth>> ] bi < ] + [ [ count>> ] [ max-count>> ] bi < ] + } 1&& ; + +: setup-next-url ( spider -- spider url depth ) + dup todo>> pop-url [ url>> ] [ depth>> ] bi ; + +: spider-next-page ( spider -- ) + setup-next-url spider-page ; PRIVATE> +: run-spider-loop ( spider -- ) + dup spider-page? [ + [ spider-next-page ] [ run-spider-loop ] bi + ] [ + drop + ] if ; + : run-spider ( spider -- spider ) "spider" [ - dup spider [ - queue-initial-links - [ todo>> ] [ max-depth>> ] bi - '[ - _ <= spider get - [ count>> ] [ max-count>> ] bi < and - ] [ spider-page spider-sleep ] slurp-heap-while - spider get - ] with-variable + queue-initial-links [ run-spider-loop ] keep ] with-logging ;