Merge branch 'master' of git://factorcode.org/git/factor

* 'master' of git://factorcode.org/git/factor: (37 commits)
  move assoc-heaps to extra
  remove tokenize-line
  make tokenize-line configurable, fix bug in take-quoted-string
  fix bug in state-parser, add take-token
  Make math.blas library and ABI choice configurable
  state-parser take-quoted-string rewinds if the string is not found
  refactor state parser some more, add a word to parse escaped strings
  set non-key/value attributes to themselves
  rename next to advance
  make html.parser words private
  remove dead code
  fix parsing of attributes for nofollows
  add take-sequence to state parser
  more cleanup
  cleaning up html.parser
  fix deployed name in minneapolis-talk, add summary/deploy to chicago talk
  spider - better handling of relative links for frames, dont spider things twice
  add chicago-talk to demos
  fix linux64 blas
  take-until doesnt pass the element to the quotation anymore
  ...
db4
Aaron Schaefer 2009-04-01 18:55:43 -04:00
commit d220288356
62 changed files with 1652 additions and 135 deletions

View File

@ -7,10 +7,10 @@ IN: alien.fortran
ARTICLE: "alien.fortran-abis" "Fortran ABIs"
"Fortran does not have a standard ABI like C does. Factor supports the following Fortran ABIs:"
{ $list
{ { $subsection gfortran-abi } " is used by gfortran, the Fortran compiler included with GCC 4." }
{ { $subsection f2c-abi } " is used by the F2C Fortran-to-C translator and G77, the Fortran compiler included with GCC 3.x and earlier. It is also used by gfortran when compiling with the -ff2c flag." }
{ { $subsection intel-unix-abi } " is used by the Intel Fortran Compiler on Linux and Mac OS X." }
{ { $subsection intel-windows-abi } " is used by the Intel Fortran Compiler on Windows." }
{ { $link gfortran-abi } " is used by gfortran, the Fortran compiler included with GCC 4." }
{ { $link f2c-abi } " is used by the F2C Fortran-to-C translator and G77, the Fortran compiler included with GCC 3.x and earlier. It is also used by gfortran when compiling with the -ff2c flag." }
{ { $link intel-unix-abi } " is used by the Intel Fortran Compiler on Linux and Mac OS X." }
{ { $link intel-windows-abi } " is used by the Intel Fortran Compiler on Windows." }
}
"A library's ABI is specified when that library is opened by the " { $link add-fortran-library } " word." ;

View File

@ -10,12 +10,4 @@ IN: bootstrap.ui
{ [ os unix? ] [ "x11" ] }
} cond
] unless* "ui.backend." prepend require
"ui-text-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require
] when

View File

@ -0,0 +1,23 @@
USING: alien.fortran help.markup help.syntax math.blas.config multiline ;
IN: math.blas.config
ARTICLE: "math.blas.config" "Configuring the BLAS interface"
"The " { $link "math.blas-summary" } " chooses the underlying BLAS interface to use based on the values of the following global variables:"
{ $subsection blas-library }
{ $subsection blas-fortran-abi }
"The interface attempts to set default values based on the ones encountered on the Factor project's build machines. If these settings don't work with your system's BLAS, or you wish to use a commercial BLAS, you may change the global values of those variables in your " { $link "factor-rc" } ". For example, to use AMD's ACML library on Windows with " { $snippet "math.blas" } ", your " { $snippet "factor-rc" } " would look like this:"
{ $code <"
USING: math.blas.config namespaces ;
"X:\\path\\to\\acml.dll" blas-library set-global
intel-windows-abi blas-fortran-abi set-global
"> }
"To take effect, the " { $snippet "blas-library" } " and " { $snippet "blas-fortran-abi" } " variables must be set before any other " { $snippet "math.blas" } " vocabularies are loaded."
;
HELP: blas-library
{ $description "The name of the shared library containing the BLAS interface to load. The value of this variable must be a valid shared library name that can be passed to " { $link add-fortran-library } ". To take effect, this variable must be set before any other " { $snippet "math.blas" } " vocabularies are loaded. See " { $link "math.blas.config" } " for details and examples." } ;
HELP: blas-fortran-abi
{ $description "The Fortran ABI used by the BLAS interface specified in the " { $link blas-library } " variable. The value of " { $snippet "blas-fortran-abi" } " must be one of the " { $link "alien.fortran-abis" } " that can be passed to " { $link add-fortran-library } ". To take effect, this variable must be set before any other " { $snippet "math.blas" } " vocabularies are loaded. See " { $link "math.blas.config" } " for details and examples." } ;
ABOUT: "math.blas.config"

View File

@ -0,0 +1,23 @@
USING: alien.fortran combinators kernel namespaces system ;
IN: math.blas.config
SYMBOLS: blas-library blas-fortran-abi ;
blas-library [
{
{ [ os macosx? ] [ "libblas.dylib" ] }
{ [ os windows? ] [ "blas.dll" ] }
[ "libblas.so" ]
} cond
] initialize
blas-fortran-abi [
{
{ [ os macosx? ] [ intel-unix-abi ] }
{ [ os windows? cpu x86.32? and ] [ f2c-abi ] }
{ [ os windows? cpu x86.64? and ] [ gfortran-abi ] }
{ [ os freebsd? ] [ gfortran-abi ] }
{ [ os linux? cpu x86.32? and ] [ gfortran-abi ] }
[ f2c-abi ]
} cond
] initialize

View File

@ -1,19 +1,9 @@
USING: alien alien.fortran kernel system combinators
alien.libraries ;
USING: alien.fortran kernel math.blas.config namespaces ;
IN: math.blas.ffi
<<
"blas" {
{ [ os macosx? ] [ "libblas.dylib" intel-unix-abi add-fortran-library ] }
{ [ os windows? cpu x86.32? and ] [ "blas.dll" f2c-abi add-fortran-library ] }
{ [ os windows? cpu x86.64? and ] [ "blas.dll" gfortran-abi add-fortran-library ] }
{
[ os [ freebsd? ] [ linux? cpu x86.32? and ] bi or ]
[ "libblas.so" gfortran-abi add-fortran-library ]
}
{ [ os [ freebsd? ] [ linux? ] bi or ] [ "libblas.so" gfortran-abi add-fortran-library ] }
[ "libblas.so" f2c-abi add-fortran-library ]
} cond
"blas" blas-library blas-fortran-abi [ get ] bi@
add-fortran-library
>>
LIBRARY: blas

View File

@ -2,13 +2,14 @@ USING: alien byte-arrays help.markup help.syntax math math.blas.vectors sequence
IN: math.blas.matrices
ARTICLE: "math.blas-summary" "Basic Linear Algebra Subroutines (BLAS) interface"
"Factor provides an interface to high-performance vector and matrix math routines available in the system's BLAS library. A set of specialized types are provided for handling packed, unboxed vector data:"
"Factor provides an interface to high-performance vector and matrix math routines available in implementations of the BLAS math library. A set of specialized types are provided for handling packed, unboxed vector data:"
{ $subsection "math.blas-types" }
"Scalar-vector and vector-vector operations are available in the " { $vocab-link "math.blas.vectors" } " vocabulary:"
{ $subsection "math.blas.vectors" }
"Vector-matrix and matrix-matrix operations are available in the " { $vocab-link "math.blas.matrices" } " vocabulary:"
{ $subsection "math.blas.matrices" }
"The low-level BLAS Fortran interface can be accessed directly through the " { $vocab-link "math.blas.ffi" } " vocabulary." ;
"The low-level BLAS Fortran interface can be accessed directly through the " { $vocab-link "math.blas.ffi" } " vocabulary. The BLAS interface can be configured to use different underlying BLAS implementations:"
{ $subsection "math.blas.config" } ;
ARTICLE: "math.blas-types" "BLAS interface types"
"BLAS vectors come in single- and double-precision, real and complex flavors:"

View File

@ -155,18 +155,21 @@ TUPLE: peg-head rule-id involved-set eval-set ;
dup pos>> pos set ans>>
; inline
:: (setup-lr) ( r l s -- )
s head>> l head>> eq? [
l head>> s (>>head)
l head>> [ s rule-id>> suffix ] change-involved-set drop
r l s next>> (setup-lr)
] unless ;
:: (setup-lr) ( l s -- )
s [
s left-recursion? [ s throw ] unless
s head>> l head>> eq? [
l head>> s (>>head)
l head>> [ s rule-id>> suffix ] change-involved-set drop
l s next>> (setup-lr)
] unless
] when ;
:: setup-lr ( r l -- )
l head>> [
r rule-id V{ } clone V{ } clone peg-head boa l (>>head)
] unless
r l lrstack get (setup-lr) ;
l lrstack get (setup-lr) ;
:: lr-answer ( r p m -- ast )
[let* |
@ -216,8 +219,10 @@ TUPLE: peg-head rule-id involved-set eval-set ;
lrstack get next>> lrstack set
pos get m (>>pos)
lr head>> [
ans lr (>>seed)
r p m lr-answer
m ans>> left-recursion? [
ans lr (>>seed)
r p m lr-answer
] [ ans ] if
] [
ans m (>>ans)
ans

View File

@ -3,8 +3,7 @@
USING: accessors arrays assocs continuations kernel math models
namespaces opengl sequences io combinators combinators.short-circuit
fry math.vectors math.rectangles cache ui.gadgets ui.gestures
ui.render ui.text ui.text.private ui.backend ui.gadgets.tracks
ui.commands ;
ui.render ui.backend ui.gadgets.tracks ui.commands ;
IN: ui.gadgets.worlds
TUPLE: world < track
@ -53,7 +52,6 @@ M: world request-focus-on ( child gadget -- )
swap >>status
swap >>title
swap 1 track-add
dup init-text-rendering
dup request-focus ;
: <world> ( gadget title status -- world )
@ -74,15 +72,20 @@ M: world remove-gadget
2dup layers>> memq?
[ layers>> delq ] [ call-next-method ] if ;
SYMBOL: flush-layout-cache-hook
flush-layout-cache-hook [ [ ] ] initialize
: (draw-world) ( world -- )
dup handle>> [
{
[ init-gl ]
[ draw-gadget ]
[ finish-text-rendering ]
[ text-handle>> [ purge-cache ] when* ]
[ images>> [ purge-cache ] when* ]
} cleave
] with-gl-context ;
] with-gl-context
flush-layout-cache-hook get call( -- ) ;
: draw-world? ( world -- ? )
#! We don't draw deactivated worlds, or those with 0 size.

View File

@ -18,12 +18,11 @@ M: core-text-renderer string-dim
[ cached-line dim>> ]
if-empty ;
M: core-text-renderer finish-text-rendering
text-handle>> purge-cache
M: core-text-renderer flush-layout-cache
cached-lines get purge-cache ;
: rendered-line ( font string -- texture )
world get text-handle>>
world get world-text-handle
[ cached-line [ image>> ] [ loc>> ] bi <texture> ]
2cache ;

View File

@ -14,12 +14,11 @@ M: pango-renderer string-dim
[ " " string-dim { 0 1 } v* ]
[ cached-layout logical-rect>> dim>> [ >integer ] map ] if-empty ;
M: pango-renderer finish-text-rendering
text-handle>> purge-cache
M: pango-renderer flush-layout-cache
cached-layouts get purge-cache ;
: rendered-layout ( font string -- texture )
world get text-handle>>
world get world-text-handle
[ cached-layout [ image>> ] [ text-position vneg ] bi <texture> ]
2cache ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel arrays sequences math math.order opengl opengl.gl
strings fonts colors accessors ;
strings fonts colors accessors namespaces ui.gadgets.worlds ;
IN: ui.text
<PRIVATE
@ -10,9 +10,13 @@ SYMBOL: font-renderer
HOOK: init-text-rendering font-renderer ( world -- )
HOOK: finish-text-rendering font-renderer ( world -- )
: world-text-handle ( world -- handle )
dup text-handle>> [ dup init-text-rendering ] unless
text-handle>> ;
M: object finish-text-rendering drop ;
HOOK: flush-layout-cache font-renderer ( -- )
[ flush-layout-cache ] flush-layout-cache-hook set-global
HOOK: string-dim font-renderer ( font string -- dim )
@ -68,4 +72,14 @@ M: array draw-text
[ draw-string ]
[ [ 0.0 ] 2dip string-height 0.0 glTranslated ] 2bi
] with each
] do-matrix ;
] do-matrix ;
USING: vocabs.loader namespaces system combinators ;
"ui-backend" get [
{
{ [ os macosx? ] [ "core-text" ] }
{ [ os windows? ] [ "pango" ] }
{ [ os unix? ] [ "pango" ] }
} cond
] unless* "ui.text." prepend require

View File

@ -4,8 +4,7 @@ USING: arrays assocs io kernel math models namespaces make dlists
deques sequences threads sequences words continuations init
combinators hashtables concurrency.flags sets accessors calendar fry
destructors ui.gadgets ui.gadgets.private ui.gadgets.worlds
ui.gadgets.tracks ui.gestures ui.backend ui.render ui.text
ui.text.private ;
ui.gadgets.tracks ui.gestures ui.backend ui.render ;
IN: ui
<PRIVATE
@ -63,7 +62,7 @@ M: world graft*
: (ungraft-world) ( world -- )
{
[ handle>> select-gl-context ]
[ text-handle>> dispose ]
[ text-handle>> [ dispose ] when* ]
[ images>> [ dispose ] when* ]
[ hand-clicked close-global ]
[ hand-gadget close-global ]
@ -95,8 +94,7 @@ M: world ungraft*
: restore-world ( world -- )
{
[ reset-world ]
[ init-text-rendering ]
[ f >>images drop ]
[ f >>text-handle f >>images drop ]
[ restore-gadget ]
} cleave ;

View File

@ -97,7 +97,7 @@ ERROR: bad-slot-value value class ;
"writing" associate ;
: define-writer-generic ( name -- )
writer-word (( object value -- )) define-simple-generic ;
writer-word (( value object -- )) define-simple-generic ;
: define-writer ( class slot-spec -- )
[ nip name>> define-writer-generic ] [

View File

@ -0,0 +1,12 @@
USING: tools.deploy.config ;
V{
{ deploy-ui? t }
{ deploy-io 1 }
{ deploy-reflection 1 }
{ deploy-compiler? t }
{ deploy-math? t }
{ deploy-word-props? f }
{ deploy-c-types? f }
{ "stop-after-last-window?" t }
{ deploy-name "Chicago Talk" }
}

1
extra/chicago-talk/summary.txt Executable file
View File

@ -0,0 +1 @@
Slides for a talk at the Pycon VM Summit, Chicago, IL, March 2009

View File

@ -0,0 +1 @@
demos

View File

@ -3,7 +3,7 @@
USING: assocs html.parser kernel math sequences strings ascii
arrays generalizations shuffle unicode.case namespaces make
splitting http accessors io combinators http.client urls
urls.encoding fry prettyprint ;
urls.encoding fry prettyprint sets ;
IN: html.parser.analyzer
TUPLE: link attributes clickable ;
@ -126,7 +126,15 @@ TUPLE: link attributes clickable ;
[ [
[ name>> "a" = ]
[ attributes>> "href" swap key? ] bi and ] filter
] map sift [ [ attributes>> "href" swap at ] map ] map concat ;
] map sift
[ [ attributes>> "href" swap at ] map ] map concat ;
: find-frame-links ( vector -- vector' )
[ name>> "frame" = ] find-between-all
[ [ attributes>> "src" swap at ] map sift ] map concat sift ;
: find-all-links ( vector -- vector' )
[ find-hrefs ] [ find-frame-links ] bi append prune ;
: find-forms ( vector -- vector' )
"form" over find-opening-tags-by-name

View File

@ -42,6 +42,19 @@ V{
}
] [ "<a href = \"http://factorcode.org/\" foo = bar baz='quux'a=pirsqd >" parse-html ] unit-test
[
V{
T{ tag f "a"
H{
{ "a" "pirsqd" }
{ "foo" "bar" }
{ "href" "http://factorcode.org/" }
{ "baz" "quux" }
{ "nofollow" "nofollow" }
} f f }
}
] [ "<a href = \"http://factorcode.org/\" nofollow foo = bar baz='quux'a=pirsqd >" parse-html ] unit-test
[
V{
T{ tag f "html" H{ } f f }

View File

@ -1,17 +1,19 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays hashtables html.parser.state
html.parser.utils kernel make namespaces sequences
html.parser.utils kernel namespaces sequences
unicode.case unicode.categories combinators.short-circuit
quoting ;
quoting fry ;
IN: html.parser
TUPLE: tag name attributes text closing? ;
SINGLETON: text
SINGLETON: dtd
SINGLETON: comment
<PRIVATE
SYMBOL: tagstack
: push-tag ( tag -- )
@ -19,7 +21,7 @@ SYMBOL: tagstack
: closing-tag? ( string -- ? )
[ f ]
[ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
[ { [ first CHAR: / = ] [ peek CHAR: / = ] } 1|| ] if-empty ;
: <tag> ( name attributes closing? -- tag )
tag new
@ -30,22 +32,19 @@ SYMBOL: tagstack
: make-tag ( string attribs -- tag )
[ [ closing-tag? ] keep "/" trim1 ] dip rot <tag> ;
: new-tag ( string type -- tag )
: new-tag ( text name -- tag )
tag new
swap >>name
swap >>text ; inline
: make-text-tag ( string -- tag ) text new-tag ; inline
: make-comment-tag ( string -- tag ) comment new-tag ; inline
: make-dtd-tag ( string -- tag ) dtd new-tag ; inline
: (read-quote) ( state-parser ch -- string )
'[ [ current _ = ] take-until ] [ advance drop ] bi ;
: read-single-quote ( state-parser -- string )
[ [ CHAR: ' = ] take-until ] [ next drop ] bi ;
CHAR: ' (read-quote) ;
: read-double-quote ( state-parser -- string )
[ [ CHAR: " = ] take-until ] [ next drop ] bi ;
CHAR: " (read-quote) ;
: read-quote ( state-parser -- string )
dup get+increment CHAR: ' =
@ -53,57 +52,51 @@ SYMBOL: tagstack
: read-key ( state-parser -- string )
skip-whitespace
[ { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ;
: read-= ( state-parser -- )
skip-whitespace
[ [ CHAR: = = ] take-until drop ] [ next drop ] bi ;
[ current { [ CHAR: = = ] [ blank? ] } 1|| ] take-until ;
: read-token ( state-parser -- string )
[ blank? ] take-until ;
[ current blank? ] take-until ;
: read-value ( state-parser -- string )
skip-whitespace
dup get-char quote? [ read-quote ] [ read-token ] if
dup current quote? [ read-quote ] [ read-token ] if
[ blank? ] trim ;
: read-comment ( state-parser -- )
"-->" take-until-sequence make-comment-tag push-tag ;
"-->" take-until-sequence comment new-tag push-tag ;
: read-dtd ( state-parser -- )
">" take-until-sequence make-dtd-tag push-tag ;
">" take-until-sequence dtd new-tag push-tag ;
: read-bang ( state-parser -- )
next dup { [ get-char CHAR: - = ] [ get-next CHAR: - = ] } 1&& [
next next
read-comment
] [
read-dtd
] if ;
advance dup { [ current CHAR: - = ] [ peek-next CHAR: - = ] } 1&&
[ advance advance read-comment ] [ read-dtd ] if ;
: read-tag ( state-parser -- string )
[ [ "><" member? ] take-until ]
[ dup get-char CHAR: < = [ next ] unless drop ] bi ;
[ [ current "><" member? ] take-until ]
[ dup current CHAR: < = [ advance ] unless drop ] bi ;
: read-until-< ( state-parser -- string )
[ CHAR: < = ] take-until ;
[ current CHAR: < = ] take-until ;
: parse-text ( state-parser -- )
read-until-< [ make-text-tag push-tag ] unless-empty ;
read-until-< [ text new-tag push-tag ] unless-empty ;
: parse-key/value ( state-parser -- key value )
[ read-key >lower ]
[ skip-whitespace "=" take-sequence ]
[ swap [ read-value ] [ drop dup ] if ] tri ;
: (parse-attributes) ( state-parser -- )
skip-whitespace
dup state-parse-end? [
drop
] [
[
[ read-key >lower ] [ read-= ] [ read-value ] tri
2array ,
] keep (parse-attributes)
[ parse-key/value swap set ] [ (parse-attributes) ] bi
] if ;
: parse-attributes ( state-parser -- hashtable )
[ (parse-attributes) ] { } make >hashtable ;
[ (parse-attributes) ] H{ } make-assoc ;
: (parse-tag) ( string -- string' hashtable )
[
@ -111,7 +104,7 @@ SYMBOL: tagstack
] state-parse ;
: read-< ( state-parser -- string/f )
next dup get-char [
advance dup current [
CHAR: ! = [ read-bang f ] [ read-tag ] if
] [
drop f
@ -121,12 +114,14 @@ SYMBOL: tagstack
read-< [ (parse-tag) make-tag push-tag ] unless-empty ;
: (parse-html) ( state-parser -- )
dup get-next [
dup peek-next [
[ parse-text ] [ parse-tag ] [ (parse-html) ] tri
] [ drop ] if ;
: tag-parse ( quot -- vector )
V{ } clone tagstack [ state-parse ] with-variable ; inline
PRIVATE>
: parse-html ( string -- vector )
[ (parse-html) tagstack get ] tag-parse ;

View File

@ -7,7 +7,7 @@ IN: html.parser.state.tests
[ "hi" " how are you?" ]
[
"hi how are you?"
[ [ [ blank? ] take-until ] [ take-rest ] bi ] state-parse
[ [ [ current blank? ] take-until ] [ take-rest ] bi ] state-parse
] unit-test
[ "foo" ";bar" ]
@ -30,7 +30,66 @@ IN: html.parser.state.tests
] unit-test
[ { 1 2 } ]
[ { 1 2 3 } <state-parser> [ 3 = ] take-until ] unit-test
[ { 1 2 3 } <state-parser> [ current 3 = ] take-until ] unit-test
[ { 1 2 } ]
[ { 1 2 3 4 } <state-parser> { 3 4 } take-until-sequence ] unit-test
[ "ab" ]
[ "abcd" <state-parser> "ab" take-sequence ] unit-test
[ f ]
[ "abcd" <state-parser> "lol" take-sequence ] unit-test
[ "ab" ]
[
"abcd" <state-parser>
[ "lol" take-sequence drop ] [ "ab" take-sequence ] bi
] unit-test
[ "" ]
[ "abcd" <state-parser> "" take-sequence ] unit-test
[ "cd" ]
[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
[ f ]
[
"\"abc\" asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
] unit-test
[ "abc\\\"def" ]
[
"\"abc\\\"def\" asdf" <state-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "asdf" ]
[
"\"abc\" asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ skip-whitespace "asdf" take-sequence ] bi
] unit-test
[ f ]
[
"\"abc asdf" <state-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "\"abc" ]
[
"\"abc asdf" <state-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ "\"abc" take-sequence ] bi
] unit-test
[ "c" ]
[ "c" <state-parser> take-token ] unit-test
[ f ]
[ "" <state-parser> take-token ] unit-test
[ "abcd e \\\"f g" ]
[ "\"abcd e \\\"f g\"" <state-parser> CHAR: \ CHAR: " take-token* ] unit-test

View File

@ -1,7 +1,8 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular
unicode.case unicode.categories locals ;
unicode.case unicode.categories locals combinators.short-circuit
make combinators ;
IN: html.parser.state
@ -12,32 +13,33 @@ TUPLE: state-parser sequence n ;
swap >>sequence
0 >>n ;
: (get-char) ( n state -- char/f )
sequence>> ?nth ; inline
: offset ( state-parser offset -- char/f )
swap
[ n>> + ] [ sequence>> ?nth ] bi ; inline
: get-char ( state -- char/f )
[ n>> ] keep (get-char) ; inline
: current ( state-parser -- char/f ) 0 offset ; inline
: get-next ( state -- char/f )
[ n>> 1 + ] keep (get-char) ; inline
: previous ( state-parser -- char/f ) -1 offset ; inline
: next ( state -- state )
: peek-next ( state-parser -- char/f ) 1 offset ; inline
: advance ( state-parser -- state-parser )
[ 1 + ] change-n ; inline
: get+increment ( state -- char/f )
[ get-char ] [ next drop ] bi ; inline
: advance* ( state-parser -- )
advance drop ; inline
: state-parse ( sequence quot -- )
[ <state-parser> ] dip call ; inline
: get+increment ( state-parser -- char/f )
[ current ] [ advance drop ] bi ; inline
:: skip-until ( state quot: ( obj -- ? ) -- )
state get-char [
quot call [ state next quot skip-until ] unless
] when* ; inline recursive
:: skip-until ( state-parser quot: ( obj -- ? ) -- )
state-parser current [
state-parser quot call [ state-parser advance quot skip-until ] unless
] when ; inline recursive
: state-parse-end? ( state -- ? ) get-next not ;
: state-parse-end? ( state-parser -- ? ) current not ;
: take-until ( state quot: ( obj -- ? ) -- sequence/f )
: take-until ( state-parser quot: ( obj -- ? ) -- sequence/f )
over state-parse-end? [
2drop f
] [
@ -46,22 +48,66 @@ TUPLE: state-parser sequence n ;
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
] if ; inline
: take-while ( state-parser quot: ( obj -- ? ) -- sequence/f )
[ not ] compose take-until ; inline
:: take-sequence ( state-parser sequence -- obj/f )
state-parser [ n>> dup sequence length + ] [ sequence>> ] bi <slice>
sequence sequence= [
sequence
state-parser [ sequence length + ] change-n drop
] [
f
] if ;
:: take-until-sequence ( state-parser sequence -- sequence' )
sequence length <growing-circular> :> growing
state-parser
[
growing push-growing-circular
current growing push-growing-circular
sequence growing sequence=
] take-until :> found
found dup length
growing length 1- - head
state-parser next drop ;
state-parser advance drop ;
: skip-whitespace ( state -- state )
[ [ blank? not ] take-until drop ] keep ;
: skip-whitespace ( state-parser -- state-parser )
[ [ current blank? not ] take-until drop ] keep ;
: take-rest ( state -- sequence )
: take-rest ( state-parser -- sequence )
[ drop f ] take-until ; inline
: take-until-object ( state obj -- sequence )
'[ _ = ] take-until ;
: take-until-object ( state-parser obj -- sequence )
'[ current _ = ] take-until ;
: state-parse ( sequence quot -- )
[ <state-parser> ] dip call ; inline
:: take-quoted-string ( state-parser escape-char quote-char -- string )
state-parser n>> :> start-n
state-parser advance
[
{
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
[ current quote-char = not ]
} 1||
] take-while :> string
state-parser current quote-char = [
state-parser advance* string
] [
start-n state-parser (>>n) f
] if ;
: (take-token) ( state-parser -- string )
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
:: take-token* ( state-parser escape-char quote-char -- string/f )
state-parser skip-whitespace
dup current {
{ quote-char [ escape-char quote-char take-quoted-string ] }
{ f [ drop f ] }
[ drop (take-token) ]
} case ;
: take-token ( state-parser -- string/f )
CHAR: \ CHAR: " take-token* ;

View File

@ -8,5 +8,5 @@ V{
{ deploy-word-props? f }
{ deploy-c-types? f }
{ "stop-after-last-window?" t }
{ deploy-name "Catalyst Talk" }
{ deploy-name "Minnesota Talk" }
}

View File

@ -1 +1 @@
Slides for a talk at Ruby.mn, Minneapolis MN, January 2008
Slides for a talk at Ruby.mn, Minneapolis, MN, January 2008

View File

@ -0,0 +1,53 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: strings arrays memoize kernel sequences accessors combinators ;
IN: smalltalk.ast
SINGLETONS: nil self super ;
TUPLE: ast-comment { string string } ;
TUPLE: ast-block { arguments array } { temporaries array } { body array } ;
TUPLE: ast-message-send receiver { selector string } { arguments array } ;
TUPLE: ast-message { selector string } { arguments array } ;
TUPLE: ast-cascade receiver { messages array } ;
TUPLE: ast-name { name string } ;
TUPLE: ast-return value ;
TUPLE: ast-assignment { name ast-name } value ;
TUPLE: ast-local-variables { names array } ;
TUPLE: ast-method { name string } { body ast-block } ;
TUPLE: ast-class { name string } { superclass string } { ivars array } { methods array } ;
TUPLE: ast-foreign { class string } { name string } ;
TUPLE: ast-sequence { temporaries array } { body array } ;
! We treat a sequence of statements like a block in a few places to
! simplify handling of top-level forms
M: ast-sequence arguments>> drop { } ;
: unclip-temporaries ( statements -- temporaries statements' )
{
{ [ dup empty? ] [ { } ] }
{ [ dup first ast-local-variables? not ] [ { } ] }
[ unclip names>> ]
} cond swap ;
: <ast-block> ( arguments body -- block )
unclip-temporaries ast-block boa ;
: <ast-sequence> ( body -- block )
unclip-temporaries ast-sequence boa ;
! The parser parses normal message sends as cascades with one message, but
! we represent them differently in the AST to simplify generated code in
! the common case
: <ast-cascade> ( receiver messages -- ast )
dup length 1 =
[ first [ selector>> ] [ arguments>> ] bi ast-message-send boa ]
[ ast-cascade boa ]
if ;
! Methods return self by default
: <ast-method> ( class arguments body -- method )
self suffix <ast-block> ast-method boa ;
TUPLE: symbol { name string } ;
MEMO: intern ( name -- symbol ) symbol boa ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,25 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces assocs accessors words sequences classes.tuple ;
IN: smalltalk.classes
SYMBOL: classes
classes [ H{ } clone ] initialize
: create-class ( class -- class )
"smalltalk.classes" create ;
ERROR: no-class name ;
: lookup-class ( class -- class )
classes get ?at [ ] [ no-class ] if ;
: define-class ( class superclass ivars -- class-word )
[ create-class ] [ lookup-class ] [ ] tri*
[ define-tuple-class ] [ 2drop dup dup name>> classes get set-at ] 3bi ;
: define-foreign ( class name -- )
classes get set-at ;
tuple "Object" define-foreign

View File

@ -0,0 +1,36 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays kernel sequences sets smalltalk.ast ;
IN: smalltalk.compiler.assignment
GENERIC: assigned-locals ( ast -- seq )
M: ast-return assigned-locals value>> assigned-locals ;
M: ast-block assigned-locals
[ body>> assigned-locals ] [ arguments>> ] bi diff ;
M: ast-message-send assigned-locals
[ receiver>> assigned-locals ]
[ arguments>> assigned-locals ]
bi append ;
M: ast-cascade assigned-locals
[ receiver>> assigned-locals ]
[ messages>> assigned-locals ]
bi append ;
M: ast-message assigned-locals
arguments>> assigned-locals ;
M: ast-assignment assigned-locals
[ name>> dup ast-name? [ name>> 1array ] [ drop { } ] if ]
[ value>> assigned-locals ] bi append ;
M: ast-sequence assigned-locals
body>> assigned-locals ;
M: array assigned-locals
[ assigned-locals ] map concat ;
M: object assigned-locals drop f ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,87 @@
USING: smalltalk.compiler tools.test prettyprint smalltalk.ast
smalltalk.compiler.lexenv stack-checker locals.rewrite.closures
kernel accessors compiler.units sequences arrays ;
IN: smalltalk.compiler.tests
: test-compilation ( ast -- quot )
[
1array ast-sequence new swap >>body
compile-smalltalk [ call ] append
] with-compilation-unit ;
: test-inference ( ast -- in# out# )
test-compilation infer [ in>> ] [ out>> ] bi ;
[ 2 1 ] [
T{ ast-block f
{ "a" "b" }
{
T{ ast-message-send f
T{ ast-name f "a" }
"+"
{ T{ ast-name f "b" } }
}
}
} test-inference
] unit-test
[ 3 1 ] [
T{ ast-block f
{ "a" "b" "c" }
{
T{ ast-assignment f
T{ ast-name f "a" }
T{ ast-message-send f
T{ ast-name f "c" }
"+"
{ T{ ast-name f "b" } }
}
}
T{ ast-message-send f
T{ ast-name f "b" }
"blah:"
{ 123.456 }
}
T{ ast-return f T{ ast-name f "c" } }
}
} test-inference
] unit-test
[ 0 1 ] [
T{ ast-block f
{ }
{ }
{
T{ ast-message-send
{ receiver 1 }
{ selector "to:do:" }
{ arguments
{
10
T{ ast-block
{ arguments { "i" } }
{ body
{
T{ ast-message-send
{ receiver
T{ ast-name { name "i" } }
}
{ selector "print" }
}
}
}
}
}
}
}
}
} test-inference
] unit-test
[ "a" ] [
T{ ast-block f
{ }
{ }
{ { T{ ast-block { body { "a" } } } } }
} test-compilation call first call
] unit-test

View File

@ -0,0 +1,157 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators.short-circuit
continuations fry kernel namespaces quotations sequences sets
generalizations slots locals.types splitting math
locals.rewrite.closures generic words combinators locals smalltalk.ast
smalltalk.compiler.lexenv smalltalk.compiler.assignment
smalltalk.compiler.return smalltalk.selectors smalltalk.classes ;
IN: smalltalk.compiler
GENERIC: compile-ast ( lexenv ast -- quot )
M: object compile-ast nip 1quotation ;
M: self compile-ast drop self>> 1quotation ;
ERROR: unbound-local name ;
M: ast-name compile-ast name>> swap lookup-reader ;
: compile-arguments ( lexenv ast -- quot )
arguments>> [ compile-ast ] with map [ ] join ;
: compile-new ( lexenv ast -- quot )
[ receiver>> compile-ast ]
[ compile-arguments ] 2bi
[ new ] 3append ;
: compile-ifTrue:ifFalse: ( lexenv ast -- quot )
[ receiver>> compile-ast ]
[ compile-arguments ] 2bi
[ if ] 3append ;
M: ast-message-send compile-ast
dup selector>> {
{ "ifTrue:ifFalse:" [ compile-ifTrue:ifFalse: ] }
{ "new" [ compile-new ] }
[
drop
[ compile-arguments ]
[ receiver>> compile-ast ]
[ nip selector>> selector>generic ]
2tri [ append ] dip suffix
]
} case ;
M: ast-cascade compile-ast
[ receiver>> compile-ast ]
[
messages>> [
[ compile-arguments \ dip ]
[ selector>> selector>generic ] bi
[ ] 3sequence
] with map
unclip-last [ [ [ drop ] append ] map ] dip suffix
cleave>quot
] 2bi append ;
M: ast-return compile-ast
[ value>> compile-ast ] [ drop return>> 1quotation ] 2bi
[ continue-with ] 3append ;
: (compile-sequence) ( lexenv asts -- quot )
[ drop [ nil ] ] [
[ compile-ast ] with map [ drop ] join
] if-empty ;
: block-lexenv ( block -- lexenv )
[ [ arguments>> ] [ temporaries>> ] bi append ]
[ body>> [ assigned-locals ] map concat unique ] bi
'[
dup dup _ key?
[ <local-reader> ]
[ <local> ]
if
] H{ } map>assoc
dup
[ nip local-reader? ] assoc-filter
[ <local-writer> ] assoc-map
<lexenv> swap >>local-writers swap >>local-readers ;
: lookup-block-vars ( vars lexenv -- seq )
local-readers>> '[ _ at ] map ;
: make-temporaries ( block lexenv -- quot )
[ temporaries>> ] dip lookup-block-vars
[ <def> [ f ] swap suffix ] map [ ] join ;
:: compile-sequence ( lexenv block -- vars quot )
lexenv block block-lexenv lexenv-union :> lexenv
block arguments>> lexenv lookup-block-vars
lexenv block body>> (compile-sequence) block lexenv make-temporaries prepend ;
M: ast-sequence compile-ast
compile-sequence nip ;
GENERIC: contains-blocks? ( obj -- ? )
M: ast-block contains-blocks? drop t ;
M: object contains-blocks? drop f ;
M: array contains-blocks? [ contains-blocks? ] any? ;
M: array compile-ast
dup contains-blocks? [
[ [ compile-ast ] with map [ ] join ] [ length ] bi
'[ @ _ narray ]
] [ call-next-method ] if ;
GENERIC: compile-assignment ( lexenv name -- quot )
M: ast-name compile-assignment name>> swap lookup-writer ;
M: ast-assignment compile-ast
[ value>> compile-ast [ dup ] ] [ name>> compile-assignment ] 2bi 3append ;
M: ast-block compile-ast
compile-sequence <lambda> '[ _ ] ;
:: (compile-method-body) ( lexenv block -- lambda )
lexenv block compile-sequence
[ lexenv self>> suffix ] dip <lambda> ;
: compile-method-body ( lexenv block -- quot )
[ [ (compile-method-body) ] [ arguments>> length 1+ ] bi ] 2keep
make-return ;
: compile-method ( lexenv ast-method -- )
[ [ class>> ] [ name>> selector>generic ] bi* create-method ]
[ body>> compile-method-body ]
2bi define ;
: <class-lexenv> ( class -- lexenv )
<lexenv> swap >>class "self" <local> >>self "^" <local> >>return ;
M: ast-class compile-ast
nip
[
[ name>> ] [ superclass>> ] [ ivars>> ] tri
define-class <class-lexenv>
]
[ methods>> ] bi
[ compile-method ] with each
[ nil ] ;
ERROR: no-word name ;
M: ast-foreign compile-ast
nip
[ class>> dup ":" split1 lookup [ ] [ no-word ] ?if ]
[ name>> ] bi define-foreign
[ nil ] ;
: compile-smalltalk ( statement -- quot )
[ empty-lexenv ] dip [ compile-sequence nip 0 ]
2keep make-return ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,24 @@
USING: smalltalk.compiler.lexenv tools.test kernel namespaces accessors ;
IN: smalltalk.compiler.lexenv.tests
TUPLE: some-class x y z ;
SYMBOL: fake-self
SYMBOL: fake-local
<lexenv>
some-class >>class
fake-self >>self
H{ { "mumble" fake-local } } >>local-readers
H{ { "jumble" fake-local } } >>local-writers
lexenv set
[ [ fake-local ] ] [ "mumble" lexenv get lookup-reader ] unit-test
[ [ fake-self x>> ] ] [ "x" lexenv get lookup-reader ] unit-test
[ [ \ tuple ] ] [ "Object" lexenv get lookup-reader ] unit-test
[ [ fake-local ] ] [ "jumble" lexenv get lookup-writer ] unit-test
[ [ fake-self (>>y) ] ] [ "y" lexenv get lookup-writer ] unit-test
[ "blahblah" lexenv get lookup-writer ] must-fail

View File

@ -0,0 +1,67 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: assocs kernel accessors quotations slots words
sequences namespaces combinators combinators.short-circuit
summary smalltalk.classes ;
IN: smalltalk.compiler.lexenv
! local-readers: assoc string => word
! local-writers: assoc string => word
! self: word or f for top-level forms
! class: class word or f for top-level forms
! method: generic word or f for top-level forms
TUPLE: lexenv local-readers local-writers self return class method ;
: <lexenv> ( -- lexenv ) lexenv new ; inline
CONSTANT: empty-lexenv T{ lexenv }
: lexenv-union ( lexenv1 lexenv2 -- lexenv )
[ <lexenv> ] 2dip {
[ [ local-readers>> ] bi@ assoc-union >>local-readers ]
[ [ local-writers>> ] bi@ assoc-union >>local-writers ]
[ [ self>> ] either? >>self ]
[ [ return>> ] either? >>return ]
[ [ class>> ] either? >>class ]
[ [ method>> ] either? >>method ]
} 2cleave ;
: local-reader ( name lexenv -- local )
local-readers>> at dup [ 1quotation ] when ;
: ivar-reader ( name lexenv -- quot/f )
dup class>> [
[ class>> "slots" word-prop slot-named ] [ self>> ] bi
swap dup [ name>> reader-word [ ] 2sequence ] [ 2drop f ] if
] [ 2drop f ] if ;
: class-name ( name -- quot/f )
classes get at dup [ [ ] curry ] when ;
ERROR: bad-identifier name ;
M: bad-identifier summary drop "Unknown identifier" ;
: lookup-reader ( name lexenv -- reader-quot )
{
[ local-reader ]
[ ivar-reader ]
[ drop class-name ]
[ drop bad-identifier ]
} 2|| ;
: local-writer ( name lexenv -- local )
local-writers>> at dup [ 1quotation ] when ;
: ivar-writer ( name lexenv -- quot/f )
dup class>> [
[ class>> "slots" word-prop slot-named ] [ self>> ] bi
swap dup [ name>> writer-word [ ] 2sequence ] [ 2drop f ] if
] [ 2drop f ] if ;
: lookup-writer ( name lexenv -- writer-quot )
{
[ local-writer ]
[ ivar-writer ]
[ drop bad-identifier ]
} 2|| ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,3 @@
USING: smalltalk.parser smalltalk.compiler.return tools.test ;
[ t ] [ "(i <= 1) ifTrue: [^1] ifFalse: [^((Fib new i:(i-1)) compute + (Fib new i:(i-2)) compute)]" parse-smalltalk need-return-continuation? ] unit-test

View File

@ -0,0 +1,45 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators.short-circuit continuations
fry generalizations kernel locals locals.types locals.rewrite.closures
namespaces make sequences smalltalk.ast ;
IN: smalltalk.compiler.return
SYMBOL: return-continuation
GENERIC: need-return-continuation? ( ast -- ? )
M: ast-return need-return-continuation? drop t ;
M: ast-block need-return-continuation? body>> need-return-continuation? ;
M: ast-message-send need-return-continuation?
{
[ receiver>> need-return-continuation? ]
[ arguments>> need-return-continuation? ]
} 1|| ;
M: ast-cascade need-return-continuation?
{
[ receiver>> need-return-continuation? ]
[ messages>> need-return-continuation? ]
} 1|| ;
M: ast-message need-return-continuation?
arguments>> need-return-continuation? ;
M: ast-assignment need-return-continuation?
value>> need-return-continuation? ;
M: ast-sequence need-return-continuation?
body>> need-return-continuation? ;
M: array need-return-continuation? [ need-return-continuation? ] any? ;
M: object need-return-continuation? drop f ;
:: make-return ( quot n lexenv block -- quot )
block need-return-continuation? [
quot clone [ lexenv return>> <def> '[ _ ] prepend ] change-body
n '[ _ _ ncurry callcc1 ]
] [ quot ] if rewrite-closures first ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,11 @@
IN: smalltalk.eval.tests
USING: smalltalk.eval tools.test io.streams.string kernel ;
[ 3 ] [ "1+2" eval-smalltalk ] unit-test
[ "HAI" ] [ "(1<10) ifTrue:['HAI'] ifFalse:['BAI']" eval-smalltalk ] unit-test
[ 7 ] [ "1+2+3;+4" eval-smalltalk ] unit-test
[ 6 "5\n6\n" ] [ [ "[:x|x print] value: 5; value: 6" eval-smalltalk ] with-string-writer ] unit-test
[ 5 ] [ "|x| x:=5. x" eval-smalltalk ] unit-test
[ 11 ] [ "[:i| |x| x:=5. i+x] value: 6" eval-smalltalk ] unit-test
[ t ] [ "class Blah [method foo [5]]. Blah new foo" eval-smalltalk tuple? ] unit-test
[ 196418 ] [ "vocab:smalltalk/eval/fib.st" eval-smalltalk-file ] unit-test

View File

@ -0,0 +1,13 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: io.files io.encodings.utf8
compiler.units smalltalk.parser smalltalk.compiler
smalltalk.library ;
IN: smalltalk.eval
: eval-smalltalk ( string -- result )
[ parse-smalltalk compile-smalltalk ] with-compilation-unit
call( -- result ) ;
: eval-smalltalk-file ( path -- result )
utf8 file-contents eval-smalltalk ;

View File

@ -0,0 +1,11 @@
class Fib [
|i|
method i: newI [i:=newI].
method compute [
(i <= 1)
ifTrue: [^1]
ifFalse: [^((Fib new i:(i-1)) compute + (Fib new i:(i-2)) compute)]
].
].
[(Fib new i: 26) compute] time

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,101 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel present io math sequences assocs math.ranges
math.order fry tools.time locals smalltalk.selectors
smalltalk.ast smalltalk.classes ;
IN: smalltalk.library
SELECTOR: print
SELECTOR: asString
M: object selector-print dup present print ;
M: object selector-asString present ;
SELECTOR: print:
SELECTOR: nextPutAll:
SELECTOR: tab
SELECTOR: nl
M: object selector-print: [ present ] dip stream-print nil ;
M: object selector-nextPutAll: selector-print: ;
M: object selector-tab " " swap selector-print: ;
M: object selector-nl stream-nl nil ;
SELECTOR: +
SELECTOR: -
SELECTOR: *
SELECTOR: /
SELECTOR: <
SELECTOR: >
SELECTOR: <=
SELECTOR: >=
SELECTOR: =
M: object selector-+ swap + ;
M: object selector-- swap - ;
M: object selector-* swap * ;
M: object selector-/ swap / ;
M: object selector-< swap < ;
M: object selector-> swap > ;
M: object selector-<= swap <= ;
M: object selector->= swap >= ;
M: object selector-= swap = ;
SELECTOR: min:
SELECTOR: max:
M: object selector-min: min ;
M: object selector-max: max ;
SELECTOR: ifTrue:
SELECTOR: ifFalse:
SELECTOR: ifTrue:ifFalse:
M: object selector-ifTrue: [ call( -- result ) ] [ drop nil ] if ;
M: object selector-ifFalse: [ drop nil ] [ call( -- result ) ] if ;
M: object selector-ifTrue:ifFalse: [ drop call( -- result ) ] [ nip call( -- result ) ] if ;
SELECTOR: isNil
M: object selector-isNil nil eq? ;
SELECTOR: at:
SELECTOR: at:put:
M: sequence selector-at: nth ;
M: sequence selector-at:put: ( key value receiver -- receiver ) [ swapd set-nth ] keep ;
M: assoc selector-at: at ;
M: assoc selector-at:put: ( key value receiver -- receiver ) [ swapd set-at ] keep ;
SELECTOR: do:
M:: object selector-do: ( quot receiver -- nil )
receiver [ quot call( elt -- result ) drop ] each nil ;
SELECTOR: to:
SELECTOR: to:do:
M: object selector-to: swap [a,b] ;
M:: object selector-to:do: ( to quot from -- nil )
from to [a,b] [ quot call( i -- result ) drop ] each nil ;
SELECTOR: value
SELECTOR: value:
SELECTOR: value:value:
SELECTOR: value:value:value:
SELECTOR: value:value:value:value:
M: object selector-value call( -- result ) ;
M: object selector-value: call( input -- result ) ;
M: object selector-value:value: call( input input -- result ) ;
M: object selector-value:value:value: call( input input input -- result ) ;
M: object selector-value:value:value:value: call( input input input input -- result ) ;
SELECTOR: new
M: object selector-new new ;
SELECTOR: time
M: object selector-time '[ _ call( -- result ) ] time ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,18 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel prettyprint io io.styles colors.constants compiler.units
fry debugger sequences locals.rewrite.closures smalltalk.ast
smalltalk.eval smalltalk.printer smalltalk.listener ;
IN: smalltalk.listener
: eval-interactively ( string -- )
'[
_ eval-smalltalk
dup nil? [ drop ] [ "Result: " write smalltalk>string print ] if
] try ;
: smalltalk-listener ( -- )
"Smalltalk>" { { background COLOR: light-blue } } format bl flush readln
[ eval-interactively smalltalk-listener ] when* ;
MAIN: smalltalk-listener

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,300 @@
IN: smalltalk.parser.tests
USING: smalltalk.parser smalltalk.ast
peg.ebnf tools.test accessors
io.files io.encodings.ascii kernel ;
EBNF: test-Character
test = <foreign parse-smalltalk Character>
;EBNF
[ CHAR: a ] [ "a" test-Character ] unit-test
EBNF: test-Comment
test = <foreign parse-smalltalk Comment>
;EBNF
[ T{ ast-comment f "Hello, this is a comment." } ]
[ "\"Hello, this is a comment.\"" test-Comment ]
unit-test
[ T{ ast-comment f "Hello, \"this\" is a comment." } ]
[ "\"Hello, \"\"this\"\" is a comment.\"" test-Comment ]
unit-test
EBNF: test-Identifier
test = <foreign parse-smalltalk Identifier>
;EBNF
[ "OrderedCollection" ] [ "OrderedCollection" test-Identifier ] unit-test
EBNF: test-Literal
test = <foreign parse-smalltalk Literal>
;EBNF
[ nil ] [ "nil" test-Literal ] unit-test
[ 123 ] [ "123" test-Literal ] unit-test
[ HEX: deadbeef ] [ "16rdeadbeef" test-Literal ] unit-test
[ -123 ] [ "-123" test-Literal ] unit-test
[ 1.2 ] [ "1.2" test-Literal ] unit-test
[ -1.24 ] [ "-1.24" test-Literal ] unit-test
[ 12.4e7 ] [ "12.4e7" test-Literal ] unit-test
[ 12.4e-7 ] [ "12.4e-7" test-Literal ] unit-test
[ -12.4e7 ] [ "-12.4e7" test-Literal ] unit-test
[ CHAR: x ] [ "$x" test-Literal ] unit-test
[ "Hello, world" ] [ "'Hello, world'" test-Literal ] unit-test
[ "Hello, 'funny' world" ] [ "'Hello, ''funny'' world'" test-Literal ] unit-test
[ T{ symbol f "foo" } ] [ "#foo" test-Literal ] unit-test
[ T{ symbol f "+" } ] [ "#+" test-Literal ] unit-test
[ T{ symbol f "at:put:" } ] [ "#at:put:" test-Literal ] unit-test
[ T{ symbol f "Hello world" } ] [ "#'Hello world'" test-Literal ] unit-test
[ B{ 1 2 3 4 } ] [ "#[1 2 3 4]" test-Literal ] unit-test
[ { nil t f } ] [ "#(nil true false)" test-Literal ] unit-test
[ { nil { t f } } ] [ "#(nil (true false))" test-Literal ] unit-test
[ T{ ast-block f { } { } { } } ] [ "[]" test-Literal ] unit-test
[ T{ ast-block f { "x" } { } { T{ ast-return f T{ ast-name f "x" } } } } ] [ "[ :x|^x]" test-Literal ] unit-test
[ T{ ast-block f { } { } { T{ ast-return f self } } } ] [ "[^self]" test-Literal ] unit-test
[
T{ ast-block
{ arguments { "i" } }
{ body
{
T{ ast-message-send
{ receiver T{ ast-name { name "i" } } }
{ selector "print" }
}
}
}
}
]
[ "[ :i | i print ]" test-Literal ] unit-test
[
T{ ast-block
{ body { 5 self } }
}
]
[ "[5. self]" test-Literal ] unit-test
EBNF: test-FormalBlockArgumentDeclarationList
test = <foreign parse-smalltalk FormalBlockArgumentDeclarationList>
;EBNF
[ V{ "x" "y" "elt" } ] [ ":x :y :elt" test-FormalBlockArgumentDeclarationList ] unit-test
EBNF: test-Operand
test = <foreign parse-smalltalk Operand>
;EBNF
[ { 123 15.6 { t f } } ] [ "#(123 15.6 (true false))" test-Operand ] unit-test
[ T{ ast-name f "x" } ] [ "x" test-Operand ] unit-test
EBNF: test-Expression
test = <foreign parse-smalltalk Expression>
;EBNF
[ self ] [ "self" test-Expression ] unit-test
[ { 123 15.6 { t f } } ] [ "#(123 15.6 (true false))" test-Expression ] unit-test
[ T{ ast-name f "x" } ] [ "x" test-Expression ] unit-test
[ T{ ast-message-send f 5 "print" { } } ] [ "5 print" test-Expression ] unit-test
[ T{ ast-message-send f T{ ast-message-send f 5 "squared" { } } "print" { } } ] [ "5 squared print" test-Expression ] unit-test
[ T{ ast-message-send f 2 "+" { 2 } } ] [ "2+2" test-Expression ] unit-test
[
T{ ast-message-send f
T{ ast-message-send f 3 "factorial" { } }
"+"
{ T{ ast-message-send f 4 "factorial" { } } }
}
]
[ "3 factorial + 4 factorial" test-Expression ] unit-test
[
T{ ast-message-send f
T{ ast-message-send f 3 "factorial" { } }
"+"
{ T{ ast-message-send f 4 "factorial" { } } }
}
]
[ " 3 factorial + 4 factorial" test-Expression ] unit-test
[
T{ ast-message-send f
T{ ast-message-send f 3 "factorial" { } }
"+"
{ T{ ast-message-send f 4 "factorial" { } } }
}
]
[ " 3 factorial + 4 factorial " test-Expression ] unit-test
[
T{ ast-message-send f
T{ ast-message-send f
T{ ast-message-send f 3 "factorial" { } }
"+"
{ 4 }
}
"factorial"
{ }
}
]
[ "(3 factorial + 4) factorial" test-Expression ] unit-test
[
T{ ast-message-send
{ receiver
T{ ast-message-send
{ receiver
T{ ast-message-send
{ receiver 1 }
{ selector "<" }
{ arguments { 10 } }
}
}
{ selector "ifTrue:ifFalse:" }
{ arguments
{
T{ ast-block { body { "HI" } } }
T{ ast-block { body { "BYE" } } }
}
}
}
}
{ selector "print" }
}
]
[ "((1 < 10) ifTrue: [ 'HI' ] ifFalse: [ 'BYE' ]) print" test-Expression ] unit-test
[
T{ ast-cascade
{ receiver 12 }
{ messages
{
T{ ast-message f "sqrt" }
T{ ast-message f "+" { 2 } }
}
}
}
]
[ "12 sqrt; + 2" test-Expression ] unit-test
[
T{ ast-cascade
{ receiver T{ ast-message-send f 12 "sqrt" } }
{ messages
{
T{ ast-message f "+" { 1 } }
T{ ast-message f "+" { 2 } }
}
}
}
]
[ "12 sqrt + 1; + 2" test-Expression ] unit-test
[
T{ ast-cascade
{ receiver T{ ast-message-send f 12 "squared" } }
{ messages
{
T{ ast-message f "to:" { 100 } }
T{ ast-message f "sqrt" }
}
}
}
]
[ "12 squared to: 100; sqrt" test-Expression ] unit-test
[
T{ ast-message-send f
T{ ast-message-send f 1 "+" { 2 } }
"*"
{ 3 }
}
]
[ "1+2*3" test-Expression ] unit-test
[
T{ ast-message-send
{ receiver
T{ ast-message-send
{ receiver { T{ ast-block { body { "a" } } } } }
{ selector "at:" }
{ arguments { 0 } }
}
}
{ selector "value" }
}
]
[ "(#(['a']) at: 0) value" test-Expression ] unit-test
EBNF: test-FinalStatement
test = <foreign parse-smalltalk FinalStatement>
;EBNF
[ T{ ast-name f "value" } ] [ "value" test-FinalStatement ] unit-test
[ T{ ast-return f T{ ast-name f "value" } } ] [ "^value" test-FinalStatement ] unit-test
[ T{ ast-assignment f T{ ast-name f "value" } 5 } ] [ "value:=5" test-FinalStatement ] unit-test
EBNF: test-LocalVariableDeclarationList
test = <foreign parse-smalltalk LocalVariableDeclarationList>
;EBNF
[ T{ ast-local-variables f { "i" "j" } } ] [ " | i j |" test-LocalVariableDeclarationList ] unit-test
[ T{ ast-message-send f T{ ast-name f "x" } "foo:bar:" { 1 2 } } ]
[ "x foo:1 bar:2" test-Expression ] unit-test
[
T{ ast-message-send
f
T{ ast-message-send f
T{ ast-message-send f 3 "factorial" { } }
"+"
{ T{ ast-message-send f 4 "factorial" { } } }
}
"between:and:"
{ 10 100 }
}
]
[ "3 factorial + 4 factorial between: 10 and: 100" test-Expression ] unit-test
[ T{ ast-sequence f { } { 1 2 } } ] [ "1. 2" parse-smalltalk ] unit-test
[ T{ ast-sequence f { } { 1 2 } } ] [ "1. 2." parse-smalltalk ] unit-test
[
T{ ast-sequence f { }
{
T{ ast-class
{ name "Test" }
{ superclass "Object" }
{ ivars { "a" } }
}
}
}
]
[ "class Test [|a|]" parse-smalltalk ] unit-test
[
T{ ast-sequence f { }
{
T{ ast-class
{ name "Test1" }
{ superclass "Object" }
{ ivars { "a" } }
}
T{ ast-class
{ name "Test2" }
{ superclass "Test1" }
{ ivars { "b" } }
}
}
}
]
[ "class Test1 [|a|]. class Test2 extends Test1 [|b|]" parse-smalltalk ] unit-test
[ ] [ "class Foo []. Tests blah " parse-smalltalk drop ] unit-test
[ ] [ "vocab:smalltalk/parser/test.st" ascii file-contents parse-smalltalk drop ] unit-test

View File

@ -0,0 +1,228 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: peg peg.ebnf smalltalk.ast sequences sequences.deep strings
math.parser kernel arrays byte-arrays math assocs accessors ;
IN: smalltalk.parser
! :mode=text:noTabs=true:
! Based on http://chronos-st.blogspot.com/2007/12/smalltalk-in-one-page.html
ERROR: bad-number str ;
: check-number ( str -- n )
>string dup string>number [ ] [ bad-number ] ?if ;
EBNF: parse-smalltalk
Character = .
WhitespaceCharacter = (" " | "\t" | "\n" | "\r" )
DecimalDigit = [0-9]
Letter = [A-Za-z]
CommentCharacter = [^"] | '""' => [[ CHAR: " ]]
Comment = '"' (CommentCharacter)*:s '"' => [[ s >string ast-comment boa ]]
OptionalWhiteSpace = (WhitespaceCharacter | Comment)*
Whitespace = (WhitespaceCharacter | Comment)+
LetterOrDigit = DecimalDigit | Letter
Identifier = (Letter | "_"):h (LetterOrDigit | "_")*:t => [[ { h t } flatten >string ]]
Reference = Identifier => [[ ast-name boa ]]
ConstantReference = "nil" => [[ nil ]]
| "false" => [[ f ]]
| "true" => [[ t ]]
PseudoVariableReference = "self" => [[ self ]]
| "super" => [[ super ]]
ReservedIdentifier = PseudoVariableReference | ConstantReference
BindableIdentifier = Identifier
UnaryMessageSelector = Identifier
Keyword = Identifier:i ":" => [[ i ":" append ]]
KeywordMessageSelector = Keyword+ => [[ concat ]]
BinarySelectorChar = "~" | "!" | "@" | "%" | "&" | "*" | "-" | "+"
| "=" | "|" | "\" | "<" | ">" | "," | "?" | "/"
BinaryMessageSelector = BinarySelectorChar+ => [[ concat ]]
OptionalMinus = ("-" => [[ CHAR: - ]])?
IntegerLiteral = (OptionalMinus:m UnsignedIntegerLiteral:i) => [[ i m [ neg ] when ]]
UnsignedIntegerLiteral = Radix:r "r" BaseNIntegerLiteral:b => [[ b >string r base> ]]
| DecimalIntegerLiteral => [[ check-number ]]
DecimalIntegerLiteral = DecimalDigit+
Radix = DecimalIntegerLiteral => [[ check-number ]]
BaseNIntegerLiteral = LetterOrDigit+
FloatingPointLiteral = (OptionalMinus
DecimalIntegerLiteral
("." => [[ CHAR: . ]] DecimalIntegerLiteral Exponent? | Exponent))
=> [[ flatten check-number ]]
Exponent = "e" => [[ CHAR: e ]] (OptionalMinus DecimalIntegerLiteral)?
CharacterLiteral = "$" Character:c => [[ c ]]
StringLiteral = "'" (StringLiteralCharacter | "''" => [[ CHAR: ' ]])*:s "'"
=> [[ s >string ]]
StringLiteralCharacter = [^']
SymbolInArrayLiteral = KeywordMessageSelector
| UnaryMessageSelector
| BinaryMessageSelector
SymbolLiteral = "#" (SymbolInArrayLiteral | StringLiteral):s => [[ s intern ]]
ArrayLiteral = (ObjectArrayLiteral | ByteArrayLiteral)
ObjectArrayLiteral = "#" NestedObjectArrayLiteral:elts => [[ elts ]]
NestedObjectArrayLiteral = "(" OptionalWhiteSpace
(LiteralArrayElement:h
(Whitespace LiteralArrayElement:e => [[ e ]])*:t
=> [[ t h prefix ]]
)?:elts OptionalWhiteSpace ")" => [[ elts >array ]]
LiteralArrayElement = Literal
| NestedObjectArrayLiteral
| SymbolInArrayLiteral
| ConstantReference
ByteArrayLiteral = "#[" OptionalWhiteSpace
(UnsignedIntegerLiteral:h
(Whitespace UnsignedIntegerLiteral:i => [[ i ]])*:t
=> [[ t h prefix ]]
)?:elts OptionalWhiteSpace "]" => [[ elts >byte-array ]]
FormalBlockArgumentDeclaration = ":" BindableIdentifier:i => [[ i ]]
FormalBlockArgumentDeclarationList =
FormalBlockArgumentDeclaration:h
(Whitespace FormalBlockArgumentDeclaration:v => [[ v ]])*:t
=> [[ t h prefix ]]
BlockLiteral = "["
(OptionalWhiteSpace
FormalBlockArgumentDeclarationList:args
OptionalWhiteSpace
"|"
=> [[ args ]]
)?:args
ExecutableCode:body
"]" => [[ args >array body <ast-block> ]]
Literal = (ConstantReference
| FloatingPointLiteral
| IntegerLiteral
| CharacterLiteral
| StringLiteral
| ArrayLiteral
| SymbolLiteral
| BlockLiteral)
NestedExpression = "(" Statement:s OptionalWhiteSpace ")" => [[ s ]]
Operand = Literal
| PseudoVariableReference
| Reference
| NestedExpression
UnaryMessage = OptionalWhiteSpace
UnaryMessageSelector:s !(":")
=> [[ s { } ast-message boa ]]
BinaryMessage = OptionalWhiteSpace
BinaryMessageSelector:selector
OptionalWhiteSpace
(UnaryMessageSend | Operand):rhs
=> [[ selector { rhs } ast-message boa ]]
KeywordMessageSegment = Keyword:k OptionalWhiteSpace (BinaryMessageSend | UnaryMessageSend | Operand):arg => [[ { k arg } ]]
KeywordMessage = OptionalWhiteSpace
KeywordMessageSegment:h
(OptionalWhiteSpace KeywordMessageSegment:s => [[ s ]])*:t
=> [[ t h prefix unzip [ concat ] dip ast-message boa ]]
Message = BinaryMessage | UnaryMessage | KeywordMessage
UnaryMessageSend = (UnaryMessageSend | Operand):lhs
UnaryMessage:h
(OptionalWhiteSpace ";" Message:m => [[ m ]])*:t
=> [[ lhs t h prefix >array <ast-cascade> ]]
BinaryMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs
BinaryMessage:h
(OptionalWhiteSpace ";" Message:m => [[ m ]])*:t
=> [[ lhs t h prefix >array <ast-cascade> ]]
KeywordMessageSend = (BinaryMessageSend | UnaryMessageSend | Operand):lhs
KeywordMessage:h
(OptionalWhiteSpace ";" Message:m => [[ m ]])*:t
=> [[ lhs t h prefix >array <ast-cascade> ]]
Expression = OptionalWhiteSpace
(KeywordMessageSend | BinaryMessageSend | UnaryMessageSend | Operand):e
=> [[ e ]]
AssignmentOperation = OptionalWhiteSpace BindableIdentifier:i
OptionalWhiteSpace ":=" OptionalWhiteSpace => [[ i ast-name boa ]]
AssignmentStatement = AssignmentOperation:a Statement:s => [[ a s ast-assignment boa ]]
Statement = ClassDeclaration | ForeignClassDeclaration | AssignmentStatement | Expression
MethodReturnOperator = OptionalWhiteSpace "^"
FinalStatement = (MethodReturnOperator Statement:s => [[ s ast-return boa ]])
| Statement
LocalVariableDeclarationList = OptionalWhiteSpace "|" OptionalWhiteSpace
(BindableIdentifier:h
(Whitespace BindableIdentifier:b => [[ b ]])*:t
=> [[ t h prefix ]]
)?:b OptionalWhiteSpace "|" => [[ b >array ast-local-variables boa ]]
EndStatement = "."
ExecutableCode = (LocalVariableDeclarationList)?:locals
(Statement:s OptionalWhiteSpace EndStatement => [[ s ]])*:h
(FinalStatement:t (EndStatement)? => [[ t ]])?:t
OptionalWhiteSpace
=> [[ h t [ suffix ] when* locals [ prefix ] when* >array ]]
TopLevelForm = ExecutableCode => [[ <ast-sequence> ]]
UnaryMethodHeader = UnaryMessageSelector:selector
=> [[ { selector { } } ]]
BinaryMethodHeader = BinaryMessageSelector:selector OptionalWhiteSpace BindableIdentifier:identifier
=> [[ { selector { identifier } } ]]
KeywordMethodHeaderSegment = Keyword:keyword
OptionalWhiteSpace
BindableIdentifier:identifier => [[ { keyword identifier } ]]
KeywordMethodHeader = KeywordMethodHeaderSegment:h (Whitespace KeywordMethodHeaderSegment:s => [[ s ]])*:t
=> [[ t h prefix unzip [ concat ] dip 2array ]]
MethodHeader = KeywordMethodHeader
| BinaryMethodHeader
| UnaryMethodHeader
MethodDeclaration = OptionalWhiteSpace "method" OptionalWhiteSpace MethodHeader:header
OptionalWhiteSpace "["
ExecutableCode:code
"]"
=> [[ header first2 code <ast-method> ]]
ClassDeclaration = OptionalWhiteSpace "class" OptionalWhiteSpace Identifier:name
OptionalWhiteSpace
("extends" OptionalWhiteSpace Identifier:superclass OptionalWhiteSpace => [[ superclass ]])?:superclass
OptionalWhiteSpace "["
(OptionalWhiteSpace LocalVariableDeclarationList:l => [[ l names>> ]])?:ivars
(MethodDeclaration:h
(OptionalWhiteSpace
EndStatement
OptionalWhiteSpace
MethodDeclaration:m => [[ m ]])*:t (EndStatement)?
=> [[ t h prefix ]]
)?:methods
OptionalWhiteSpace "]"
=> [[ name superclass "Object" or ivars >array methods >array ast-class boa ]]
ForeignClassDeclaration = OptionalWhiteSpace "foreign"
OptionalWhiteSpace Identifier:name
OptionalWhiteSpace Literal:class
=> [[ class name ast-foreign boa ]]
End = !(.)
Program = TopLevelForm End
;EBNF

View File

@ -0,0 +1,65 @@
class TreeNode extends Object [
|left right item|
method binarytrees: n to: output [
| minDepth maxDepth stretchDepth check longLivedTree iterations |
minDepth := 4.
maxDepth := minDepth + 2 max: n.
stretchDepth := maxDepth + 1.
check := (TreeNode bottomUpTree: 0 depth: stretchDepth) itemCheck.
output
nextPutAll: 'stretch tree of depth '; print: stretchDepth; tab;
nextPutAll: ' check: '; print: check; nl.
longLivedTree := TreeNode bottomUpTree: 0 depth: maxDepth.
minDepth to: maxDepth by: 2 do: [:depth|
iterations := 1 bitShift: maxDepth - depth + minDepth.
check := 0.
1 to: iterations do: [:i|
check := check + (TreeNode bottomUpTree: i depth: depth) itemCheck.
check := check + (TreeNode bottomUpTree: -1*i depth: depth) itemCheck
].
output
print: (2*iterations); tab;
nextPutAll: ' trees of depth '; print: depth; tab;
nextPutAll: ' check: '; print: check; nl
].
output
nextPutAll: 'long lived tree of depth '; print: maxDepth; tab;
nextPutAll: ' check: '; print: longLivedTree itemCheck; nl
].
method binarytrees: arg [
self binarytrees: arg to: self stdout.
^''
].
method left: leftChild right: rightChild item: anItem [
left := leftChild.
right := rightChild.
item := anItem
].
method itemCheck [
^left isNil
ifTrue: [item] ifFalse: [item + (left itemCheck - right itemCheck)]
].
method bottomUpTree: anItem depth: anInteger [
^(anInteger > 0)
ifTrue: [
self
left: (self bottomUpTree: 2*anItem - 1 depth: anInteger - 1)
right: (self bottomUpTree: 2*anItem depth: anInteger - 1)
item: anItem
] ifFalse: [self left: nil right: nil item: anItem]
].
method left: leftChild right: rightChild item: anItem [
^(super new) left: leftChild right: rightChild item: anItem
]
].

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,4 @@
IN: smalltalk.printer.tests
USING: smalltalk.printer tools.test ;
[ "#((1 2) 'hi')" ] [ { { 1 2 } "hi" } smalltalk>string ] unit-test

View File

@ -0,0 +1,34 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays byte-arrays kernel make math
math.parser prettyprint sequences smalltalk.ast strings ;
IN: smalltalk.printer
GENERIC: smalltalk>string ( object -- string )
M: real smalltalk>string number>string ;
M: string smalltalk>string
[
"'" %
[ dup CHAR: ' = [ dup , , ] [ , ] if ] each
"'" %
] "" make ;
GENERIC: array-element>string ( object -- string )
M: object array-element>string smalltalk>string ;
M: array array-element>string
[ array-element>string ] map " " join "(" ")" surround ;
M: array smalltalk>string
array-element>string "#" prepend ;
M: byte-array smalltalk>string
[ number>string ] { } map-as " " join "#[" "]" surround ;
M: symbol smalltalk>string
name>> smalltalk>string "#" prepend ;
M: object smalltalk>string unparse-short ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,28 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators effects generic generic.standard
kernel sequences words lexer ;
IN: smalltalk.selectors
SYMBOLS: unary binary keyword ;
: selector-type ( selector -- type )
{
{ [ dup [ "~!@%&*-+=|\\<>,?/" member? ] all? ] [ binary ] }
{ [ CHAR: : over member? ] [ keyword ] }
[ unary ]
} cond nip ;
: selector>effect ( selector -- effect )
dup selector-type {
{ unary [ drop 0 ] }
{ binary [ drop 1 ] }
{ keyword [ [ CHAR: : = ] count ] }
} case "receiver" suffix { "result" } <effect> ;
: selector>generic ( selector -- generic )
[ "selector-" prepend "smalltalk.selectors" create dup ]
[ selector>effect ]
bi define-simple-generic ;
SYNTAX: SELECTOR: scan selector>generic drop ;

View File

@ -4,11 +4,12 @@ USING: accessors fry html.parser html.parser.analyzer
http.client kernel tools.time sets assocs sequences
concurrency.combinators io threads namespaces math multiline
math.parser inspector urls logging combinators.short-circuit
continuations calendar prettyprint dlists deques locals ;
continuations calendar prettyprint dlists deques locals
present ;
IN: spider
TUPLE: spider base count max-count sleep max-depth initial-links
filters spidered todo nonmatching quiet ;
filters spidered todo nonmatching quiet currently-spidering ;
TUPLE: spider-result url depth headers fetch-time parsed-html
links processing-time timestamp ;
@ -25,10 +26,16 @@ TUPLE: unique-deque assoc deque ;
: <unique-deque> ( -- unique-deque )
H{ } clone <dlist> unique-deque boa ;
: url-exists? ( url unique-deque -- ? )
[ url>> ] [ assoc>> ] bi* key? ;
: push-url ( url depth unique-deque -- )
[ <todo-url> ] dip
[ [ [ t ] dip url>> ] [ assoc>> ] bi* set-at ]
[ deque>> push-back ] 2bi ;
[ <todo-url> ] dip 2dup url-exists? [
2drop
] [
[ [ [ t ] dip url>> ] [ assoc>> ] bi* set-at ]
[ deque>> push-back ] 2bi
] if ;
: pop-url ( unique-deque -- todo-url ) deque>> pop-front ;
@ -38,6 +45,7 @@ TUPLE: unique-deque assoc deque ;
>url
spider new
over >>base
over >>currently-spidering
swap 0 <unique-deque> [ push-url ] keep >>todo
<unique-deque> >>nonmatching
0 >>max-depth
@ -71,9 +79,12 @@ TUPLE: unique-deque assoc deque ;
[ add-nonmatching ]
[ tuck [ apply-filters ] 2dip add-todo ] 2bi ;
: url-absolute? ( url -- ? )
present "http://" head? ;
: normalize-hrefs ( links spider -- links' )
[ [ >url ] map ] dip
base>> swap [ derive-url ] with map ;
currently-spidering>> present swap
[ dup url-absolute? [ derive-url ] [ url-append-path >url ] if ] with map ;
: print-spidering ( url depth -- )
"depth: " write number>string write
@ -83,7 +94,7 @@ TUPLE: unique-deque assoc deque ;
f url spider spidered>> set-at
[ url http-get ] benchmark :> fetch-time :> html :> headers
[
html parse-html [ ] [ find-hrefs spider normalize-hrefs ] bi
html parse-html [ ] [ find-all-links spider normalize-hrefs ] bi
] benchmark :> processing-time :> links :> parsed-html
url depth headers fetch-time parsed-html links processing-time
now spider-result boa ;
@ -110,6 +121,7 @@ TUPLE: unique-deque assoc deque ;
} 1&& ;
: setup-next-url ( spider -- spider url depth )
dup todo>> peek-url url>> present >>currently-spidering
dup todo>> pop-url [ url>> ] [ depth>> ] bi ;
: spider-next-page ( spider -- )
@ -119,7 +131,7 @@ PRIVATE>
: run-spider-loop ( spider -- )
dup spider-page? [
[ spider-next-page ] [ run-spider-loop ] bi
[ spider-next-page ] [ spider-sleep ] [ run-spider-loop ] tri
] [
drop
] if ;