diff --git a/core/modern/authors.txt b/core/modern/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/compiler/authors.txt b/core/modern/compiler/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/compiler/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/compiler/compiler.factor b/core/modern/compiler/compiler.factor new file mode 100644 index 0000000000..b1b963da5d --- /dev/null +++ b/core/modern/compiler/compiler.factor @@ -0,0 +1,370 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs classes.mixin combinators +combinators.short-circuit definitions effects effects.parser fry +graphs io.pathnames kernel lexer locals math math.statistics +memoize modern multiline parser quotations sequences +sequences.extras sets splitting strings unicode words ; +IN: modern.compiler + +: vocab>core2-path ( vocab -- path ) + ".private" ?tail drop + "." split "/" join + [ "resource:core2/" prepend-path ] + [ file-name ".factor" append append-path ] bi ; + +: filter-using ( using -- using' ) + { "accessors" "threads.private" "threads" } diff ; + +<< + SYNTAX: STRING-DISPATCH: + [ + scan-new-word scan-effect + H{ } clone over [ in>> but-last ] [ out>> ] bi + '[ _ ?at [ throw ] unless _ call-effect ] + swap + ] with-definition define-declared ; + +SYNTAX: STRING-M: + [ + scan-token scan-word parse-definition + over changed-definition + swap def>> first swapd set-at + ] with-definition ; +>> + + +TUPLE: holder literal ; +TUPLE: comment' < holder ; +TUPLE: using' < holder ; +TUPLE: use' < holder ; +TUPLE: in' < holder ; +TUPLE: qualified-with' < holder ; +TUPLE: qualified' < holder ; +TUPLE: script' < holder ; +TUPLE: m' < holder ; +TUPLE: instance' < holder ; +TUPLE: word' < holder ; +TUPLE: generic' < holder ; +TUPLE: generic#' < holder ; +TUPLE: hook' < holder ; +TUPLE: math' < holder ; +TUPLE: constant' < holder ; +TUPLE: c' < holder ; +TUPLE: initialize' < holder ; +TUPLE: startup-hook' < holder ; +TUPLE: shutdown-hook' < holder ; +TUPLE: primitive' < holder ; +TUPLE: defer' < holder ; +TUPLE: symbols' < holder ; +TUPLE: symbol' < holder ; +TUPLE: slot' < holder ; +TUPLE: mixin' < holder ; +TUPLE: singletons' < holder ; +TUPLE: singleton' < holder ; +TUPLE: tuple' < holder ; +TUPLE: error' < holder ; +TUPLE: builtin' < holder ; +TUPLE: predicate' < holder ; +TUPLE: union' < holder ; + +! generated definitions +TUPLE: define' holder name ; +TUPLE: generate-accessor' < define' ; +TUPLE: generate-predicate' < define' ; + +: make-holder ( literal class -- obj ) + new + swap >>literal ; inline + +GENERIC: split-decorators ( seq -- base left right ) +M: compound-literal split-decorators + sequence>> + [ decorator-literal? not ] partition + [ first ] dip + [ left-decorator-literal? ] partition ; +M: object split-decorators f f ; + +! GENERIC: apply-decorator ( base decorator -- ) +! : apply-decorators ( obj seq -- obj ) ; + +GENERIC: base-literal ( obj -- obj ) +M: compound-literal base-literal + sequence>> [ decorator-literal? not ] find nip ; +M: object base-literal ; + + +GENERIC: literal>tag ( class -- string/f ) +M: line-comment-literal literal>tag drop f ; +M: uppercase-colon-literal literal>tag + tag>> [ "word" ] [ >lower ] if-empty ; +M: compound-literal literal>tag + base-literal literal>tag ; + +: literal>holder ( literal -- obj ) + [ ] [ + literal>tag [ "'" append "modern.compiler" lookup-word ] + [ \ comment' ] if* + ] bi + '[ _ make-holder ] call( obj -- obj ) ; + +: literals>holders ( literals -- holders ) + [ literal>holder ] map ; + +GENERIC: holder>definitions' ( literal -- assoc ) +M: comment' holder>definitions' drop f ; +M: using' holder>definitions' drop f ; +M: use' holder>definitions' drop f ; +M: in' holder>definitions' drop f ; +M: qualified-with' holder>definitions' drop f ; +M: qualified' holder>definitions' drop f ; +M: script' holder>definitions' drop f ; +M: m' holder>definitions' drop f ; +M: instance' holder>definitions' drop f ; + +! Single words +M: word' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: generic' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: generic#' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: hook' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: math' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: constant' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: c' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: initialize' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: startup-hook' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: shutdown-hook' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: primitive' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; +M: defer' holder>definitions' + dup literal>> base-literal payload>> first tag>> define' boa ; + +! Multiple words +M: symbols' holder>definitions' + dup literal>> base-literal payload>> [ tag>> ] map [ define' boa ] with map ; +M: symbol' holder>definitions' + dup literal>> base-literal payload>> [ tag>> ] map [ define' boa ] with map ; +M: slot' holder>definitions' + dup literal>> base-literal payload>> [ tag>> ] map + [ generate-accessor' boa ] with map ; + +! these also make class predicate? words + +GENERIC: slot-accessor-name ( obj -- string ) +M: single-matched-literal slot-accessor-name + payload>> first tag>> ">>" append ; +M: tag-literal slot-accessor-name tag>> ">>" append ; + +M: tuple' holder>definitions' + [ dup literal>> base-literal payload>> first tag>> define' boa ] + [ dup literal>> base-literal payload>> first tag>> "?" append generate-predicate' boa ] + [ + dup literal>> base-literal payload>> rest + [ slot-accessor-name generate-accessor' boa ] with map + ] tri [ 2array ] dip append ; + +M: error' holder>definitions' + [ dup literal>> base-literal payload>> first tag>> define' boa ] + [ dup literal>> base-literal payload>> first tag>> "?" append generate-predicate' boa ] + [ + dup literal>> base-literal payload>> rest + [ slot-accessor-name generate-accessor' boa ] with map + ] tri [ 2array ] dip append ; + +M: builtin' holder>definitions' + [ dup literal>> base-literal payload>> first tag>> define' boa ] + [ dup literal>> base-literal payload>> first tag>> "?" append generate-predicate' boa ] bi 2array ; +M: predicate' holder>definitions' + [ dup literal>> base-literal payload>> first tag>> define' boa ] + [ dup literal>> base-literal payload>> first tag>> "?" append generate-predicate' boa ] bi 2array ; +M: union' holder>definitions' + [ dup literal>> base-literal payload>> first tag>> define' boa ] + [ dup literal>> base-literal payload>> first tag>> "?" append generate-predicate' boa ] bi 2array ; + +! Multiple and class predicates +M: mixin' holder>definitions' + [ dup literal>> base-literal payload>> [ tag>> ] map [ define' boa ] with map ] + [ dup literal>> base-literal payload>> [ tag>> "?" append ] map [ generate-predicate' boa ] with map ] bi append ; + +M: singletons' holder>definitions' + [ dup literal>> base-literal payload>> [ tag>> ] map [ define' boa ] with map ] + [ dup literal>> base-literal payload>> [ tag>> "?" append ] map [ generate-predicate' boa ] with map ] bi append ; + +M: singleton' holder>definitions' + [ dup literal>> base-literal payload>> [ tag>> ] map [ define' boa ] with map ] + [ dup literal>> base-literal payload>> [ tag>> "?" append ] map [ generate-predicate' boa ] with map ] bi append ; + +: holder>definitions ( obj -- seq ) + holder>definitions' dup sequence? [ 1array ] unless ; + +: holders>definitions ( holders -- seq ) + [ holder>definitions ] map concat ; + + +: holders>using ( holders -- using ) + [ { [ using'? ] [ use'? ] } 1|| ] filter + [ literal>> payload>> [ tag>> ] map ] map concat ; + +: holders>in ( holders -- using ) + [ in'? ] filter + [ literal>> payload>> [ tag>> ] map ] map concat ; + +GENERIC: handle-colon-tag ( seq tag -- obj ) +GENERIC: handle-paren-tag ( seq tag -- obj ) +! M: f handle-paren-tag drop ; +GENERIC: handle-brace-tag ( seq tag -- obj ) +GENERIC: handle-bracket-tag ( seq tag -- obj ) +GENERIC: handle-string-tag ( seq tag -- obj ) + +ERROR: word-not-found word ; +: lookup-in-namespace ( key namespace -- obj/f ) + ?at [ + ] [ + word-not-found + ] if ; + +GENERIC# lookup-literal 1 ( literal namespace -- obj ) + +M: tag-literal lookup-literal + [ tag>> ] dip lookup-in-namespace ; + +: lookup-sequence ( seq namespace -- obj ) + '[ _ lookup-literal ] map ; + +ERROR: unknown-tag tag ; + +ERROR: unknown-single-matched-delimiter sequence tag ch ; +M: single-matched-literal lookup-literal + [ [ payload>> ] dip lookup-sequence ] + [ [ tag>> ] dip over empty? [ 2drop f ] [ lookup-literal ] if ] + [ drop delimiter>> ] 2tri + { + { "(" [ handle-paren-tag ] } + { "{" [ handle-brace-tag ] } + { "[" [ handle-bracket-tag ] } + { ":" [ handle-colon-tag ] } + { "\"" [ handle-string-tag ] } + [ unknown-single-matched-delimiter ] + } case ; + + +GENERIC: definition>quotation ( namespace name definition -- quot ) +M: define' definition>quotation + holder>> definition>quotation + ; + + +! Done by update-classes +M: generate-predicate' definition>quotation 3drop f ; + + +GENERIC: stack-effect? ( obj -- ? ) +M: single-matched-literal stack-effect? { [ tag>> ] [ delimiter>> "(" = ] } 1&& ; +M: object stack-effect? drop f ; + +ERROR: word-expects-stack-effect ; +: ensure-stack-effect ( obj -- ? ) + dup stack-effect? [ word-expects-stack-effect ] unless ; + +ERROR: word-expects-name-effect-body payload ; +: name-effect-body ( payload -- name effect body ) + payload>> dup length 2 < [ word-expects-name-effect-body ] when + [ first2 ensure-stack-effect ] [ 2 tail ] bi ; + +: body>quotation ( body namespace -- quot ) + 2drop [ ] + ; + +M:: word' definition>quotation ( namespace name definition -- quot ) + definition literal>> base-literal + name-effect-body :> ( name' effect body ) + name + body namespace body>quotation + effect namespace lookup-literal + '[ _ _ _ define-declared ] ; + +M: mixin' definition>quotation + ! literal>> base-literal payload>> first tag>> >string + drop nip '[ _ define-mixin-class ] ; + +M: object definition>quotation + 3drop [ ] ; + +TUPLE: manifest2 name literals holders definitions definition-assoc namespaces ; + +: ( name literals holders definitions -- manifest2 ) + manifest2 new + swap >>definitions + dup definitions>> [ [ name>> ] keep ] { } map>assoc >>definition-assoc + swap >>holders + swap >>literals + swap ".private" ?tail drop >>name ; inline + +: manifest>scoped-words ( manifest -- seq ) + [ name>> ] [ definition-assoc>> keys ] bi + [ ":" glue ] with map ; + +: manifest>own-namespace ( manifest -- namespace ) + [ definition-assoc>> keys ] [ manifest>scoped-words [ 1array ] map ] bi + zip ; + +: manifest>using ( manifest -- seq ) + holders>> holders>using ; + +DEFER: load-modern +: manifest>combined-namespace ( manifest -- namespaces ) + [ manifest>using [ load-modern manifest>own-namespace ] map sift members H{ } clone [ assoc-union ] reduce ] + [ manifest>own-namespace ] bi assoc-union ; + +: manifest>quotation ( manifest -- quot ) + [ manifest>combined-namespace ] [ definitions>> ] bi + [ [ name>> ] [ ] bi definition>quotation ] with { } map-as concat ; + +GENERIC: add-predicates ( obj -- seq ) +M: string add-predicates dup "?" append 2array ; +M: sequence add-predicates [ add-predicates ] map concat ; + + +: manifest>definitions ( manifest -- namespace ) + [ name>> ] + [ definitions>> [ name>> ] map ] bi + [ ":" glue ] with map ; + +: literals>manifest ( name/f literals -- manifest ) + dup literals>holders + dup holders>definitions ; + +: string>manifest ( string -- manifest ) + string>literals f swap literals>manifest ; + +MEMO: load-modern ( name -- literals ) + dup vocab>core2-path path>literals + literals>manifest ; + +: load-modern-closure ( vocab -- manifests ) + \ load-modern reset-memoized + load-modern [ holders>using [ load-modern ] map ] closure ; + + +/* +"sequences" load-modern +[ holder>definitions ] map sift +[ dup array? [ [ name>> ] map ] [ name>> ] if ] map flatten +describe + + +clear +"sequences" load-modern +definitions>> [ define'? ] filter +[ holder>> word'? ] filter +first +*/ diff --git a/core/modern/modern-tests.factor b/core/modern/modern-tests.factor new file mode 100644 index 0000000000..3f38926cde --- /dev/null +++ b/core/modern/modern-tests.factor @@ -0,0 +1,98 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors kernel modern sequences strings tools.test ; +IN: modern.tests + +{ 0 } [ "" string>literals length ] unit-test +{ 1 } [ "a" string>literals length ] unit-test +{ 1 } [ " a" string>literals length ] unit-test +{ 1 } [ " a " string>literals length ] unit-test +{ 3 } [ "a b c" string>literals length ] unit-test + +{ 1 } [ "`abc" string>literals length ] unit-test +{ 2 } [ "`abc `cba" string>literals length ] unit-test +{ 2 } [ "\"abc\" \"cba\"" string>literals length ] unit-test +{ 2 } [ "[[abc]] [[cba]]" string>literals length ] unit-test +{ 2 } [ "{{abc}} {{cba}}" string>literals length ] unit-test +{ 2 } [ "((abc)) ((cba))" string>literals length ] unit-test +{ 2 } [ "[=[abc]=] [=[cba]=]" string>literals length ] unit-test +{ 2 } [ "{={abc}=} {={cba}=}" string>literals length ] unit-test +{ 2 } [ "(=(abc)=) (=(cba)=)" string>literals length ] unit-test +{ 2 } [ "[==[abc]==] [==[cba]==]" string>literals length ] unit-test +{ 2 } [ "{=={abc}==} {=={cba}==}" string>literals length ] unit-test +{ 2 } [ "(==(abc)==) (==(cba)==)" string>literals length ] unit-test + +{ 1 } [ "hex`abc" string>literals length ] unit-test +{ 2 } [ "hex`abc hex`cba" string>literals length ] unit-test +{ 2 } [ "hex\"abc\" hex\"cba\"" string>literals length ] unit-test +{ 2 } [ "hex[[abc]] hex[[cba]]" string>literals length ] unit-test +{ 2 } [ "hex{{abc}} hex{{cba}}" string>literals length ] unit-test +{ 2 } [ "hex((abc)) hex((cba))" string>literals length ] unit-test +{ 2 } [ "hex[=[abc]=] hex[=[cba]=]" string>literals length ] unit-test +{ 2 } [ "hex{={abc}=} hex{={cba}=}" string>literals length ] unit-test +{ 2 } [ "hex(=(abc)=) hex(=(cba)=)" string>literals length ] unit-test +{ 2 } [ "hex[==[abc]==] hex[==[cba]==]" string>literals length ] unit-test +{ 2 } [ "hex{=={abc}==} hex{=={cba}==}" string>literals length ] unit-test +{ 2 } [ "hex(==(abc)==) hex(==(cba)==)" string>literals length ] unit-test + + +{ 1 } [ "[ ]" string>literals length ] unit-test +{ 1 } [ "abc[ ]" string>literals length ] unit-test +{ 1 } [ "abc[ 1 ]" string>literals length ] unit-test +{ 1 } [ "abc[ 1 abc]" string>literals length ] unit-test +{ 1 } [ "{ }" string>literals length ] unit-test +{ 1 } [ "abc{ }" string>literals length ] unit-test +{ 1 } [ "abc{ 1 }" string>literals length ] unit-test +{ 1 } [ "abc{ 1 abc}" string>literals length ] unit-test + +{ 1 } [ "( )" string>literals length ] unit-test +{ 1 } [ "abc( )" string>literals length ] unit-test +{ 1 } [ "abc( 1 )" string>literals length ] unit-test +{ 1 } [ "abc( 1 abc)" string>literals length ] unit-test + +[ "A{ B}" string>literals ] must-fail +[ "A( B)" string>literals ] must-fail +[ "A[ B]" string>literals ] must-fail +[ "A: B;" string>literals ] must-fail +[ "A: AA;" string>literals ] must-fail +[ "A: B{ C} A;" string>literals ] must-fail + +{ 1 } [ "!omg" string>literals length ] unit-test +{ 1 } [ "! omg" string>literals length ] unit-test +{ 1 } [ "![[omg]]" string>literals length ] unit-test +{ 1 } [ "![[ + omg]]" string>literals length +] unit-test + +{ 1 } [ "\\ a" string>literals length ] unit-test +{ 1 } [ "\\ \\" string>literals length ] unit-test +{ 1 } [ " \\ abcd " string>literals length ] unit-test + +{ "omg" } [ "!omg" string>literals first payload>> >string ] unit-test + +! Comment character should be #, and should not be allowed in word names +! For now, we have exclamation as comment character and words +! like suffix! which aren't allowed to start comments +{ 2 } [ "a!omg lol" string>literals length ] unit-test +{ 3 } [ "a! omg lol" string>literals length ] unit-test +{ 2 } [ "a![[omg]] lol" string>literals length ] unit-test + +{ t } [ "[ ][ ][ ]" string>literals length 1 = ] unit-test +{ t } [ "[ ][ ][ ]" string>literals first compound-literal? ] unit-test +{ t } [ "[ ][ ][ ]" string>literals first sequence>> length 3 = ] unit-test + +! This is broken. +! hex[[abc]] -> hex#[[abc]] ! commented out hex literal! +! $hex[[abc${0}]] ! interpolate +! { 2 } [ "a![[ +! omg]] lol" string>literals length +! ] unit-test + + +{ 1 } [ "a@ b@ hi @c @d" string>literals length ] unit-test + +{ 1 } [ "{ 1 }@ { 2 }@ hi @{ 3 } @{ 4 }" string>literals length ] unit-test + + +{ 1 } [ ":foo" string>literals length ] unit-test +{ 1 } [ "( :integer )" string>literals length ] unit-test \ No newline at end of file diff --git a/core/modern/modern.factor b/core/modern/modern.factor new file mode 100644 index 0000000000..674d19ed8c --- /dev/null +++ b/core/modern/modern.factor @@ -0,0 +1,469 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs assocs.extras combinators +combinators.short-circuit constructors continuations fry +io.encodings.utf8 io.files kernel locals macros make math +math.order modern.paths modern.slices multiline namespaces +quotations sequences sequences.extras splitting +splitting.monotonic strings unicode ; +IN: modern + +<< +! Base rules, everything should have a generator macro +TUPLE: lexer generator ; + +! Declarative rules, add more! +TUPLE: tag-lexer < lexer ; ! default, if nothing else matches, add one with regexp for c-style names etc +TUPLE: dquote-lexer < lexer delimiter escape ignore-whitespace? ; ! ``close`` slot someday to allow ` ' +TUPLE: matched-lexer < lexer delimiter double-char ; ! ``close`` slot someday, to allow `` '' +TUPLE: backtick-lexer < lexer delimiter ; +TUPLE: backslash-lexer < lexer delimiter payload-exception? ; ! payload-exception is \n words +TUPLE: line-comment-lexer < lexer delimiter word-name-exception? ; ! escape-newline-exception? (like C) +TUPLE: colon-lexer < lexer delimiter ; +TUPLE: semicolon-lexer < lexer delimiter ; ! ; inline foldable +TUPLE: whitespace-lexer < lexer delimiter ; ! \s \r \n \t? +TUPLE: terminator-lexer < lexer delimiter ; +TUPLE: decorator-lexer < lexer delimiter ; + +! Base lexer result +TUPLE: literal underlying seq lexer left-decorators right-decorators ; +TUPLE: tag-literal < literal tag ; +TUPLE: matched-literal < tag-literal delimiter payload closing-tag ; +TUPLE: delimited-literal < tag-literal delimiter payload ; +TUPLE: decorator-literal < literal delimiter payload ; + +TUPLE: dquote-literal < delimited-literal ; +TUPLE: single-matched-literal < matched-literal ; +TUPLE: double-matched-literal < matched-literal ; +TUPLE: uppercase-colon-literal < single-matched-literal ; +TUPLE: lowercase-colon-literal < delimited-literal ; +! TUPLE: standalone-colon-literal < delimited-literal ; ! :foo +TUPLE: backtick-literal < delimited-literal ; +TUPLE: backslash-literal < delimited-literal ; +TUPLE: semicolon-literal < delimited-literal ; +TUPLE: line-comment-literal < delimited-literal ; +TUPLE: terminator-literal < tag-literal ; +TUPLE: whitespace-literal < tag-literal ; + +TUPLE: left-decorator-literal < decorator-literal ; +TUPLE: right-decorator-literal < decorator-literal ; + +TUPLE: compound-sequence-literal sequence ; +CONSTRUCTOR: compound-sequence-literal ( sequence -- obj ) ; +>> + +GENERIC: lexed-underlying ( obj -- slice ) +M: f lexed-underlying ; +M: object lexed-underlying underlying>> ; +M: slice lexed-underlying ; + +TUPLE: compound-literal sequence ; +CONSTRUCTOR: compound-literal ( sequence -- obj ) ; + +! Ensure that we only have one decorated thing in a compound-literal +ERROR: bad-compound-literal seq decorators words ; +: check-compound-literal ( seq -- seq ) ; + +GENERIC: make-compound-literals ( seq -- seq' ) +M: object make-compound-literals ; +M: array make-compound-literals + [ + { + [ [ lexed-underlying ] bi@ slices-touch? ] + [ [ ] [ left-decorator-literal? ] bi* and ] + [ [ right-decorator-literal? ] [ ] bi* and ] + } 2|| + ] monotonic-split + [ dup length 1 > [ ] [ first ] if ] map ; + +! We have empty decorators, just the @ right here +! wrap the decorated object in the payload slot +GENERIC: collapse-decorators ( seq -- seq' ) +M: object collapse-decorators ; +M: array collapse-decorators + [ + { + [ [ left-decorator-literal? ] [ ] bi* and ] + [ [ ] [ right-decorator-literal? ] bi* and ] + } 2|| + ] monotonic-split + [ + dup length 1 > [ + first2 + 2dup [ left-decorator-literal? ] [ ] bi* and [ + >>payload + ] [ + [ payload<< ] keep + ] if + ] [ + first + ] if + ] map ; + +: split-double-dash ( seq -- seqs ) + dup [ { [ tag-literal? ] [ tag>> "--" = ] } 1&& ] split-when + dup length 1 > [ + nip + ] [ + drop + ] if ; + +: postprocess-lexed ( seq -- seq' ) + collapse-decorators make-compound-literals ; + + +ERROR: whitespace-expected-after n string ch ; +ERROR: expected-more-tokens n string expected ; +ERROR: string-expected-got-eof n string ; + +:: make-tag-literal ( tag -- literal ) + tag-literal new + tag >string >>tag + tag >>underlying + tag 1array >>seq ; inline + +:: make-tag-class-literal ( tag class -- literal ) + class new + tag >string >>tag + tag >>underlying + tag 1array >>seq ; inline + +:: make-tag-payload-literal ( payload last tag class -- literal ) + class new + tag >string >>tag + payload >string >>payload + tag last [ dup tag-literal? [ lexed-underlying ] when ] bi@ ?span-slices >>underlying + tag payload 2array >>seq ; inline + +:: make-delimited-literal ( payload last tag delimiter class -- literal ) + class new + tag >string >>tag + payload dup slice? [ >string ] when >>payload + tag last [ dup tag-literal? [ lexed-underlying ] when ] bi@ ?span-slices >>underlying + delimiter >string >>delimiter + tag delimiter payload 3array >>seq ; inline + +ERROR: mismatched-closing opening closing ; +:: make-matched-literal ( payload closing tag opening-delimiter class -- literal ) + class new + tag >string >>tag + payload postprocess-lexed opening-delimiter "\"" = [ split-double-dash ] unless >>payload + tag closing [ dup tag-literal? [ lexed-underlying ] when ] bi@ ?span-slices >>underlying + opening-delimiter >string >>delimiter + dup single-matched-literal? [ + closing tag>> length 1 > [ + tag opening-delimiter append + matching-delimiter-string closing tag>> sequence= [ opening-delimiter closing tag>> mismatched-closing ] unless + ] when + closing tag>> >>closing-tag + ] when + tag opening-delimiter payload closing 4array >>seq ; inline + +:: make-decorator-literal ( payload delimiter class -- literal ) + class new + delimiter >>delimiter + payload >>payload + payload delimiter [ lexed-underlying ] bi@ ?span-slices >>underlying + class left-decorator-literal = [ + delimiter payload 2array + ] [ + payload delimiter 2array + ] if >>seq ; inline + +:: make-decorator-sentinel ( delimiter left? -- literal ) + left? left-decorator-literal right-decorator-literal ? new + delimiter >>delimiter + delimiter 1array >>seq + delimiter >>underlying ; inline + +ERROR: long-opening-mismatch tag open n string ch ; + +! (( )) [[ ]] {{ }} +MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) ) + open-ch dup matching-delimiter { + [ drop 2 swap ] + [ drop 1string ] + [ nip 2 swap ] + } 2cleave :> ( openstr2 openstr1 closestr2 ) + [| n string tag! ch | + ch { + { CHAR: = [ + n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch ) + ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless + opening matching-delimiter-string :> needle + + n' string' needle slice-til-string :> ( n'' string'' payload closing ) + n'' string + payload closing tag opening double-matched-literal make-matched-literal + ] } + { open-ch [ + tag 1 cut-slice* swap tag! 1 modify-to :> opening + n 1 + string closestr2 slice-til-string :> ( n' string' payload closing ) + n' string + payload closing tag opening double-matched-literal make-matched-literal + ] } + [ [ tag openstr2 n string ] dip long-opening-mismatch ] + } case + ] ; + +: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ; +: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ; +: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ; + +DEFER: lex +DEFER: lex-factor +ERROR: lex-expected-but-got-eof n string expected ; +! For implementing [ { ( +: lex-until ( n string tags -- n' string payload closing ) + pick [ + 3dup '[ + [ + lex-factor dup , [ + dup tag-literal? [ + ! } gets a chance, but then also full seq { } after recursion... + [ _ ] dip underlying>> '[ _ sequence= ] any? not + ] [ + drop t ! loop again? + ] if + ] [ + _ _ _ lex-expected-but-got-eof + ] if* + ] loop + ] { } make unclip-last + ] [ + lex-expected-but-got-eof + ] if ; + +MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) ) + ch dup matching-delimiter { + [ drop "=" swap prefix ] + [ nip 1string ] + } 2cleave :> ( openstreq closestr1 ) ! [= ] + [| n string tag | + n string tag + 2over nth-check-eof { + { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or (( + { [ dup blank? ] [ drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip 1 cut-slice* single-matched-literal make-matched-literal ] } ! ( foo ) + [ drop [ slice-til-whitespace drop ] dip span-slices make-tag-literal ] ! (foo) + } cond + ] ; + +: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ; +: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ; +: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ; + +: read-backtick ( n string opening -- n' string obj ) + [ + slice-til-whitespace drop + dup + ] dip 1 cut-slice* backtick-literal make-delimited-literal ; + +: read-string-payload ( n string -- n' string ) + over [ + { CHAR: \ CHAR: " } slice-til-separator-inclusive { + { f [ drop ] } + { CHAR: " [ drop ] } + { CHAR: \ [ drop next-char-from drop read-string-payload ] } + } case + ] [ + string-expected-got-eof + ] if ; + +:: read-string ( n string tag -- n' string seq ) + n string read-string-payload drop :> n' + n' string + n' [ n string string-expected-got-eof ] unless + n n' 1 - string + n' 1 - n' string + tag 1 cut-slice* dquote-literal make-matched-literal ; + +: take-comment ( n string slice -- n' string comment ) + 2over ?nth CHAR: [ = [ + [ 1 + ] 2dip 2over ?nth read-double-matched-bracket + ] [ + [ slice-til-eol drop dup ] dip 1 cut-slice* line-comment-literal make-delimited-literal + ] if ; + +: read-til-semicolon ( n string slice -- n' string semi ) + dup '[ but-last ";" append ";" 2array lex-until ] dip + 1 cut-slice* uppercase-colon-literal make-matched-literal ; + +: read-word-or-til-semicolon ( n string slice -- n' string obj ) + 2over next-char-from* "\s\r\n" member? [ + read-til-semicolon + ] [ + merge-slice-til-whitespace make-tag-literal + ] if ; + +: read-lowercase-colon ( n string slice -- n' string lowercase-colon ) + [ lex-factor dup ] dip 1 cut-slice* + lowercase-colon-literal make-delimited-literal ; + +: strict-upper? ( string -- ? ) + [ { [ CHAR: A CHAR: Z between? ] [ "#:-" member? ] } 1|| ] all? ; + +ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ; +: read-colon ( n string slice -- n' string colon ) + dup length 1 = [ + dup prev-char-from-slice { CHAR: \s CHAR: \r CHAR: \n f } member? [ + dup next-char-from-slice { CHAR: \s CHAR: \r CHAR: \n f } member? [ + read-til-semicolon + ] [ + merge-slice-til-whitespace make-tag-literal + ] if + ] [ + read-lowercase-colon + ] if + ] [ + { + { [ dup strict-upper? ] [ read-til-semicolon ] } + [ read-lowercase-colon ] + } cond + ] if ; + +! Words like append! and suffix! are allowed for now. +: read-exclamation ( n string slice -- n' string obj ) + dup { [ "!" sequence= ] [ "#!" sequence= ] } 1|| + [ take-comment ] [ merge-slice-til-whitespace make-tag-literal ] if ; + +ERROR: backslash-expects-whitespace slice ; +: read-backslash ( n string slice -- n' string obj ) + 2over peek-from blank? [ + ! \ foo, M\ foo + [ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* backslash-literal make-delimited-literal + ] [ + ! M\N + merge-slice-til-whitespace make-tag-literal + ] if ; + +! If the slice is 0 width, we stopped on whitespace. +! Advance the index and read again! +: read-token-or-whitespace ( n string slice -- n' string slice ) + dup length 0 = + [ drop [ 1 + ] dip lex-factor ] + [ make-tag-literal ] if ; + +ERROR: mismatched-terminator n string slice ; +: read-terminator ( n string slice -- n' string slice ) + terminator-literal make-tag-class-literal ; + +: ?blank? ( ch/f -- blank/f ) + { [ blank? ] [ f = ] } 1|| ; + + + +: read-decorator ( n string slice -- n' string obj ) + { + { [ dup left-decorator? ] [ t make-decorator-sentinel ] } + { [ dup right-decorator? ] [ + dup length 1 > [ + [ -1 + ] 2dip + -1 modify-to make-tag-literal + ] [ + f make-decorator-sentinel + ] if ] } + [ make-tag-literal ] + } cond ; + +SYMBOL: lexing-delimiters + +: add-lexing-delimiter ( rule -- ) + [ ] [ delimiter>> ] bi lexing-delimiters get set-once-at ; + +<< +: lexer-rules>hashtable ( seq -- obj ) + H{ } clone lexing-delimiters [ + [ add-lexing-delimiter ] each + lexing-delimiters get + ] with-variable ; + +: lexer-rules>delimiters ( seq -- string ) + [ delimiter>> ] "" map-as ; + +: lexer-rules>assoc ( seq -- seq' ) + [ [ delimiter>> ] [ generator>> 1quotation ] bi ] { } map>assoc ; +>> + +MACRO: rules>call-lexer ( seq -- quot: ( n/f string -- n'/f string literal ) ) + [ lexer-rules>delimiters ] + [ + lexer-rules>assoc + { f [ f like dup [ make-tag-literal ] when ] } suffix + ] bi + '[ _ slice-til-either _ case ] ; + +CONSTANT: factor-lexing-rules { + T{ line-comment-lexer { generator read-exclamation } { delimiter CHAR: ! } } + T{ backtick-lexer { generator read-backtick } { delimiter CHAR: ` } } + T{ backslash-lexer { generator read-backslash } { delimiter CHAR: \ } } + T{ dquote-lexer { generator read-string } { delimiter CHAR: " } { escape CHAR: \ } } + T{ decorator-lexer { generator read-decorator } { delimiter CHAR: @ } } + + T{ colon-lexer { generator read-colon } { delimiter CHAR: : } } + T{ matched-lexer { generator read-bracket } { delimiter CHAR: [ } } + T{ matched-lexer { generator read-brace } { delimiter CHAR: { } } + T{ matched-lexer { generator read-paren } { delimiter CHAR: ( } } + + T{ terminator-lexer { generator read-terminator } { delimiter CHAR: ; } } + T{ terminator-lexer { generator read-terminator } { delimiter CHAR: ] } } + T{ terminator-lexer { generator read-terminator } { delimiter CHAR: } } } + T{ terminator-lexer { generator read-terminator } { delimiter CHAR: ) } } + + T{ whitespace-lexer { generator read-token-or-whitespace } { delimiter CHAR: \s } } + T{ whitespace-lexer { generator read-token-or-whitespace } { delimiter CHAR: \r } } + T{ whitespace-lexer { generator read-token-or-whitespace } { delimiter CHAR: \n } } +} + +: lex-factor ( n/f string -- n'/f string literal ) + factor-lexing-rules rules>call-lexer ; + +: string>literals ( string -- sequence ) + [ 0 ] dip [ lex-factor ] loop>array 2nip postprocess-lexed ; + +: vocab>literals ( vocab -- sequence ) + ".private" ?tail drop + modern-source-path utf8 file-contents string>literals ; + +: path>literals ( path -- sequence ) + utf8 file-contents string>literals ; + +: lex-core ( -- assoc ) + core-bootstrap-vocabs [ [ vocab>literals ] [ nip ] recover ] map-zip ; + +: filter-lex-errors ( assoc -- assoc' ) + [ nip array? not ] assoc-filter ; + + +/* +! What a lexer body looks like, produced by make-lexer +: lex ( n/f string -- n'/f string literal ) + "!`\\\"[{(\s\r\n" slice-til-either { + { CHAR: ! [ read-exclamation ] } + { CHAR: ` [ read-backtick ] } + { CHAR: \ [ read-backslash ] } + { CHAR: " [ read-string ] } + { CHAR: [ [ read-bracket ] } + { CHAR: { [ read-brace ] } + { CHAR: ( [ read-paren ] } + { CHAR: \s [ read-token-or-whitespace ] } + { CHAR: \r [ read-token-or-whitespace ] } + { CHAR: \n [ read-token-or-whitespace ] } + { f [ f like dup [ make-tag-literal ] when ] } + } case ; inline +*/ diff --git a/core/modern/out/authors.txt b/core/modern/out/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/out/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/out/out-tests.factor b/core/modern/out/out-tests.factor new file mode 100644 index 0000000000..7a5292373a --- /dev/null +++ b/core/modern/out/out-tests.factor @@ -0,0 +1,56 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators.short-circuit kernel modern +modern.out sequences tools.test multiline ; +IN: modern.out.tests + +: rewrite-same-string ( string -- ? ) + [ [ ] rewrite-string ] keep sequence= ; + +: rename-backtick-delimiter ( string -- string' ) + [ + dup backtick-literal? [ [ drop "^" ] change-delimiter ] when + ] rewrite-string ; + +: rename-backslash-delimiter ( string -- string' ) + [ + dup backslash-literal? [ [ drop "^" ] change-delimiter ] when + ] rewrite-string ; + +{ t } [ "fixnum`33 ch`@" rewrite-same-string ] unit-test +{ t } [ "! omg" rewrite-same-string ] unit-test +{ t } [ "todo! omg" rewrite-same-string ] unit-test +{ t } [ "foo[ bar{ baz( ) } ]" rewrite-same-string ] unit-test + +{ t } [ " ARRAY: ;" rewrite-same-string ] unit-test +{ t } [ " ARRAY: 1 ;{ inline }" rewrite-same-string ] unit-test +{ t } [ " ARRAY: 1 ;[ 0 ]" rewrite-same-string ] unit-test + +{ t } [ " abc{ 1 2 3 abc}" rewrite-same-string ] unit-test +{ t } [ " ABC: abc{ 1 2 3 abc} ABC;" rewrite-same-string ] unit-test +{ t } [ " a{ a{ a{ a} } a}" rewrite-same-string ] unit-test + +! Funky spaced decorator test +{ t } [ + " lol@ { 1 }@ { 2 }@ hi @{ 3 } @{ 4 } @inline" rewrite-same-string +] unit-test +! Disable these for now. +! { t } [ " array: 1" rewrite-same-string ] unit-test +! { t } [ " { array: 1 array: 2 }" rewrite-same-string ] unit-test + + + +{ "fixnum^33 ch^@" } [ "fixnum`33 ch`@" rename-backtick-delimiter ] unit-test + +{ "^ foo ^ bar" } [ "\\ foo \\ bar" rename-backslash-delimiter ] unit-test + +/* +{ ": asdf < '< > > ;" } [ + ": asdf [ '[ ] ] ;" [ + dup { [ single-matched-literal? ] [ delimiter>> "[" = ] } 1&& + [ [ drop "<" ] change-delimiter ] when + ] rewrite-string +] unit-test +*/ + +! lexable-paths [ transform-single-line-comment>hash-comment ] rewrite-paths diff --git a/core/modern/out/out.factor b/core/modern/out/out.factor new file mode 100644 index 0000000000..fbbafc0c13 --- /dev/null +++ b/core/modern/out/out.factor @@ -0,0 +1,311 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators combinators.short-circuit +combinators.smart continuations fry io io.encodings.utf8 +io.files io.streams.string kernel modern modern.paths +modern.slices multiline namespaces prettyprint sequences sets +splitting strings arrays ; +IN: modern.out + +SYMBOL: last-slice + +: write-whitespace ( obj -- ) + [ last-slice get [ swap slice-between ] [ slice-before ] if* io:write ] + [ last-slice namespaces:set ] bi ; + +DEFER: write-literal +GENERIC: write-literal ( obj -- ) +! M: object write-literal lexed-underlying write ; +M: string write-literal write ; +M: slice write-literal [ write-whitespace ] [ write ] bi ; + +M: array write-literal [ write-literal ] each ; + +M: tag-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> write ] + } cleave ; + +M: single-matched-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> write ] + [ payload>> [ write-literal ] each ] ! don't need write-whitespace here, the recursion does it + [ seq>> 3 swap nth lexed-underlying write-whitespace ] + [ closing-tag>> write ] + } cleave ; + +M: double-matched-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ seq>> 2 swap nth write-whitespace ] + [ payload>> io:write ] + [ seq>> 3 swap nth write-whitespace ] + [ delimiter>> matching-delimiter-string io:write ] + } cleave ; + +M: dquote-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ seq>> 2 swap nth write-whitespace ] + [ payload>> io:write ] + [ seq>> 3 swap nth write-whitespace ] + [ delimiter>> matching-delimiter-string io:write ] + } cleave ; + +M: backtick-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ seq>> 2 swap nth write-whitespace ] + [ payload>> io:write ] + } cleave ; + +M: backslash-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ seq>> 2 swap nth write-whitespace ] + [ payload>> io:write ] + } cleave ; + +M: line-comment-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ seq>> 2 swap nth write-whitespace ] + [ payload>> io:write ] + } cleave ; + +M: uppercase-colon-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> write ] + [ payload>> [ write-literal ] each ] ! don't need write-whitespace here, the recursion does it + [ seq>> 3 swap nth lexed-underlying write-whitespace ] + [ closing-tag>> write ] + } cleave ; + +M: lowercase-colon-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ tag>> io:write ] + [ seq>> 1 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ payload>> write-literal ] ! don't need write-whitespace here, the recursion does it + } cleave ; + +M: left-decorator-literal write-literal + { + [ seq>> 0 swap nth write-whitespace ] + [ delimiter>> io:write ] + [ payload>> write-literal ] ! don't need write-whitespace here, the recursion does it + } cleave ; + +M: right-decorator-literal write-literal + { + [ payload>> write-literal ] ! don't need write-whitespace here, the recursion does it + [ seq>> 0 swap nth write-whitespace ] + [ delimiter>> io:write ] + } cleave ; + +M: compound-literal write-literal + sequence>> [ write-literal ] each ; + +M: compound-sequence-literal write-literal + sequence>> [ write-literal ] each ; + +! Swap in write-literal for renaming + +: write-modern-loop ( quot -- ) + [ write-literal ] each ; inline + +: write-modern-string ( seq -- string ) + [ write-modern-loop ] with-string-writer ; inline + +: write-modern-path ( seq path -- ) + utf8 [ write-modern-loop nl ] with-file-writer ; inline + +: map-literals ( obj quot: ( obj -- obj' ) -- seq ) + over single-matched-literal? [ + [ call drop ] [ + '[ + dup compound-sequence-literal? [ sequence>> ] when + [ _ map-literals ] map + ] change-payload + ] 2bi + ] [ + call + ] if ; inline recursive + +: rewrite-path ( path quot -- ) + ! dup print + '[ [ path>literals [ _ map-literals ] map ] [ ] bi write-modern-path ] + [ drop . ] recover ; inline + +: rewrite-string ( string quot -- ) + ! dup print + [ string>literals ] dip '[ _ map-literals ] map write-modern-string ; inline + +: rewrite-paths ( seq quot -- ) '[ _ rewrite-path ] each ; inline +: lexable-core-paths ( -- seq ) core-source-paths ; +: lexable-basis-paths ( -- seq ) + basis-source-paths { + "resource:basis/bit-arrays/bit-arrays.factor" + "resource:basis/bit-vectors/bit-vectors.factor" + "resource:basis/csv/csv.factor" + "resource:basis/dlists/dlists.factor" + "resource:basis/eval/eval.factor" + "resource:basis/farkup/farkup.factor" + "resource:basis/fry/fry.factor" + "resource:basis/linked-assocs/linked-assocs.factor" + "resource:basis/literals/literals.factor" + "resource:basis/nibble-arrays/nibble-arrays.factor" + "resource:basis/shuffle/shuffle.factor" + "resource:basis/simple-tokenizer/simple-tokenizer.factor" + "resource:basis/specialized-arrays/specialized-arrays.factor" + "resource:basis/specialized-vectors/specialized-vectors.factor" + "resource:basis/suffix-arrays/suffix-arrays.factor" + "resource:basis/urls/urls.factor" + "resource:basis/vlists/vlists.factor" + "resource:basis/alien/data/data.factor" + "resource:basis/alien/syntax/syntax.factor" + "resource:basis/byte-arrays/hex/hex.factor" + "resource:basis/classes/struct/struct.factor" + "resource:basis/cocoa/messages/messages.factor" + "resource:basis/db/postgresql/errors/errors.factor" + "resource:basis/hash-sets/identity/identity.factor" + "resource:basis/hash-sets/sequences/sequences.factor" + "resource:basis/hashtables/identity/identity.factor" + "resource:basis/hashtables/sequences/sequences.factor" + "resource:basis/help/handbook/handbook.factor" + "resource:basis/help/html/html.factor" + "resource:basis/html/templates/fhtml/fhtml.factor" + "resource:basis/http/parsers/parsers.factor" + "resource:basis/io/encodings/iso2022/iso2022.factor" + "resource:basis/json/reader/reader.factor" + "resource:basis/json/writer/writer.factor" + "resource:basis/math/complex/complex.factor" + "resource:basis/math/vectors/simd/simd.factor" + "resource:basis/math/vectors/simd/cords/cords.factor" + "resource:basis/memoize/syntax/syntax.factor" + "resource:basis/peg/ebnf/ebnf.factor" + "resource:basis/peg/parsers/parsers.factor" + "resource:basis/persistent/hashtables/hashtables.factor" + "resource:basis/persistent/vectors/vectors.factor" + "resource:basis/regexp/parser/parser.factor" + "resource:basis/xml/autoencoding/autoencoding.factor" + "resource:basis/xml/dtd/dtd.factor" + "resource:basis/xml/elements/elements.factor" + "resource:basis/xml/entities/entities.factor" + } diff ; + +: lexable-extra-paths ( -- seq ) + extra-source-paths { + "resource:extra/brainfuck/brainfuck.factor" ! EBNF: [[ ]] ; + "resource:extra/cuesheet/cuesheet.factor" ! CHAR: " + "resource:extra/fjsc/fjsc.factor" ! EBNF: + "resource:extra/emojify/emojify.factor" ! R/ + "resource:extra/gml/gml.factor" + "resource:extra/metar/metar.factor" ! R/ + "resource:extra/morse/morse.factor" + "resource:extra/rosetta-code/balanced-brackets/balanced-brackets.factor" + "resource:extra/flip-text/flip-text.factor" + "resource:extra/ini-file/ini-file.factor" + "resource:extra/poker/poker.factor" + "resource:extra/qw/qw.factor" + "resource:extra/svg/svg.factor" + "resource:extra/text-to-pdf/text-to-pdf.factor" + "resource:extra/tnetstrings/tnetstrings.factor" + "resource:extra/trees/trees.factor" + "resource:extra/alien/data/map/map.factor" + "resource:extra/arrays/shaped/shaped.factor" + "resource:extra/bunny/outlined/outlined.factor" + "resource:extra/c/lexer/lexer.factor" + "resource:extra/c/preprocessor/preprocessor.factor" + "resource:extra/gml/parser/parser.factor" + "resource:extra/gml/runtime/runtime.factor" + "resource:extra/gpu/effects/blur/blur.factor" + "resource:extra/hash-sets/numbers/numbers.factor" + "resource:extra/hashtables/numbers/numbers.factor" + "resource:extra/html/parser/parser.factor" + "resource:extra/infix/parser/parser.factor" + "resource:extra/infix/tokenizer/tokenizer.factor" + "resource:extra/parser-combinators/simple/simple.factor" + "resource:extra/pdf/values/values.factor" + "resource:extra/peg/pl0/pl0.factor" + "resource:extra/peg/javascript/parser/parser.factor" + "resource:extra/peg/javascript/tokenizer/tokenizer.factor" + "resource:extra/project-euler/011/011.factor" + "resource:extra/rosetta-code/balanced-brackets/balanced-bracke..." + "resource:extra/slots/syntax/syntax.factor" + "resource:extra/smalltalk/parser/parser.factor" + "resource:extra/talks/galois-talk/galois-talk.factor" + "resource:extra/talks/google-tech-talk/google-tech-talk.factor" + "resource:extra/talks/otug-talk/otug-talk.factor" + "resource:extra/talks/vpri-talk/vpri-talk.factor" + "resource:extra/trees/avl/avl.factor" + "resource:extra/trees/splay/splay.factor" + "resource:extra/yaml/conversion/conversion.factor" + } diff ; + +/* +! These work except they use pegs/ebnf, grep for [[ ]] + modified: basis/db/sqlite/errors/errors.factor + modified: basis/formatting/formatting.factor + modified: basis/globs/globs.factor + modified: extra/alien/fortran/fortran.factor + modified: extra/cpu/8080/emulator/emulator.factor + modified: extra/peg/expr/expr.factor + modified: extra/rosetta-code/arithmetic-evaluation/arithmetic-evaluation.factor + modified: extra/shell/parser/parser.factor +*/ + +: lexable-paths ( -- seq ) + [ + lexable-core-paths + lexable-basis-paths + lexable-extra-paths + ] append-outputs ; + +: paren-word>tick-word ( string -- string' ) + dup [ "(" ?head drop ")" ?tail drop "'" append ] [ ] if ; + +: paren-word-name? ( string -- ? ) + { [ "(" head? ] [ ")" tail? ] } 1&& ; + +: transform-paren-word>tick-word ( token -- token' ) + dup { [ tag-literal? ] [ tag>> paren-word-name? ] } 1&& [ + [ paren-word>tick-word ] change-tag + ] when ; + +: single-line-comment? ( token -- ? ) + { [ line-comment-literal? ] [ delimiter>> "!" sequence= ] } 1&& ; + +: transform-single-line-comment>hash-comment ( token -- token' ) + dup single-line-comment? [ + [ drop "#" ] change-delimiter + ] when ; + +: transform-source ( quot -- ) + lexable-paths swap rewrite-paths ; inline + +: transform-core ( quot -- ) + lexable-core-paths swap rewrite-paths ; inline \ No newline at end of file diff --git a/core/modern/paths/authors.txt b/core/modern/paths/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/paths/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/paths/paths.factor b/core/modern/paths/paths.factor new file mode 100644 index 0000000000..5a4817d37e --- /dev/null +++ b/core/modern/paths/paths.factor @@ -0,0 +1,134 @@ +! Copyright (C) 2015 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators.smart io.files kernel sequences +splitting vocabs.files vocabs.hierarchy vocabs.loader +vocabs.metadata sets ; +IN: modern.paths + +: modern-if-available ( path -- path' ) + dup ".factor" ?tail [ + ".modern" append + dup exists? [ nip ] [ drop ] if + ] [ + drop + ] if ; + +ERROR: not-a-source-path path ; +: force-modern-path ( path -- path' ) + ".factor" ?tail [ ".modern" append ] [ not-a-source-path ] if ; +: modern-docs-path ( path -- path' ) + vocab-docs-path modern-if-available ; +: modern-tests-path ( path -- path' ) + vocab-tests-path modern-if-available ; +: modern-source-path ( path -- path' ) + vocab-source-path modern-if-available ; +: modern-syntax-path ( path -- path' ) + vocab-source-path ".factor" ?tail drop "-syntax.modern" append ; + +: force-modern-docs-path ( path -- path' ) + vocab-docs-path force-modern-path ; +: force-modern-tests-path ( path -- path' ) + vocab-tests-path force-modern-path ; +: force-modern-source-path ( path -- path' ) + vocab-source-path force-modern-path ; + +: vocabs-from ( root -- vocabs ) + "" disk-vocabs-in-root/prefix + no-prefixes [ name>> ] map ; + +: core-vocabs ( -- seq ) "resource:core" vocabs-from ; +: less-core-test-vocabs ( seq -- seq' ) + { + "vocabs.loader.test.a" + "vocabs.loader.test.b" + "vocabs.loader.test.c" + "vocabs.loader.test.d" + "vocabs.loader.test.e" + "vocabs.loader.test.f" + "vocabs.loader.test.g" + "vocabs.loader.test.h" + "vocabs.loader.test.i" + "vocabs.loader.test.j" + "vocabs.loader.test.k" + "vocabs.loader.test.l" + "vocabs.loader.test.m" + "vocabs.loader.test.n" + "vocabs.loader.test.o" + "vocabs.loader.test.p" + } diff ; + +: core-bootstrap-vocabs ( -- seq ) + core-vocabs less-core-test-vocabs ; + +: basis-vocabs ( -- seq ) "resource:basis" vocabs-from ; +: extra-vocabs ( -- seq ) "resource:extra" vocabs-from ; +: all-vocabs ( -- seq ) + [ + core-vocabs + basis-vocabs + extra-vocabs + ] { } append-outputs-as ; + +: filter-exists ( seq -- seq' ) [ exists? ] filter ; + +! These paths have syntax errors on purpose... +: reject-some-paths ( seq -- seq' ) + { + "resource:core/vocabs/loader/test/a/a.factor" + "resource:core/vocabs/loader/test/b/b.factor" + "resource:core/vocabs/loader/test/c/c.factor" + ! Here down have parse errors + "resource:core/vocabs/loader/test/d/d.factor" + "resource:core/vocabs/loader/test/e/e.factor" + "resource:core/vocabs/loader/test/f/f.factor" + "resource:core/vocabs/loader/test/g/g.factor" + "resource:core/vocabs/loader/test/h/h.factor" + "resource:core/vocabs/loader/test/i/i.factor" + "resource:core/vocabs/loader/test/j/j.factor" + "resource:core/vocabs/loader/test/k/k.factor" + "resource:core/vocabs/loader/test/l/l.factor" + "resource:core/vocabs/loader/test/m/m.factor" + "resource:core/vocabs/loader/test/n/n.factor" + "resource:core/vocabs/loader/test/o/o.factor" + "resource:core/vocabs/loader/test/p/p.factor" + "resource:extra/math/blas/vectors/vectors.factor" ! need .modern file + "resource:extra/math/blas/matrices/matrices.factor" ! need .modern file + } diff + ! Don't parse .modern files yet + [ ".modern" tail? ] reject ; + +: modern-source-paths ( names -- paths ) + [ modern-source-path ] map filter-exists reject-some-paths ; +: modern-docs-paths ( names -- paths ) + [ modern-docs-path ] map filter-exists reject-some-paths ; +: modern-tests-paths ( names -- paths ) + [ vocab-tests ] map concat + [ modern-if-available ] map filter-exists reject-some-paths ; + +: all-source-paths ( -- seq ) + all-vocabs modern-source-paths ; + +: all-docs-paths ( -- seq ) + all-vocabs modern-docs-paths ; + +: all-tests-paths ( -- seq ) + all-vocabs modern-tests-paths ; + +: all-syntax-paths ( -- seq ) + all-vocabs [ modern-syntax-path ] map filter-exists reject-some-paths ; + +: all-factor-paths ( -- seq ) + [ + all-syntax-paths all-source-paths all-docs-paths all-tests-paths + ] { } append-outputs-as ; + +: vocab-names>syntax ( strings -- seq ) + [ modern-syntax-path ] map [ exists? ] filter ; + +: core-syntax-paths ( -- seq ) core-vocabs vocab-names>syntax reject-some-paths ; +: basis-syntax-paths ( -- seq ) basis-vocabs vocab-names>syntax reject-some-paths ; +: extra-syntax-paths ( -- seq ) extra-vocabs vocab-names>syntax reject-some-paths ; + +: core-source-paths ( -- seq ) core-vocabs modern-source-paths reject-some-paths ; +: basis-source-paths ( -- seq ) basis-vocabs modern-source-paths reject-some-paths ; +: extra-source-paths ( -- seq ) extra-vocabs modern-source-paths reject-some-paths ; diff --git a/core/modern/slices/authors.txt b/core/modern/slices/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/slices/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/slices/slices.factor b/core/modern/slices/slices.factor new file mode 100644 index 0000000000..b4937f75f3 --- /dev/null +++ b/core/modern/slices/slices.factor @@ -0,0 +1,199 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors assocs fry kernel locals math math.private +sequences sequences.extras sequences.private unicode ; +IN: modern.slices + +: matching-delimiter ( ch -- ch' ) + H{ + { CHAR: ( CHAR: ) } + { CHAR: [ CHAR: ] } + { CHAR: { CHAR: } } + { CHAR: < CHAR: > } + { CHAR: : CHAR: ; } + } ?at drop ; + +: matching-delimiter-string ( string -- string' ) + [ matching-delimiter ] map ; + +ERROR: unexpected-end n string ; +: nth-check-eof ( n string -- nth ) + 2dup ?nth [ 2nip ] [ unexpected-end ] if* ; + +: peek-from ( n/f string -- ch ) + over [ ?nth ] [ 2drop f ] if ; + +! Allow eof +: next-char-from ( n/f string -- n'/f string ch/f ) + over [ + 2dup ?nth [ [ 1 + ] 2dip ] [ f ] if* + ] [ + [ 2drop f ] [ nip ] 2bi f + ] if ; + +: prev-char-from-slice-end ( slice -- ch/f ) + [ to>> 2 - ] [ seq>> ] bi ?nth ; + +: prev-char-from-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: next-char-from-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: char-before-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: char-after-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: next-char-from* ( n/f string -- ch/f ) + next-char-from 2nip ; + +: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? ) + [ find-from ] keep + pick [ drop t ] [ length -rot nip f ] if ; inline + +: skip-blank-from ( n string -- n' string ) + [ [ blank? not ] find-from* 2drop ] keep ; inline + +: skip-til-eol-from ( n string -- n' string ) + [ [ "\r\n" member? ] find-from* 2drop ] keep ; inline + +! Don't include the whitespace in the slice +:: slice-til-whitespace ( n string -- n' string slice/f ch/f ) + n string [ "\s\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +:: slice-until' ( n string quot -- n' string slice/f ch/f ) + n string quot find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-until ( n string quot -- n' string slice/f ) + slice-until' drop ; inline + +:: slice-til-not-whitespace ( n string -- n' string slice/f ch/f ) + n string [ "\s\r\n" member? not ] find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: skip-whitespace ( n string -- n' string ) + slice-til-not-whitespace 2drop ; + +: empty-slice-end ( seq -- slice ) + [ length dup ] [ ] bi ; inline + +: empty-slice-from ( n seq -- slice ) + dupd ; inline + +:: slice-til-eol ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +:: merge-slice-til-eol-slash'' ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n\\" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +: merge-slice-til-whitespace ( n string slice -- n' string slice' ) + [ slice-til-whitespace drop ] dip merge-slices ; + +: merge-slice-til-eol ( n string slice -- n' string slice' ) + [ slice-til-eol drop ] dip merge-slices ; + +: slice-between ( slice1 slice2 -- slice ) + ! ensure-same-underlying + slice-order-by-from + [ to>> ] + [ [ from>> 2dup < [ swap ] unless ] [ seq>> ] bi ] bi* ; + +: slice-before ( slice -- slice' ) + [ drop 0 ] [ from>> ] [ seq>> ] tri ; + +: ?nth' ( n/f string/f -- obj/f ) + over [ ?nth ] [ 2drop f ] if ; + +:: merge-slice-til-eol-slash' ( n string slice -- n' string slice/f ch/f ) + n string merge-slice-til-eol-slash'' :> ( n' string' slice' ch' ) + ch' CHAR: \ = [ + n' 1 + string' ?nth' "\r\n" member? [ + n' 2 + string' slice slice' span-slices merge-slice-til-eol-slash' + ] [ + "omg" throw + ] if + ] [ + n' string' slice slice' span-slices ch' + ] if ; + +! Supports \ at eol (with no space after it) +: slice-til-eol-slash ( n string -- n' string slice/f ch/f ) + 2dup empty-slice-from merge-slice-til-eol-slash' ; + +:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f ) + n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f ) + slice-til-separator-inclusive dup [ + [ [ 1 - ] change-to ] dip + ] when ; + +:: slice-til-either ( n string tokens -- n'/f string slice/f ch ) + n [ + n string '[ tokens member? ] find-from + dup "\s\r\n" member? [ + :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch + ] if + ] [ + f string f f + ] if ; inline + +ERROR: subseq-expected-but-got-eof n string expected ; + +:: slice-til-string ( n string search -- n' string payload end-string ) + search string n start* :> n' + n' [ n string search subseq-expected-but-got-eof ] unless + n' search length + string + n n' string ? + n' dup search length + string ? ; + +: modify-from ( slice n -- slice' ) + '[ from>> _ + ] [ to>> ] [ seq>> ] tri ; + +: modify-to ( slice n -- slice' ) + [ [ from>> ] [ to>> ] [ seq>> ] tri ] dip + swap [ + ] dip ; + +! { CHAR: ] [ read-closing ] } +! { CHAR: } [ read-closing ] } +! { CHAR: ) [ read-closing ] } +: read-closing ( n string tok -- n string tok ) + dup length 1 = [ + -1 modify-to [ 1 - ] 2dip + ] unless ; +