diff --git a/extra/modern/modern-tests.factor b/extra/modern/modern-tests.factor new file mode 100644 index 0000000000..ff71231544 --- /dev/null +++ b/extra/modern/modern-tests.factor @@ -0,0 +1,243 @@ +! Copyright (C) 2017 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: modern modern.slices multiline tools.test ; +IN: modern.tests + +{ f } [ "" upper-colon? ] unit-test +{ t } [ ":" upper-colon? ] unit-test +{ t } [ "::" upper-colon? ] unit-test +{ t } [ ":::" upper-colon? ] unit-test +{ t } [ "FOO:" upper-colon? ] unit-test +{ t } [ "FOO::" upper-colon? ] unit-test +{ t } [ "FOO:::" upper-colon? ] unit-test + +! 'FOO: +{ f } [ "'" upper-colon? ] unit-test +{ t } [ "':" upper-colon? ] unit-test +{ t } [ "'::" upper-colon? ] unit-test +{ t } [ "':::" upper-colon? ] unit-test +{ t } [ "'FOO:" upper-colon? ] unit-test +{ t } [ "'FOO::" upper-colon? ] unit-test +{ t } [ "'FOO:::" upper-colon? ] unit-test + +! \FOO: is not an upper-colon form, it is deactivated by the \ +{ f } [ "\\" upper-colon? ] unit-test +{ f } [ "\\:" upper-colon? ] unit-test +{ f } [ "\\::" upper-colon? ] unit-test +{ f } [ "\\:::" upper-colon? ] unit-test +{ f } [ "\\FOO:" upper-colon? ] unit-test +{ f } [ "\\FOO::" upper-colon? ] unit-test +{ f } [ "\\FOO:::" upper-colon? ] unit-test + + +! Comment +{ + { { "!" "" } } +} [ "!" string>literals >strings ] unit-test + +{ + { { "!" " lol" } } +} [ "! lol" string>literals >strings ] unit-test + +{ + { "lol!" } +} [ "lol!" string>literals >strings ] unit-test + +{ + { { "!" "lol" } } +} [ "!lol" string>literals >strings ] unit-test + +! Colon +{ + { ":asdf:" } +} [ ":asdf:" string>literals >strings ] unit-test + +{ + { { "one:" { "1" } } } +} [ "one: 1" string>literals >strings ] unit-test + +{ + { { "two::" { "1" "2" } } } +} [ "two:: 1 2" string>literals >strings ] unit-test + +{ + { "1" ":>" "one" } +} [ "1 :> one" string>literals >strings ] unit-test + +{ + { { ":" { "foo" } ";" } } +} [ ": foo ;" string>literals >strings ] unit-test + +{ + { + { "FOO:" { "a" } } + { "BAR:" { "b" } } + } +} [ "FOO: a BAR: b" string>literals >strings ] unit-test + +{ + { { "FOO:" { "a" } ";" } } +} [ "FOO: a ;" string>literals >strings ] unit-test + +{ + { { "FOO:" { "a" } "FOO;" } } +} [ "FOO: a FOO;" string>literals >strings ] unit-test + + +! Acute +{ + { { "" } } +} [ "" string>literals >strings ] unit-test + +{ + { { "" } } +} [ "" string>literals >strings ] unit-test + +{ { "" } } [ "" string>literals >strings ] unit-test +{ { ">foo<" } } [ ">foo<" string>literals >strings ] unit-test + +{ { "foo>" } } [ "foo>" string>literals >strings ] unit-test +{ { ">foo" } } [ ">foo" string>literals >strings ] unit-test +{ { ">foo>" } } [ ">foo>" string>literals >strings ] unit-test +{ { ">>foo>" } } [ ">>foo>" string>literals >strings ] unit-test +{ { ">>foo>>" } } [ ">>foo>>" string>literals >strings ] unit-test + +{ { "foo<" } } [ "foo<" string>literals >strings ] unit-test +{ { "literals >strings ] unit-test +{ { "literals >strings ] unit-test +{ { "<literals >strings ] unit-test +{ { "<literals >strings ] unit-test + +! Backslash \AVL{ foo\bar foo\bar{ +{ + { { "SYNTAX:" { "\\AVL{" } } } +} [ "SYNTAX: \\AVL{" string>literals >strings ] unit-test + +[ "\\" string>literals >strings ] must-fail ! \ alone should be legal eventually (?) + +{ { "\\FOO" } } [ "\\FOO" string>literals >strings ] unit-test + +{ + { "foo\\bar" } +} [ "foo\\bar" string>literals >strings ] unit-test + +[ "foo\\bar{" string>literals >strings ] must-fail + +{ + { { "foo\\bar{" { "1" } "}" } } +} [ "foo\\bar{ 1 }" string>literals >strings ] unit-test + +{ { { "char:" { "\\{" } } } } [ "char: \\{" string>literals >strings ] unit-test +[ "char: {" string>literals >strings ] must-fail +[ "char: [" string>literals >strings ] must-fail +[ "char: {" string>literals >strings ] must-fail +[ "char: \"" string>literals >strings ] must-fail +! { { { "char:" { "\\\\" } } } } [ "char: \\\\" string>literals >strings ] unit-test + +[ "char: \\" string>literals >strings ] must-fail ! char: \ should be legal eventually + +{ { { "\\" { "(" } } } } [ "\\ (" string>literals >strings ] unit-test + +{ { "\\[[" } } [ "\\[[" string>literals >strings ] unit-test +{ { "\\[=[" } } [ "\\[=[" string>literals >strings ] unit-test +{ { "\\[==[" } } [ "\\[==[" string>literals >strings ] unit-test + + +{ t } [ "FOO:" strict-upper? ] unit-test +{ t } [ ":" strict-upper? ] unit-test +{ f } [ "" strict-upper? ] unit-test +{ f } [ "FOO>" strict-upper? ] unit-test +{ f } [ ";FOO>" strict-upper? ] unit-test + +{ f } [ "FOO" section-open? ] unit-test +{ f } [ "FOO:" section-open? ] unit-test +{ f } [ ";FOO" section-close? ] unit-test +{ f } [ "FOO" section-close? ] unit-test + + +! Strings +{ + { { "url\"" "google.com" "\"" } } +} [ [[ url"google.com" ]] string>literals >strings ] unit-test + +{ + { { "\"" "google.com" "\"" } } +} [ [[ "google.com" ]] string>literals >strings ] unit-test + +{ + { + { "(" { "a" "b" } ")" } + { "[" { "a" "b" "+" } "]" } + { "(" { "c" } ")" } + } +} [ "( a b ) [ a b + ] ( c )" string>literals >strings ] unit-test + +![[ +! Concatenated syntax +{ + { + { + { "(" { "a" "b" } ")" } + { "[" { "a" "b" "+" } "]" } + { "(" { "c" } ")" } + } + } +} [ "( a b )[ a b + ]( c )" string>literals >strings ] unit-test + +{ + { + { + { "\"" "abc" "\"" } + { "[" { "0" } "]" } + } + } +} [ "\"abc\"[ 0 ]" string>literals >strings ] unit-test +]] + + +{ + { + { "" } + } +} [ "" string>literals >strings ] unit-test + +{ + { + { "" } + } +} [ "" string>literals >strings ] unit-test + + +![[ +{ + { + { + { + "foo::" + { + { + { "" } + { "[" { "0" } "]" } + { "[" { "1" } "]" } + { "[" { "2" } "]" } + { "[" { "3" } "]" } + } + { { "" } } + } + } + } + } +} [ "foo:: [ 0 ][ 1 ][ 2 ][ 3 ] " string>literals >strings ] unit-test +]] + +{ + { + { "foo::" { { "" } { "[" { "0" } "]" } } } + { "[" { "1" } "]" } + { "[" { "2" } "]" } + { "[" { "3" } "]" } + { "" } + } +} [ "foo:: [ 0 ] [ 1 ] [ 2 ] [ 3 ] " string>literals >strings ] unit-test diff --git a/extra/modern/modern.factor b/extra/modern/modern.factor new file mode 100644 index 0000000000..63f2645eda --- /dev/null +++ b/extra/modern/modern.factor @@ -0,0 +1,499 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: arrays assocs combinators combinators.short-circuit +continuations fry io.encodings.utf8 io.files kernel locals make +math math.order modern.paths modern.slices sequences +sequences.extras sets splitting strings unicode vocabs.loader ; +IN: modern + +ERROR: string-expected-got-eof n string ; +ERROR: long-opening-mismatch tag open n string ch ; + +! (( )) [[ ]] {{ }} +MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) ) + open-ch dup matching-delimiter { + [ drop 2 swap ] + [ drop 1string ] + [ nip 2 swap ] + } 2cleave :> ( openstr2 openstr1 closestr2 ) + [| n string tag! ch | + ch { + { CHAR: = [ + tag 1 cut-slice* drop tag! ! tag of (=( is ( here, fix it + n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch ) + ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless + opening matching-delimiter-string :> needle + + n' string' needle slice-til-string :> ( n'' string'' payload closing ) + n'' string + tag opening payload closing 4array + ] } + { open-ch [ + tag 1 cut-slice* swap tag! 1 modify-to :> opening + n 1 + string closestr2 slice-til-string :> ( n' string' payload closing ) + n' string + tag opening payload closing 4array + ] } + [ [ tag openstr2 n string ] dip long-opening-mismatch ] + } case + ] ; + +: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ; +: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ; +: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ; + +DEFER: lex-factor-top +DEFER: lex-factor +ERROR: lex-expected-but-got-eof n string expected ; +! For implementing [ { ( +: lex-until ( n string tag-sequence -- n' string payload ) + 3dup '[ + [ + lex-factor-top dup f like [ , ] when* [ + dup [ + ! } gets a chance, but then also full seq { } after recursion... + [ _ ] dip '[ _ sequence= ] any? not + ] [ + drop t ! loop again? + ] if + ] [ + _ _ _ lex-expected-but-got-eof + ] if* + ] loop + ] { } make ; + +DEFER: section-close? +DEFER: upper-colon? +DEFER: lex-factor-nested +: lex-colon-until ( n string tag-sequence -- n' string payload ) + '[ + [ + lex-factor-nested dup f like [ , ] when* [ + dup [ + ! This is for ending COLON: forms like ``A: PRIVATE>`` + dup section-close? [ + drop f + ] [ + ! } gets a chance, but then also full seq { } after recursion... + [ _ ] dip '[ _ sequence= ] any? not + ] if + ] [ + drop t ! loop again? + ] if + ] [ + f + ] if* + ] loop + ] { } make ; + +: split-double-dash ( seq -- seqs ) + dup [ { [ "--" sequence= ] } 1&& ] split-when + dup length 1 > [ nip ] [ drop ] if ; + +MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) ) + ch dup matching-delimiter { + [ drop "=" swap prefix ] + [ nip 1string ] + } 2cleave :> ( openstreq closestr1 ) ! [= ] + [| n string tag | + n string tag + 2over nth-check-eof { + { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or (( + { [ dup blank? ] [ + drop dup '[ _ matching-delimiter-string closestr1 2array members lex-until ] dip + swap unclip-last 3array ] } ! ( foo ) + [ drop [ slice-til-whitespace drop ] dip span-slices ] ! (foo) + } cond + ] ; + +: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ; +: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ; +: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ; +: read-string-payload ( n string -- n' string ) + over [ + { CHAR: \\ CHAR: \" } slice-til-separator-inclusive { + { f [ drop ] } + { CHAR: \" [ drop ] } + { CHAR: \\ [ drop next-char-from drop read-string-payload ] } + } case + ] [ + string-expected-got-eof + ] if ; + +:: read-string ( n string tag -- n' string seq ) + n string read-string-payload drop :> n' + n' string + n' [ n string string-expected-got-eof ] unless + n n' 1 - string + n' 1 - n' string + tag -rot 3array ; + +: take-comment ( n string slice -- n' string comment ) + 2over ?nth CHAR: [ = [ + [ 1 + ] 2dip 2over ?nth read-double-matched-bracket + ] [ + [ slice-til-eol drop ] dip swap 2array + ] if ; + +: terminator? ( slice -- ? ) + { + [ ";" sequence= ] + [ "]" sequence= ] + [ "}" sequence= ] + [ ")" sequence= ] + } 1|| ; + +ERROR: expected-length-tokens n string length seq ; +: ensure-no-false ( n string seq -- n string seq ) + dup [ length 0 > ] all? [ [ length ] keep expected-length-tokens ] unless ; + +ERROR: token-expected n string obj ; +ERROR: unexpected-terminator n string slice ; +: read-lowercase-colon ( n string slice -- n' string lowercase-colon ) + dup [ CHAR: : = ] count-tail + '[ + _ [ lex-factor ] replicate ensure-no-false dup [ token-expected ] unless + dup terminator? [ unexpected-terminator ] when + ] dip swap 2array ; + +: (strict-upper?) ( string -- ? ) + { + ! All chars must... + [ + [ + { [ CHAR: A CHAR: Z between? ] [ "':-\\#" member? ] } 1|| + ] all? + ] + ! At least one char must... + [ [ { [ CHAR: A CHAR: Z between? ] [ CHAR: ' = ] } 1|| ] any? ] + } 1&& ; + +: strict-upper? ( string -- ? ) + { [ ":" sequence= ] [ (strict-upper?) ] } 1|| ; + +! +: section-open? ( string -- ? ) + { + [ "<" head? ] + [ length 2 >= ] + [ rest strict-upper? ] + [ ">" tail? not ] + } 1&& ; + +: html-self-close? ( string -- ? ) + { + [ "<" head? ] + [ length 2 >= ] + [ rest strict-upper? not ] + [ [ blank? ] any? not ] + [ "/>" tail? ] + } 1&& ; + +: html-full-open? ( string -- ? ) + { + [ "<" head? ] + [ length 2 >= ] + [ second CHAR: / = not ] + [ rest strict-upper? not ] + [ [ blank? ] any? not ] + [ ">" tail? ] + } 1&& ; + +: html-half-open? ( string -- ? ) + { + [ "<" head? ] + [ length 2 >= ] + [ second CHAR: / = not ] + [ rest strict-upper? not ] + [ [ blank? ] any? not ] + [ ">" tail? not ] + } 1&& ; + +: html-close? ( string -- ? ) + { + [ "= ] + [ rest strict-upper? not ] + [ [ blank? ] any? not ] + [ ">" tail? ] + } 1&& ; + +: special-acute? ( string -- ? ) + { + [ section-open? ] + [ html-self-close? ] + [ html-full-open? ] + [ html-half-open? ] + [ html-close? ] + } 1|| ; + +: upper-colon? ( string -- ? ) + dup { [ length 0 > ] [ [ CHAR: : = ] all? ] } 1&& [ + drop t + ] [ + { + [ length 2 >= ] + [ "\\" head? not ] ! XXX: good? + [ ":" tail? ] + [ dup [ CHAR: : = ] find drop head strict-upper? ] + } 1&& + ] if ; + +: section-close? ( string -- ? ) + { + [ length 2 >= ] + [ "\\" head? not ] ! XXX: good? + [ ">" tail? ] + [ + { + [ but-last strict-upper? ] + [ { [ ";" head? ] [ rest but-last strict-upper? ] } 1&& ] + } 1|| + ] + } 1&& ; + +: read-til-semicolon ( n string slice -- n' string semi ) + dup '[ but-last ";" append ";" 2array { "--" ")" } append lex-colon-until ] dip + swap + ! What ended the FOO: .. ; form? + ! Remove the ; from the payload if present + ! XXX: probably can remove this, T: is dumb + ! Also in stack effects ( T: int -- ) can be ended by -- and ) + dup ?last { + { [ dup ";" sequence= ] [ drop unclip-last 3array ] } + { [ dup ";" tail? ] [ drop unclip-last 3array ] } + { [ dup "--" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } + { [ dup "]" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } + { [ dup "}" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } + { [ dup ")" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } ! (n*quot) breaks + { [ dup section-close? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } + { [ dup upper-colon? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } + [ drop 2array ] + } cond ; + +ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ; +: read-colon ( n string slice -- n' string colon ) + { + { [ dup strict-upper? ] [ read-til-semicolon ] } + { [ dup ":" tail? ] [ dup ":" head? [ read-lowercase-colon ] unless ] } ! :foo: vs foo: + [ ] + } cond ; + +: read-acute-html ( n string slice -- n' string acute ) + { + ! + { [ dup html-self-close? ] [ + ! do nothing special + ] } + ! + { [ dup html-full-open? ] [ + dup [ + rest-slice + dup ">" tail? [ but-last-slice ] when + "" surround 1array lex-until unclip-last + ] dip -rot 3array + ] } + ! " "/>" } lex-until ] dip + ! n seq slice2 slice + over ">" sequence= [ + "" surround array '[ _ lex-until ] dip unclip-last + -rot roll unclip-last [ 3array ] 2dip 3array + ] [ + ! self-contained + swap unclip-last 3array + ] if + ] } + ! + { [ dup html-close? ] [ + ! Do nothing + ] } + [ [ slice-til-whitespace drop ] dip span-slices ] + } cond ; + +: read-acute ( n string slice -- n' string acute ) + [ matching-section-delimiter 1array lex-until ] keep swap unclip-last 3array ; + +! Words like append! and suffix! are allowed for now. +: read-exclamation ( n string slice -- n' string obj ) + dup { [ "!" sequence= ] [ "#!" sequence= ] } 1|| + [ take-comment ] [ merge-slice-til-whitespace ] if ; + +ERROR: no-backslash-payload n string slice ; +: (read-backslash) ( n string slice -- n' string obj ) + merge-slice-til-whitespace dup "\\" tail? [ + ! \ foo, M\ foo + dup [ CHAR: \\ = ] count-tail + '[ + _ [ skip-blank-from slice-til-whitespace drop ] replicate + ensure-no-false + dup [ no-backslash-payload ] unless + ] dip swap 2array + ] when ; + +DEFER: lex-factor-top* +: read-backslash ( n string slice -- n' string obj ) + ! foo\ so far, could be foo\bar{ + ! remove the \ and continue til delimiter/eof + [ "\"!:[{(<>\s\r\n" slice-til-either ] dip swap [ span-slices ] dip + over "\\" head? [ + drop + ! \ foo + dup [ CHAR: \\ = ] all? [ (read-backslash) ] [ merge-slice-til-whitespace ] if + ] [ + ! foo\ or foo\bar (?) + over "\\" tail? [ drop (read-backslash) ] [ lex-factor-top* ] if + ] if ; + +! If the slice is 0 width, we stopped on whitespace. +! Advance the index and read again! + +: read-token-or-whitespace-top ( n string slice -- n' string slice/f ) + dup length 0 = [ [ 1 + ] 2dip drop lex-factor-top ] when ; + +: read-token-or-whitespace-nested ( n string slice -- n' string slice/f ) + dup length 0 = [ [ 1 + ] 2dip drop lex-factor-nested ] when ; + +: lex-factor-fallthrough ( n/f string slice/f ch/f -- n'/f string literal ) + { + { CHAR: \ [ read-backslash ] } + { CHAR: [ [ read-bracket ] } + { CHAR: { [ read-brace ] } + { CHAR: ( [ read-paren ] } + { CHAR: ] [ ] } + { CHAR: } [ ] } + { CHAR: ) [ ] } + { CHAR: " [ read-string ] } + { CHAR: ! [ read-exclamation ] } + { CHAR: > [ + [ [ CHAR: > = not ] slice-until ] dip merge-slices + dup section-close? [ + [ slice-til-whitespace drop ] dip ?span-slices + ] unless + ] } + { f [ ] } + } case ; + +! Inside a FOO: or a +: lex-factor-nested* ( n/f string slice/f ch/f -- n'/f string literal ) + { + ! Nested ``A: a B: b`` so rewind and let the parser get it top-level + { CHAR: : [ + ! A: B: then interrupt the current parser + ! A: b: then keep going + merge-slice-til-whitespace + dup { [ upper-colon? ] [ ":" = ] } 1|| + ! dup upper-colon? + [ rewind-slice f ] + [ read-colon ] if + ] } + { CHAR: < [ + ! FOO: a b + ! FOO: a b + ! FOO: a b + ! FOO: a b + + ! if we are in a FOO: and we hit a or + [ slice-til-whitespace drop ] dip span-slices + dup section-open? [ rewind-slice f ] when + ] } + { CHAR: \s [ read-token-or-whitespace-nested ] } + { CHAR: \r [ read-token-or-whitespace-nested ] } + { CHAR: \n [ read-token-or-whitespace-nested ] } + [ lex-factor-fallthrough ] + } case ; + +: lex-factor-nested ( n/f string -- n'/f string literal ) + ! skip-whitespace + "\"\\!:[{(]})<>\s\r\n" slice-til-either + lex-factor-nested* ; inline + +: lex-factor-top* ( n/f string slice/f ch/f -- n'/f string literal ) + { + { CHAR: : [ merge-slice-til-whitespace read-colon ] } + { CHAR: < [ + ! FOO: a b + ! FOO: a b + ! FOO: a b + ! FOO: a b + + ! if we are in a FOO: and we hit a \s\r\n" slice-til-either + lex-factor-top* ; inline + +ERROR: compound-syntax-disallowed n seq obj ; +: check-for-compound-syntax ( n/f seq obj -- n/f seq obj ) + dup length 1 > [ compound-syntax-disallowed ] when ; + +: check-compound-loop ( n/f string -- n/f string ? ) + [ ] [ peek-from ] [ previous-from ] 2tri + [ blank? ] bi@ or not ! no blanks between tokens + pick and ; ! and a valid index + +: lex-factor ( n/f string/f -- n'/f string literal/f ) + [ + ! Compound syntax loop + [ + lex-factor-top f like [ , ] when* + ! concatenated syntax ( a )[ a 1 + ]( b ) + check-compound-loop + ] loop + ] { } make + check-for-compound-syntax + ! concat ! "ALIAS: n*quot (n*quot)" string>literals ... breaks here + ?first f like ; + +: string>literals ( string -- sequence ) + [ 0 ] dip [ + [ lex-factor [ , ] when* over ] loop + ] { } make 2nip ; + +: vocab>literals ( vocab -- sequence ) + ".private" ?tail drop + vocab-source-path utf8 file-contents string>literals ; + +: path>literals ( path -- sequence ) + utf8 file-contents string>literals ; + +: lex-paths ( vocabs -- assoc ) + [ [ path>literals ] [ nip ] recover ] map-zip ; + +: lex-vocabs ( vocabs -- assoc ) + [ [ vocab>literals ] [ nip ] recover ] map-zip ; + +: failed-lexing ( assoc -- assoc' ) [ nip array? ] assoc-reject ; + +: lex-core ( -- assoc ) core-bootstrap-vocabs lex-vocabs ; +: lex-basis ( -- assoc ) basis-vocabs lex-vocabs ; +: lex-extra ( -- assoc ) extra-vocabs lex-vocabs ; +: lex-roots ( -- assoc ) lex-core lex-basis lex-extra 3append ; + +: lex-docs ( -- assoc ) all-docs-paths lex-paths ; +: lex-tests ( -- assoc ) all-tests-paths lex-paths ; + +: lex-all ( -- assoc ) + lex-roots lex-docs lex-tests 3append ; diff --git a/extra/modern/out/authors.txt b/extra/modern/out/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/modern/out/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/modern/out/out.factor b/extra/modern/out/out.factor new file mode 100644 index 0000000000..86a8cf81d9 --- /dev/null +++ b/extra/modern/out/out.factor @@ -0,0 +1,108 @@ +! Copyright (C) 2017 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs combinators.short-circuit +constructors continuations io io.encodings.utf8 io.files +io.streams.string kernel modern modern.paths modern.slices +prettyprint sequences sequences.extras splitting strings +vocabs.loader ; +IN: modern.out + +: token? ( obj -- ? ) + { [ slice? ] [ seq>> string? ] } 1&& ; + +TUPLE: renamed slice string ; +CONSTRUCTOR: renamed ( slice string -- obj ) ; + +: trim-before-newline ( seq -- seq' ) + dup [ char: \s = not ] find + { char: \r char: \n } member? + [ tail-slice ] [ drop ] if ; + +: write-whitespace ( last obj -- ) + swap + [ swap slice-between ] [ slice-before ] if* + trim-before-newline io::write ; + +GENERIC: write-literal* ( last obj -- last' ) +M: slice write-literal* [ write-whitespace ] [ write ] [ ] tri ; +M: array write-literal* [ write-literal* ] each ; +M: renamed write-literal* [ slice>> write-whitespace ] [ string>> write ] [ slice>> ] tri ; ! for refactoring + + + +DEFER: map-literals +: (map-literals) ( obj quot: ( obj -- obj' ) -- seq ) + over [ array? ] any? [ + [ call drop ] [ map-literals ] 2bi + ] [ + over array? [ map-literals ] [ call ] if + ] if ; inline recursive + +: map-literals ( obj quot: ( obj -- obj' ) -- seq ) + '[ _ (map-literals) ] map ; inline recursive + + + +! Start with no slice as ``last`` +: write-literal ( obj -- ) f swap write-literal* drop ; + +: write-modern-string ( seq -- string ) + [ write-literal ] with-string-writer ; inline + +: write-modern-path ( seq path -- ) + utf8 [ write-literal nl ] with-file-writer ; inline + +: write-modern-vocab ( seq vocab -- ) + vocab-source-path write-modern-path ; inline + +: rewrite-path ( path quot: ( obj -- obj' ) -- ) + ! dup print + '[ [ path>literals _ map-literals ] [ ] bi write-modern-path ] + [ drop . ] recover ; inline recursive + +: rewrite-string ( string quot: ( obj -- obj' ) -- ) + ! dup print + [ string>literals ] dip map-literals write-modern-string ; inline recursive + +: rewrite-paths ( seq quot: ( obj -- obj' ) -- ) '[ _ rewrite-path ] each ; inline recursive + +: rewrite-vocab ( vocab quot: ( obj -- obj' ) -- ) + [ [ vocab>literals ] dip map-literals ] 2keep drop write-modern-vocab ; inline recursive + +: rewrite-string-exact ( string -- string' ) + string>literals write-modern-string ; + +![[ +: rewrite-path-exact ( path -- ) + [ path>literals ] [ ] bi write-modern-path ; + +: rewrite-vocab-exact ( name -- ) + vocab-source-path rewrite-path-exact ; + +: rewrite-paths ( paths -- ) + [ rewrite-path-exact ] each ; +]] + +: strings-core-to-file ( -- ) + core-bootstrap-vocabs + [ ".private" ?tail drop vocab-source-path utf8 file-contents ] map-zip + [ "[========[" dup matching-delimiter-string surround ] assoc-map + [ + first2 [ "VOCAB: " prepend ] dip " " glue + ] map + [ " " prepend ] map "\n\n" join + "" surround "resource:core-strings.factor" utf8 set-file-contents ; + +: parsed-core-to-file ( -- ) + core-bootstrap-vocabs + [ vocab>literals ] map-zip + [ + first2 [ "strings + ! [ 3 head ] [ 3 tail* ] bi [ >strings ] bi@ { "..." } glue + ";VOCAB>" 3array + ] map 1array + + { "" } surround "resource:core-parsed.factor" utf8 [ ... ] with-file-writer ; diff --git a/extra/modern/paths/authors.txt b/extra/modern/paths/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/modern/paths/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/modern/paths/paths.factor b/extra/modern/paths/paths.factor new file mode 100644 index 0000000000..d8f896e471 --- /dev/null +++ b/extra/modern/paths/paths.factor @@ -0,0 +1,107 @@ +! Copyright (C) 2015 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators.smart io.files kernel sequences +splitting vocabs.files vocabs.hierarchy vocabs.loader +vocabs.metadata sets ; +IN: modern.paths + +ERROR: not-a-source-path path ; + +: vocabs-from ( root -- vocabs ) + "" disk-vocabs-in-root/prefix + no-prefixes [ name>> ] map ; + +: core-vocabs ( -- seq ) "resource:core" vocabs-from ; +: less-core-test-vocabs ( seq -- seq' ) + { + "vocabs.loader.test.a" + "vocabs.loader.test.b" + "vocabs.loader.test.c" + "vocabs.loader.test.d" + "vocabs.loader.test.e" + "vocabs.loader.test.f" + "vocabs.loader.test.g" + "vocabs.loader.test.h" + "vocabs.loader.test.i" + "vocabs.loader.test.j" + "vocabs.loader.test.k" + "vocabs.loader.test.l" + "vocabs.loader.test.m" + "vocabs.loader.test.n" + "vocabs.loader.test.o" + "vocabs.loader.test.p" + } diff ; + +: core-bootstrap-vocabs ( -- seq ) + core-vocabs less-core-test-vocabs ; + +: basis-vocabs ( -- seq ) "resource:basis" vocabs-from ; +: extra-vocabs ( -- seq ) "resource:extra" vocabs-from ; +: all-vocabs ( -- seq ) + [ + core-vocabs + basis-vocabs + extra-vocabs + ] { } append-outputs-as ; + +: filter-exists ( seq -- seq' ) [ exists? ] filter ; + +! These paths have syntax errors on purpose... +: reject-some-paths ( seq -- seq' ) + { + "resource:core/vocabs/loader/test/a/a.factor" + "resource:core/vocabs/loader/test/b/b.factor" + "resource:core/vocabs/loader/test/c/c.factor" + ! Here down have parse errors + "resource:core/vocabs/loader/test/d/d.factor" + "resource:core/vocabs/loader/test/e/e.factor" + "resource:core/vocabs/loader/test/f/f.factor" + "resource:core/vocabs/loader/test/g/g.factor" + "resource:core/vocabs/loader/test/h/h.factor" + "resource:core/vocabs/loader/test/i/i.factor" + "resource:core/vocabs/loader/test/j/j.factor" + "resource:core/vocabs/loader/test/k/k.factor" + "resource:core/vocabs/loader/test/l/l.factor" + "resource:core/vocabs/loader/test/m/m.factor" + "resource:core/vocabs/loader/test/n/n.factor" + "resource:core/vocabs/loader/test/o/o.factor" + "resource:core/vocabs/loader/test/p/p.factor" + } diff + ! Don't parse .modern files yet + [ ".modern" tail? ] reject ; + +: modern-source-paths ( names -- paths ) + [ vocab-source-path ] map filter-exists reject-some-paths ; +: modern-docs-paths ( names -- paths ) + [ vocab-docs-path ] map filter-exists reject-some-paths ; +: modern-tests-paths ( names -- paths ) + [ vocab-tests ] map concat filter-exists reject-some-paths ; + +: all-source-paths ( -- seq ) + all-vocabs modern-source-paths ; + +: core-docs-paths ( -- seq ) core-vocabs modern-docs-paths ; +: basis-docs-paths ( -- seq ) basis-vocabs modern-docs-paths ; +: extra-docs-paths ( -- seq ) extra-vocabs modern-docs-paths ; + +: core-test-paths ( -- seq ) core-vocabs modern-tests-paths ; +: basis-test-paths ( -- seq ) basis-vocabs modern-tests-paths ; +: extra-test-paths ( -- seq ) extra-vocabs modern-tests-paths ; + + +: all-docs-paths ( -- seq ) all-vocabs modern-docs-paths ; + : all-tests-paths ( -- seq ) all-vocabs modern-tests-paths ; + +: all-paths ( -- seq ) + [ + all-source-paths all-docs-paths all-tests-paths + ] { } append-outputs-as ; + +: core-source-paths ( -- seq ) + core-vocabs modern-source-paths reject-some-paths ; +: basis-source-paths ( -- seq ) + basis-vocabs + modern-source-paths reject-some-paths ; +: extra-source-paths ( -- seq ) + extra-vocabs + modern-source-paths reject-some-paths ; diff --git a/extra/modern/slices/slices.factor b/extra/modern/slices/slices.factor new file mode 100644 index 0000000000..ad14276a06 --- /dev/null +++ b/extra/modern/slices/slices.factor @@ -0,0 +1,228 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors assocs fry kernel locals math sequences +sequences.deep sequences.extras strings unicode ; +IN: modern.slices + +: >strings ( seq -- str ) + [ dup slice? [ >string ] when ] deep-map ; + +: matching-delimiter ( ch -- ch' ) + H{ + { CHAR: ( CHAR: ) } + { CHAR: [ CHAR: ] } + { CHAR: { CHAR: } } + { CHAR: < CHAR: > } + { CHAR: : CHAR: ; } + } ?at drop ; + +: matching-delimiter-string ( string -- string' ) + [ matching-delimiter ] map ; + +: matching-section-delimiter ( string -- string' ) + dup ":" tail? [ + rest but-last ";" ">" surround + ] [ + rest ">" append + ] if ; + +ERROR: unexpected-end n string ; +: nth-check-eof ( n string -- nth ) + 2dup ?nth [ 2nip ] [ unexpected-end ] if* ; + +: peek-from ( n/f string -- ch ) + over [ ?nth ] [ 2drop f ] if ; + +: previous-from ( n/f string -- ch ) + over [ [ 1 - ] dip ?nth ] [ 2drop f ] if ; + +! Allow eof +: next-char-from ( n/f string -- n'/f string ch/f ) + over [ + 2dup ?nth [ [ 1 + ] 2dip ] [ f ] if* + ] [ + [ 2drop f ] [ nip ] 2bi f + ] if ; + +: prev-char-from-slice-end ( slice -- ch/f ) + [ to>> 2 - ] [ seq>> ] bi ?nth ; + +: prev-char-from-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: next-char-from-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: char-before-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: char-after-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? ) + [ find-from ] 2keep drop + pick [ drop t ] [ length -rot nip f ] if ; inline + +: skip-blank-from ( n string -- n' string ) + over [ + [ [ blank? not ] find-from* 2drop ] keep + ] when ; inline + +: skip-til-eol-from ( n string -- n' string ) + [ [ "\r\n" member? ] find-from* 2drop ] keep ; inline + +! Don't include the whitespace in the slice +:: slice-til-whitespace ( n string -- n' string slice/f ch/f ) + n [ + n string [ "\s\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + f string f f + ] if ; inline + +:: (slice-until) ( n string quot -- n' string slice/f ch/f ) + n string quot find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-until ( n string quot -- n' string slice/f ) + (slice-until) drop ; inline + +:: slice-til-not-whitespace ( n string -- n' string slice/f ch/f ) + n [ + n string [ "\s\r\n" member? not ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string f f + ] if ; inline + +: skip-whitespace ( n/f string -- n'/f string ) + slice-til-not-whitespace 2drop ; + +: empty-slice-end ( seq -- slice ) + [ length dup ] [ ] bi ; inline + +: empty-slice-from ( n seq -- slice ) + dupd ; inline + +:: slice-til-eol ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +:: merge-slice-til-eol-slash'' ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n\\" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +: merge-slice-til-whitespace ( n string slice -- n' string slice' ) + pick [ + [ slice-til-whitespace drop ] dip merge-slices + ] when ; + +: merge-slice-til-eol ( n string slice -- n' string slice' ) + [ slice-til-eol drop ] dip merge-slices ; + +: slice-between ( slice1 slice2 -- slice ) + ! ensure-same-underlying + slice-order-by-from + [ to>> ] + [ [ from>> 2dup < [ swap ] unless ] [ seq>> ] bi ] bi* ; + +: slice-before ( slice -- slice' ) + [ drop 0 ] [ from>> ] [ seq>> ] tri ; + +: (?nth) ( n/f string/f -- obj/f ) + over [ (?nth) ] [ 2drop f ] if ; + +:: merge-slice-til-eol-slash' ( n string slice -- n' string slice/f ch/f ) + n string merge-slice-til-eol-slash'' :> ( n' string' slice' ch' ) + ch' CHAR: \\ = [ + n' 1 + string' (?nth) "\r\n" member? [ + n' 2 + string' slice slice' span-slices merge-slice-til-eol-slash' + ] [ + "omg" throw + ] if + ] [ + n' string' slice slice' span-slices ch' + ] if ; + +! Supports \ at eol (with no space after it) +: slice-til-eol-slash ( n string -- n' string slice/f ch/f ) + 2dup empty-slice-from merge-slice-til-eol-slash' ; + +:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f ) + n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f ) + slice-til-separator-inclusive dup [ + [ [ 1 - ] change-to ] dip + ] when ; + +! Takes at least one character if not whitespace +:: slice-til-either ( n string tokens -- n'/f string slice/f ch/f ) + n [ + n string '[ tokens member? ] find-from + dup "\s\r\n" member? [ + :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch + ] if + ] [ + f string f f + ] if ; inline + +ERROR: subseq-expected-but-got-eof n string expected ; + +:: slice-til-string ( n string search -- n' string payload end-string ) + search string n subseq-start-from :> n' + n' [ n string search subseq-expected-but-got-eof ] unless + n' search length + string + n n' string ? + n' dup search length + string ? ; + +: modify-from ( slice n -- slice' ) + '[ from>> _ + ] [ to>> ] [ seq>> ] tri ; + +: modify-to ( slice n -- slice' ) + [ [ from>> ] [ to>> ] [ seq>> ] tri ] dip + swap [ + ] dip ; + +! { CHAR: \] [ read-closing ] } +! { CHAR: \} [ read-closing ] } +! { CHAR: \) [ read-closing ] } +: read-closing ( n string tok -- n string tok ) + dup length 1 = [ + -1 modify-to [ 1 - ] 2dip + ] unless ; + +: rewind-slice ( n string slice -- n' string ) + pick [ + length swap [ - ] dip + ] [ + [ nip ] dip [ [ length ] bi@ - ] 2keep drop + ] if ; inline