diff --git a/extra/modern/modern.factor b/extra/modern/modern.factor new file mode 100644 index 0000000000..4b4611a405 --- /dev/null +++ b/extra/modern/modern.factor @@ -0,0 +1,234 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs combinators +combinators.short-circuit continuations fry io.encodings.utf8 +io.files kernel locals make math math.order modern.paths +modern.slices namespaces sequences sequences.extras shuffle +splitting strings unicode ; +IN: modern + +ERROR: string-expected-got-eof n string ; +ERROR: long-opening-mismatch tag open n string ch ; + +SYMBOL: delimiter-stack +: push-delimiter-stack ( obj -- ) delimiter-stack get push ; +: pop-delimiter-stack ( -- obj ) delimiter-stack get pop ; + +! (( )) [[ ]] {{ }} +MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) ) + open-ch dup matching-delimiter { + [ drop 2 swap ] + [ drop 1string ] + [ nip 2 swap ] + } 2cleave :> ( openstr2 openstr1 closestr2 ) + [| n string tag! ch | + ch { + { CHAR: = [ + n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch ) + ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless + opening matching-delimiter-string :> needle + + n' string' needle slice-til-string :> ( n'' string'' payload closing ) + n'' string + payload closing tag opening 4array + ] } + { open-ch [ + tag 1 cut-slice* swap tag! 1 modify-to :> opening + n 1 + string closestr2 slice-til-string :> ( n' string' payload closing ) + n' string + payload closing tag opening 4array + ] } + [ [ tag openstr2 n string ] dip long-opening-mismatch ] + } case + ] ; + +: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ; +: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ; +: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ; + +DEFER: lex-factor +ERROR: lex-expected-but-got-eof n string expected ; +! For implementing [ { ( +: lex-until ( n string tags -- n' string payload closing ) + pick [ + 3dup '[ + [ + lex-factor dup , [ + dup [ + ! } gets a chance, but then also full seq { } after recursion... + [ _ ] dip '[ _ sequence= ] any? not + ] [ + drop t ! loop again? + ] if + ] [ + _ _ _ lex-expected-but-got-eof + ] if* + ] loop + ] { } make unclip-last + ] [ + lex-expected-but-got-eof + ] if ; + +: lex-colon-until ( n string tags -- n' string payload closing ) + pick [ + 3dup '[ + [ + lex-factor dup [ , ] when* [ + dup [ + ! } gets a chance, but then also full seq { } after recursion... + [ _ ] dip '[ _ sequence= ] any? not + ] [ + drop t ! loop again? + ] if + ] [ + _ _ _ lex-expected-but-got-eof + ] if* + ] loop + ] { } make unclip-last + ] [ + lex-expected-but-got-eof + ] if ; + +: split-double-dash ( seq -- seqs ) + dup [ { [ "--" sequence= ] } 1&& ] split-when + dup length 1 > [ nip ] [ drop ] if ; + +MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) ) + ch dup matching-delimiter { + [ drop "=" swap prefix ] + [ nip 1string ] + } 2cleave :> ( openstreq closestr1 ) ! [= ] + [| n string tag | + n string tag + 2over nth-check-eof { + { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or (( + { [ dup blank? ] [ + drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip + 1 cut-slice* 2swap 4array ] } ! ( foo ) + [ drop [ slice-til-whitespace drop ] dip span-slices ] ! (foo) + } cond + ] ; + +: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ; +: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ; +: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ; +: read-string-payload ( n string -- n' string ) + over [ + { CHAR: \ CHAR: " } slice-til-separator-inclusive { + { f [ drop ] } + { CHAR: " [ drop ] } + { CHAR: \ [ drop next-char-from drop read-string-payload ] } + } case + ] [ + string-expected-got-eof + ] if ; + +:: read-string ( n string tag -- n' string seq ) + n string read-string-payload drop :> n' + n' string + n' [ n string string-expected-got-eof ] unless + n n' 1 - string + n' 1 - n' string + tag 1 cut-slice* 4array ; + +: take-comment ( n string slice -- n' string comment ) + 2over ?nth CHAR: [ = [ + [ 1 + ] 2dip 2over ?nth read-double-matched-bracket + ] [ + [ slice-til-eol drop dup ] dip 1 cut-slice* 4array + ] if ; + +: read-til-semicolon ( n string slice -- n' string semi ) + dup '[ but-last ";" append ";" 2array lex-colon-until ] dip + -rot 3array ; + +: read-word-or-til-semicolon ( n string slice -- n' string obj ) + 2over next-char-from* "\s\r\n" member? [ + read-til-semicolon + ] [ + merge-slice-til-whitespace + ] if ; + +: read-lowercase-colon ( n string slice -- n' string lowercase-colon ) + [ lex-factor ] dip swap 2array ; + +: strict-upper? ( string -- ? ) + [ { [ CHAR: A CHAR: Z between? ] [ "#:-" member? ] } 1|| ] all? ; + +ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ; +: read-colon ( n string slice -- n' string colon ) + dup length 1 = [ + dup prev-char-from-slice { CHAR: \s CHAR: \r CHAR: \n f } member? [ + read-til-semicolon + ] [ + read-lowercase-colon + ] if + ] [ + { + { [ dup strict-upper? ] [ B read-til-semicolon ] } + [ read-lowercase-colon ] + } cond + ] if ; + +: read-acute ( n string slice -- n' string acute ) + ; + +! Words like append! and suffix! are allowed for now. +: read-exclamation ( n string slice -- n' string obj ) + dup { [ "!" sequence= ] [ "#!" sequence= ] } 1|| + [ take-comment ] [ merge-slice-til-whitespace ] if ; + +ERROR: backslash-expects-whitespace slice ; +: read-backslash ( n string slice -- n' string obj ) + 2over peek-from blank? [ + ! \ foo, M\ foo + [ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* 4array + ] [ + ! M\N + merge-slice-til-whitespace + ] if ; + +! If the slice is 0 width, we stopped on whitespace. +! Advance the index and read again! +: read-token-or-whitespace ( n string slice -- n' string slice ) + dup length 0 = + [ drop [ 1 + ] dip lex-factor ] when ; + +ERROR: mismatched-terminator n string slice ; +: read-terminator ( n string slice -- n' string slice ) ; + +: lex-factor ( n/f string -- n'/f string literal ) + over [ + skip-whitespace "\"\\!:[{(\s\r\n" slice-til-either { + ! { CHAR: ` [ read-backtick ] } + { CHAR: " [ read-string ] } + { CHAR: \ [ read-backslash ] } + { CHAR: ! [ read-exclamation ] } + { CHAR: : [ read-colon ] } + { CHAR: [ [ read-bracket ] } + { CHAR: { [ read-brace ] } + { CHAR: ( [ read-paren ] } + { CHAR: < [ read-acute ] } + { CHAR: \s [ read-token-or-whitespace ] } + { CHAR: \r [ read-token-or-whitespace ] } + { CHAR: \n [ read-token-or-whitespace ] } + { f [ f like ] } + } case + ] [ + f + ] if ; inline + +: string>literals ( string -- sequence ) + [ V{ } clone delimiter-stack ] dip '[ + _ [ 0 ] dip [ lex-factor ] loop>array 2nip + ] with-variable ; + +: vocab>literals ( vocab -- sequence ) + ".private" ?tail drop + modern-source-path utf8 file-contents string>literals ; + +: path>literals ( path -- sequence ) + utf8 file-contents string>literals ; + +: lex-core ( -- assoc ) + core-bootstrap-vocabs [ [ vocab>literals ] [ nip ] recover ] map-zip ; diff --git a/extra/modern/slices/slices.factor b/extra/modern/slices/slices.factor new file mode 100644 index 0000000000..ac1bb967bc --- /dev/null +++ b/extra/modern/slices/slices.factor @@ -0,0 +1,203 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors assocs fry kernel locals math math.private +sequences sequences.extras sequences.private unicode ; +IN: modern.slices + +: matching-delimiter ( ch -- ch' ) + H{ + { CHAR: ( CHAR: ) } + { CHAR: [ CHAR: ] } + { CHAR: { CHAR: } } + { CHAR: < CHAR: > } + { CHAR: : CHAR: ; } + } ?at drop ; + +: matching-delimiter-string ( string -- string' ) + [ matching-delimiter ] map ; + +ERROR: unexpected-end n string ; +: nth-check-eof ( n string -- nth ) + 2dup ?nth [ 2nip ] [ unexpected-end ] if* ; + +: peek-from ( n/f string -- ch ) + over [ ?nth ] [ 2drop f ] if ; + +! Allow eof +: next-char-from ( n/f string -- n'/f string ch/f ) + over [ + 2dup ?nth [ [ 1 + ] 2dip ] [ f ] if* + ] [ + [ 2drop f ] [ nip ] 2bi f + ] if ; + +: prev-char-from-slice-end ( slice -- ch/f ) + [ to>> 2 - ] [ seq>> ] bi ?nth ; + +: prev-char-from-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: next-char-from-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: char-before-slice ( slice -- ch/f ) + [ from>> 1 - ] [ seq>> ] bi ?nth ; + +: char-after-slice ( slice -- ch/f ) + [ to>> ] [ seq>> ] bi ?nth ; + +: next-char-from* ( n/f string -- ch/f ) + next-char-from 2nip ; + +: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? ) + [ find-from ] keep + pick [ drop t ] [ length -rot nip f ] if ; inline + +: skip-blank-from ( n string -- n' string ) + [ [ blank? not ] find-from* 2drop ] keep ; inline + +: skip-til-eol-from ( n string -- n' string ) + [ [ "\r\n" member? ] find-from* 2drop ] keep ; inline + +! Don't include the whitespace in the slice +:: slice-til-whitespace ( n string -- n' string slice/f ch/f ) + n string [ "\s\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +:: slice-until' ( n string quot -- n' string slice/f ch/f ) + n string quot find-from :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-until ( n string quot -- n' string slice/f ) + slice-until' drop ; inline + +:: slice-til-not-whitespace ( n string -- n' string slice/f ch/f ) + n [ + n string [ "\s\r\n" member? not ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string f f + ] if ; inline + +: skip-whitespace ( n string -- n' string ) + slice-til-not-whitespace 2drop ; + +: empty-slice-end ( seq -- slice ) + [ length dup ] [ ] bi ; inline + +: empty-slice-from ( n seq -- slice ) + dupd ; inline + +:: slice-til-eol ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +:: merge-slice-til-eol-slash'' ( n string -- n' string slice/f ch/f ) + n [ + n string '[ "\r\n\\" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + n string string empty-slice-end f + ] if ; inline + +: merge-slice-til-whitespace ( n string slice -- n' string slice' ) + [ slice-til-whitespace drop ] dip merge-slices ; + +: merge-slice-til-eol ( n string slice -- n' string slice' ) + [ slice-til-eol drop ] dip merge-slices ; + +: slice-between ( slice1 slice2 -- slice ) + ! ensure-same-underlying + slice-order-by-from + [ to>> ] + [ [ from>> 2dup < [ swap ] unless ] [ seq>> ] bi ] bi* ; + +: slice-before ( slice -- slice' ) + [ drop 0 ] [ from>> ] [ seq>> ] tri ; + +: ?nth' ( n/f string/f -- obj/f ) + over [ ?nth ] [ 2drop f ] if ; + +:: merge-slice-til-eol-slash' ( n string slice -- n' string slice/f ch/f ) + n string merge-slice-til-eol-slash'' :> ( n' string' slice' ch' ) + ch' CHAR: \ = [ + n' 1 + string' ?nth' "\r\n" member? [ + n' 2 + string' slice slice' span-slices merge-slice-til-eol-slash' + ] [ + "omg" throw + ] if + ] [ + n' string' slice slice' span-slices ch' + ] if ; + +! Supports \ at eol (with no space after it) +: slice-til-eol-slash ( n string -- n' string slice/f ch/f ) + 2dup empty-slice-from merge-slice-til-eol-slash' ; + +:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f ) + n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f ) + slice-til-separator-inclusive dup [ + [ [ 1 - ] change-to ] dip + ] when ; + +:: slice-til-either ( n string tokens -- n'/f string slice/f ch ) + n [ + n string '[ tokens member? ] find-from + dup "\s\r\n" member? [ + :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch + ] if + ] [ + f string f f + ] if ; inline + +ERROR: subseq-expected-but-got-eof n string expected ; + +:: slice-til-string ( n string search -- n' string payload end-string ) + search string n subseq-start-from :> n' + n' [ n string search subseq-expected-but-got-eof ] unless + n' search length + string + n n' string ? + n' dup search length + string ? ; + +: modify-from ( slice n -- slice' ) + '[ from>> _ + ] [ to>> ] [ seq>> ] tri ; + +: modify-to ( slice n -- slice' ) + [ [ from>> ] [ to>> ] [ seq>> ] tri ] dip + swap [ + ] dip ; + +! { CHAR: ] [ read-closing ] } +! { CHAR: } [ read-closing ] } +! { CHAR: ) [ read-closing ] } +: read-closing ( n string tok -- n string tok ) + dup length 1 = [ + -1 modify-to [ 1 - ] 2dip + ] unless ; +