modern: Add a stripped-down parser from what I had.

2017-08-21 21:29:09 -05:00 · 2017-08-21 21:29:09 -05:00 · 218530209f
parent 081b642dee
commit 218530209f
2 changed files with 437 additions and 0 deletions
--- a/extra/modern/modern.factor
+++ b/extra/modern/modern.factor
@ -0,0 +1,234 @@
+! Copyright (C) 2016 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators
+combinators.short-circuit continuations fry io.encodings.utf8
+io.files kernel locals make math math.order modern.paths
+modern.slices namespaces sequences sequences.extras shuffle
+splitting strings unicode ;
+IN: modern
+
+ERROR: string-expected-got-eof n string ;
+ERROR: long-opening-mismatch tag open n string ch ;
+
+SYMBOL: delimiter-stack
+: push-delimiter-stack ( obj -- ) delimiter-stack get push ;
+: pop-delimiter-stack ( -- obj ) delimiter-stack get pop ;
+
+! (( )) [[ ]] {{ }}
+MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) )
+    open-ch dup matching-delimiter {
+        [ drop 2 swap <string> ]
+        [ drop 1string ]
+        [ nip 2 swap <string> ]
+    } 2cleave :> ( openstr2 openstr1 closestr2 )
+    [| n string tag! ch |
+        ch {
+            { CHAR: = [
+                n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )
+                ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless
+                opening matching-delimiter-string :> needle
+
+                n' string' needle slice-til-string :> ( n'' string'' payload closing )
+                n'' string
+                payload closing tag opening 4array
+            ] }
+            { open-ch [
+                tag 1 cut-slice* swap tag! 1 modify-to :> opening
+                n 1 + string closestr2 slice-til-string :> ( n' string' payload closing )
+                n' string
+                payload closing tag opening 4array
+            ] }
+            [ [ tag openstr2 n string ] dip long-opening-mismatch ]
+        } case
+     ] ;
+
+: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ;
+: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ;
+: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ;
+
+DEFER: lex-factor
+ERROR: lex-expected-but-got-eof n string expected ;
+! For implementing [ { (
+: lex-until ( n string tags -- n' string payload closing )
+    pick [
+        3dup '[
+            [
+                lex-factor dup , [
+                    dup [
+                        ! } gets a chance, but then also full seq { } after recursion...
+                        [ _ ] dip '[ _ sequence= ] any? not
+                    ] [
+                        drop t ! loop again?
+                    ] if
+                ] [
+                    _ _ _ lex-expected-but-got-eof
+                ] if*
+            ] loop
+        ] { } make unclip-last
+    ] [
+        lex-expected-but-got-eof
+    ] if ;
+
+: lex-colon-until ( n string tags -- n' string payload closing )
+    pick [
+        3dup '[
+            [
+                lex-factor dup [ , ] when* [
+                    dup [
+                        ! } gets a chance, but then also full seq { } after recursion...
+                        [ _ ] dip '[ _ sequence= ] any? not
+                    ] [
+                        drop t ! loop again?
+                    ] if
+                ] [
+                    _ _ _ lex-expected-but-got-eof
+                ] if*
+            ] loop
+        ] { } make unclip-last
+    ] [
+        lex-expected-but-got-eof
+    ] if ;
+
+: split-double-dash ( seq -- seqs )
+    dup [ { [ "--" sequence= ] } 1&& ] split-when
+    dup length 1 > [ nip ] [ drop ] if ;
+
+MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )
+    ch dup matching-delimiter {
+        [ drop "=" swap prefix ]
+        [ nip 1string ]
+    } 2cleave :> ( openstreq closestr1 )  ! [= ]
+    [| n string tag |
+        n string tag
+        2over nth-check-eof {
+            { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or ((
+            { [ dup blank? ] [
+                drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip
+                1 cut-slice* 2swap 4array ] } ! ( foo )
+            [ drop [ slice-til-whitespace drop ] dip span-slices ]  ! (foo)
+        } cond
+    ] ;
+
+: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ;
+: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ;
+: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ;
+: read-string-payload ( n string -- n' string )
+    over [
+        { CHAR: \ CHAR: " } slice-til-separator-inclusive {
+            { f [ drop ] }
+            { CHAR: " [ drop ] }
+            { CHAR: \ [ drop next-char-from drop read-string-payload ] }
+        } case
+    ] [
+        string-expected-got-eof
+    ] if ;
+
+:: read-string ( n string tag -- n' string seq )
+    n string read-string-payload drop :> n'
+    n' string
+    n' [ n string string-expected-got-eof ] unless
+    n n' 1 - string <slice>
+    n' 1 - n' string <slice>
+    tag 1 cut-slice* 4array ;
+
+: take-comment ( n string slice -- n' string comment )
+    2over ?nth CHAR: [ = [
+        [ 1 + ] 2dip 2over ?nth read-double-matched-bracket
+    ] [
+        [ slice-til-eol drop dup ] dip 1 cut-slice* 4array
+    ] if ;
+
+: read-til-semicolon ( n string slice -- n' string semi )
+    dup '[ but-last ";" append ";" 2array lex-colon-until ] dip
+    -rot 3array ;
+
+: read-word-or-til-semicolon ( n string slice -- n' string obj )
+    2over next-char-from* "\s\r\n" member? [
+        read-til-semicolon
+    ] [
+        merge-slice-til-whitespace
+    ] if ;
+
+: read-lowercase-colon ( n string slice -- n' string lowercase-colon )
+    [ lex-factor ] dip swap 2array ;
+
+: strict-upper? ( string -- ? )
+    [ { [ CHAR: A CHAR: Z between? ] [ "#:-" member? ] } 1|| ] all? ;
+
+ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ;
+: read-colon ( n string slice -- n' string colon )
+    dup length 1 = [
+        dup prev-char-from-slice { CHAR: \s CHAR: \r CHAR: \n f } member? [
+            read-til-semicolon
+        ] [
+            read-lowercase-colon
+        ] if
+    ] [
+        {
+            { [ dup strict-upper? ] [ B read-til-semicolon ] }
+            [ read-lowercase-colon ]
+        } cond
+    ] if ;
+
+: read-acute ( n string slice -- n' string acute )
+    ;
+
+! Words like append! and suffix! are allowed for now.
+: read-exclamation ( n string slice -- n' string obj )
+    dup { [ "!" sequence= ] [ "#!" sequence= ] } 1||
+    [ take-comment ] [ merge-slice-til-whitespace ] if ;
+
+ERROR: backslash-expects-whitespace slice ;
+: read-backslash ( n string slice -- n' string obj )
+    2over peek-from blank? [
+        ! \ foo, M\ foo
+        [ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* 4array
+    ] [
+        ! M\N
+        merge-slice-til-whitespace
+    ] if ;
+
+! If the slice is 0 width, we stopped on whitespace.
+! Advance the index and read again!
+: read-token-or-whitespace ( n string slice -- n' string slice )
+    dup length 0 =
+    [ drop [ 1 + ] dip lex-factor ] when ;
+
+ERROR: mismatched-terminator n string slice ;
+: read-terminator ( n string slice -- n' string slice ) ;
+
+: lex-factor ( n/f string -- n'/f string literal )
+    over [
+        skip-whitespace "\"\\!:[{(\s\r\n" slice-til-either {
+            ! { CHAR: ` [ read-backtick ] }
+            { CHAR: " [ read-string ] }
+            { CHAR: \ [ read-backslash ] }
+            { CHAR: ! [ read-exclamation ] }
+            { CHAR: : [ read-colon ] }
+            { CHAR: [ [ read-bracket ] }
+            { CHAR: { [ read-brace ] }
+            { CHAR: ( [ read-paren ] }
+            { CHAR: < [ read-acute ] }
+            { CHAR: \s [ read-token-or-whitespace ] }
+            { CHAR: \r [ read-token-or-whitespace ] }
+            { CHAR: \n [ read-token-or-whitespace ] }
+            { f [ f like ] }
+        } case
+    ] [
+        f
+    ] if ; inline
+
+: string>literals ( string -- sequence )
+    [ V{ } clone delimiter-stack ] dip '[
+        _ [ 0 ] dip [ lex-factor ] loop>array 2nip
+    ] with-variable ;
+
+: vocab>literals ( vocab -- sequence )
+    ".private" ?tail drop
+    modern-source-path utf8 file-contents string>literals ;
+
+: path>literals ( path -- sequence )
+    utf8 file-contents string>literals ;
+
+: lex-core ( -- assoc )
+    core-bootstrap-vocabs [ [ vocab>literals ] [ nip ] recover ] map-zip ;
--- a/extra/modern/slices/slices.factor
+++ b/extra/modern/slices/slices.factor
@ -0,0 +1,203 @@
+! Copyright (C) 2016 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors assocs fry kernel locals math math.private
+sequences sequences.extras sequences.private unicode ;
+IN: modern.slices
+
+: matching-delimiter ( ch -- ch' )
+    H{
+        { CHAR: ( CHAR: ) }
+        { CHAR: [ CHAR: ] }
+        { CHAR: { CHAR: } }
+        { CHAR: < CHAR: > }
+        { CHAR: : CHAR: ; }
+    } ?at drop ;
+
+: matching-delimiter-string ( string -- string' )
+    [ matching-delimiter ] map ;
+
+ERROR: unexpected-end n string ;
+: nth-check-eof ( n string -- nth )
+    2dup ?nth [ 2nip ] [ unexpected-end ] if* ;
+
+: peek-from ( n/f string -- ch )
+    over [ ?nth ] [ 2drop f ] if ;
+
+! Allow eof
+: next-char-from ( n/f string -- n'/f string ch/f )
+    over [
+        2dup ?nth [ [ 1 + ] 2dip ] [ f ] if*
+    ] [
+        [ 2drop f ] [ nip ] 2bi f
+    ] if ;
+
+: prev-char-from-slice-end ( slice -- ch/f )
+    [ to>> 2 - ] [ seq>> ] bi ?nth ;
+
+: prev-char-from-slice ( slice -- ch/f )
+    [ from>> 1 - ] [ seq>> ] bi ?nth ;
+
+: next-char-from-slice ( slice -- ch/f )
+    [ to>> ] [ seq>> ] bi ?nth ;
+
+: char-before-slice ( slice -- ch/f )
+    [ from>> 1 - ] [ seq>> ] bi ?nth ;
+
+: char-after-slice ( slice -- ch/f )
+    [ to>> ] [ seq>> ] bi ?nth ;
+
+: next-char-from* ( n/f string -- ch/f )
+    next-char-from 2nip ;
+
+: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? )
+    [ find-from ] keep
+    pick [ drop t ] [ length -rot nip f ] if ; inline
+
+: skip-blank-from ( n string -- n' string )
+    [ [ blank? not ] find-from* 2drop ] keep ; inline
+
+: skip-til-eol-from ( n string -- n' string )
+    [ [ "\r\n" member? ] find-from* 2drop ] keep ; inline
+
+! Don't include the whitespace in the slice
+:: slice-til-whitespace ( n string -- n' string slice/f ch/f )
+    n string [ "\s\r\n" member? ] find-from :> ( n' ch )
+    n' string
+    n n' string ?<slice>
+    ch ; inline
+
+:: slice-until' ( n string quot -- n' string slice/f ch/f )
+    n string quot find-from :> ( n' ch )
+    n' string
+    n n' string ?<slice>
+    ch ; inline
+
+: slice-until ( n string quot -- n' string slice/f )
+    slice-until' drop ; inline
+
+:: slice-til-not-whitespace ( n string -- n' string slice/f ch/f )
+    n [
+        n string [ "\s\r\n" member? not ] find-from :> ( n' ch )
+        n' string
+        n n' string ?<slice>
+        ch
+    ] [
+        n string f f
+    ] if ; inline
+
+: skip-whitespace ( n string -- n' string )
+    slice-til-not-whitespace 2drop ;
+
+: empty-slice-end ( seq -- slice )
+    [ length dup ] [ ] bi <slice> ; inline
+
+: empty-slice-from ( n seq -- slice )
+    dupd <slice> ; inline
+
+:: slice-til-eol ( n string -- n' string slice/f ch/f )
+    n [
+        n string '[ "\r\n" member? ] find-from :> ( n' ch )
+        n' string
+        n n' string ?<slice>
+        ch
+    ] [
+        n string string empty-slice-end f
+    ] if ; inline
+
+:: merge-slice-til-eol-slash'' ( n string -- n' string slice/f ch/f )
+    n [
+        n string '[ "\r\n\\" member? ] find-from :> ( n' ch )
+        n' string
+        n n' string ?<slice>
+        ch
+    ] [
+        n string string empty-slice-end f
+    ] if ; inline
+
+: merge-slice-til-whitespace ( n string slice --  n' string slice' )
+    [ slice-til-whitespace drop ] dip merge-slices ;
+
+: merge-slice-til-eol ( n string slice --  n' string slice' )
+    [ slice-til-eol drop ] dip merge-slices ;
+
+: slice-between ( slice1 slice2 -- slice )
+    ! ensure-same-underlying
+    slice-order-by-from
+    [ to>> ]
+    [ [ from>> 2dup < [ swap ] unless ] [ seq>> ] bi ] bi* <slice> ;
+
+: slice-before ( slice -- slice' )
+    [ drop 0 ] [ from>> ] [ seq>> ] tri <slice> ;
+
+: ?nth' ( n/f string/f -- obj/f )
+    over [ ?nth ] [ 2drop f ] if ;
+
+:: merge-slice-til-eol-slash' ( n string slice -- n' string slice/f ch/f )
+    n string merge-slice-til-eol-slash'' :> ( n' string' slice' ch' )
+    ch' CHAR: \ = [
+        n' 1 + string' ?nth' "\r\n" member? [
+            n' 2 + string' slice slice' span-slices merge-slice-til-eol-slash'
+        ] [
+            "omg" throw
+        ] if
+    ] [
+        n' string' slice slice' span-slices ch'
+    ] if ;
+
+! Supports \ at eol (with no space after it)
+: slice-til-eol-slash ( n string -- n' string slice/f ch/f )
+    2dup empty-slice-from merge-slice-til-eol-slash' ;
+
+:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f )
+    n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip  :> ( n' ch )
+    n' string
+    n n' string ?<slice>
+    ch ; inline
+
+: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f )
+    slice-til-separator-inclusive dup [
+        [ [ 1 - ] change-to ] dip
+    ] when ;
+
+:: slice-til-either ( n string tokens -- n'/f string slice/f ch )
+    n [
+        n string '[ tokens member? ] find-from
+        dup "\s\r\n" member? [
+            :> ( n' ch )
+            n' string
+            n n' string ?<slice>
+            ch
+        ] [
+            [ dup [ 1 + ] when ] dip :> ( n' ch )
+            n' string
+            n n' string ?<slice>
+            ch
+        ] if
+    ] [
+        f string f f
+    ] if ; inline
+
+ERROR: subseq-expected-but-got-eof n string expected ;
+
+:: slice-til-string ( n string search --  n' string payload end-string )
+    search string n subseq-start-from :> n'
+    n' [ n string search subseq-expected-but-got-eof ] unless
+    n' search length +  string
+    n n' string ?<slice>
+    n' dup search length + string ?<slice> ;
+
+: modify-from ( slice n -- slice' )
+    '[ from>> _ + ] [ to>> ] [ seq>> ] tri <slice> ;
+
+: modify-to ( slice n -- slice' )
+    [ [ from>> ] [ to>> ] [ seq>> ] tri ] dip
+    swap [ + ] dip <slice> ;
+
+! { CHAR: ] [ read-closing ] }
+! { CHAR: } [ read-closing ] }
+! { CHAR: ) [ read-closing ] }
+: read-closing ( n string tok -- n string tok )
+    dup length 1 = [
+        -1 modify-to [ 1 - ] 2dip
+    ] unless ;
+