modern: Add a stripped-down parser from what I had.
parent
081b642dee
commit
218530209f
|
@ -0,0 +1,234 @@
|
||||||
|
! Copyright (C) 2016 Doug Coleman.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors arrays assocs combinators
|
||||||
|
combinators.short-circuit continuations fry io.encodings.utf8
|
||||||
|
io.files kernel locals make math math.order modern.paths
|
||||||
|
modern.slices namespaces sequences sequences.extras shuffle
|
||||||
|
splitting strings unicode ;
|
||||||
|
IN: modern
|
||||||
|
|
||||||
|
ERROR: string-expected-got-eof n string ;
|
||||||
|
ERROR: long-opening-mismatch tag open n string ch ;
|
||||||
|
|
||||||
|
SYMBOL: delimiter-stack
|
||||||
|
: push-delimiter-stack ( obj -- ) delimiter-stack get push ;
|
||||||
|
: pop-delimiter-stack ( -- obj ) delimiter-stack get pop ;
|
||||||
|
|
||||||
|
! (( )) [[ ]] {{ }}
|
||||||
|
MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) )
|
||||||
|
open-ch dup matching-delimiter {
|
||||||
|
[ drop 2 swap <string> ]
|
||||||
|
[ drop 1string ]
|
||||||
|
[ nip 2 swap <string> ]
|
||||||
|
} 2cleave :> ( openstr2 openstr1 closestr2 )
|
||||||
|
[| n string tag! ch |
|
||||||
|
ch {
|
||||||
|
{ CHAR: = [
|
||||||
|
n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )
|
||||||
|
ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless
|
||||||
|
opening matching-delimiter-string :> needle
|
||||||
|
|
||||||
|
n' string' needle slice-til-string :> ( n'' string'' payload closing )
|
||||||
|
n'' string
|
||||||
|
payload closing tag opening 4array
|
||||||
|
] }
|
||||||
|
{ open-ch [
|
||||||
|
tag 1 cut-slice* swap tag! 1 modify-to :> opening
|
||||||
|
n 1 + string closestr2 slice-til-string :> ( n' string' payload closing )
|
||||||
|
n' string
|
||||||
|
payload closing tag opening 4array
|
||||||
|
] }
|
||||||
|
[ [ tag openstr2 n string ] dip long-opening-mismatch ]
|
||||||
|
} case
|
||||||
|
] ;
|
||||||
|
|
||||||
|
: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ;
|
||||||
|
: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ;
|
||||||
|
: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ;
|
||||||
|
|
||||||
|
DEFER: lex-factor
|
||||||
|
ERROR: lex-expected-but-got-eof n string expected ;
|
||||||
|
! For implementing [ { (
|
||||||
|
: lex-until ( n string tags -- n' string payload closing )
|
||||||
|
pick [
|
||||||
|
3dup '[
|
||||||
|
[
|
||||||
|
lex-factor dup , [
|
||||||
|
dup [
|
||||||
|
! } gets a chance, but then also full seq { } after recursion...
|
||||||
|
[ _ ] dip '[ _ sequence= ] any? not
|
||||||
|
] [
|
||||||
|
drop t ! loop again?
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
_ _ _ lex-expected-but-got-eof
|
||||||
|
] if*
|
||||||
|
] loop
|
||||||
|
] { } make unclip-last
|
||||||
|
] [
|
||||||
|
lex-expected-but-got-eof
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: lex-colon-until ( n string tags -- n' string payload closing )
|
||||||
|
pick [
|
||||||
|
3dup '[
|
||||||
|
[
|
||||||
|
lex-factor dup [ , ] when* [
|
||||||
|
dup [
|
||||||
|
! } gets a chance, but then also full seq { } after recursion...
|
||||||
|
[ _ ] dip '[ _ sequence= ] any? not
|
||||||
|
] [
|
||||||
|
drop t ! loop again?
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
_ _ _ lex-expected-but-got-eof
|
||||||
|
] if*
|
||||||
|
] loop
|
||||||
|
] { } make unclip-last
|
||||||
|
] [
|
||||||
|
lex-expected-but-got-eof
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: split-double-dash ( seq -- seqs )
|
||||||
|
dup [ { [ "--" sequence= ] } 1&& ] split-when
|
||||||
|
dup length 1 > [ nip ] [ drop ] if ;
|
||||||
|
|
||||||
|
MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )
|
||||||
|
ch dup matching-delimiter {
|
||||||
|
[ drop "=" swap prefix ]
|
||||||
|
[ nip 1string ]
|
||||||
|
} 2cleave :> ( openstreq closestr1 ) ! [= ]
|
||||||
|
[| n string tag |
|
||||||
|
n string tag
|
||||||
|
2over nth-check-eof {
|
||||||
|
{ [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or ((
|
||||||
|
{ [ dup blank? ] [
|
||||||
|
drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip
|
||||||
|
1 cut-slice* 2swap 4array ] } ! ( foo )
|
||||||
|
[ drop [ slice-til-whitespace drop ] dip span-slices ] ! (foo)
|
||||||
|
} cond
|
||||||
|
] ;
|
||||||
|
|
||||||
|
: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ;
|
||||||
|
: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ;
|
||||||
|
: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ;
|
||||||
|
: read-string-payload ( n string -- n' string )
|
||||||
|
over [
|
||||||
|
{ CHAR: \ CHAR: " } slice-til-separator-inclusive {
|
||||||
|
{ f [ drop ] }
|
||||||
|
{ CHAR: " [ drop ] }
|
||||||
|
{ CHAR: \ [ drop next-char-from drop read-string-payload ] }
|
||||||
|
} case
|
||||||
|
] [
|
||||||
|
string-expected-got-eof
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
:: read-string ( n string tag -- n' string seq )
|
||||||
|
n string read-string-payload drop :> n'
|
||||||
|
n' string
|
||||||
|
n' [ n string string-expected-got-eof ] unless
|
||||||
|
n n' 1 - string <slice>
|
||||||
|
n' 1 - n' string <slice>
|
||||||
|
tag 1 cut-slice* 4array ;
|
||||||
|
|
||||||
|
: take-comment ( n string slice -- n' string comment )
|
||||||
|
2over ?nth CHAR: [ = [
|
||||||
|
[ 1 + ] 2dip 2over ?nth read-double-matched-bracket
|
||||||
|
] [
|
||||||
|
[ slice-til-eol drop dup ] dip 1 cut-slice* 4array
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: read-til-semicolon ( n string slice -- n' string semi )
|
||||||
|
dup '[ but-last ";" append ";" 2array lex-colon-until ] dip
|
||||||
|
-rot 3array ;
|
||||||
|
|
||||||
|
: read-word-or-til-semicolon ( n string slice -- n' string obj )
|
||||||
|
2over next-char-from* "\s\r\n" member? [
|
||||||
|
read-til-semicolon
|
||||||
|
] [
|
||||||
|
merge-slice-til-whitespace
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: read-lowercase-colon ( n string slice -- n' string lowercase-colon )
|
||||||
|
[ lex-factor ] dip swap 2array ;
|
||||||
|
|
||||||
|
: strict-upper? ( string -- ? )
|
||||||
|
[ { [ CHAR: A CHAR: Z between? ] [ "#:-" member? ] } 1|| ] all? ;
|
||||||
|
|
||||||
|
ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ;
|
||||||
|
: read-colon ( n string slice -- n' string colon )
|
||||||
|
dup length 1 = [
|
||||||
|
dup prev-char-from-slice { CHAR: \s CHAR: \r CHAR: \n f } member? [
|
||||||
|
read-til-semicolon
|
||||||
|
] [
|
||||||
|
read-lowercase-colon
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
{ [ dup strict-upper? ] [ B read-til-semicolon ] }
|
||||||
|
[ read-lowercase-colon ]
|
||||||
|
} cond
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: read-acute ( n string slice -- n' string acute )
|
||||||
|
;
|
||||||
|
|
||||||
|
! Words like append! and suffix! are allowed for now.
|
||||||
|
: read-exclamation ( n string slice -- n' string obj )
|
||||||
|
dup { [ "!" sequence= ] [ "#!" sequence= ] } 1||
|
||||||
|
[ take-comment ] [ merge-slice-til-whitespace ] if ;
|
||||||
|
|
||||||
|
ERROR: backslash-expects-whitespace slice ;
|
||||||
|
: read-backslash ( n string slice -- n' string obj )
|
||||||
|
2over peek-from blank? [
|
||||||
|
! \ foo, M\ foo
|
||||||
|
[ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* 4array
|
||||||
|
] [
|
||||||
|
! M\N
|
||||||
|
merge-slice-til-whitespace
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
! If the slice is 0 width, we stopped on whitespace.
|
||||||
|
! Advance the index and read again!
|
||||||
|
: read-token-or-whitespace ( n string slice -- n' string slice )
|
||||||
|
dup length 0 =
|
||||||
|
[ drop [ 1 + ] dip lex-factor ] when ;
|
||||||
|
|
||||||
|
ERROR: mismatched-terminator n string slice ;
|
||||||
|
: read-terminator ( n string slice -- n' string slice ) ;
|
||||||
|
|
||||||
|
: lex-factor ( n/f string -- n'/f string literal )
|
||||||
|
over [
|
||||||
|
skip-whitespace "\"\\!:[{(\s\r\n" slice-til-either {
|
||||||
|
! { CHAR: ` [ read-backtick ] }
|
||||||
|
{ CHAR: " [ read-string ] }
|
||||||
|
{ CHAR: \ [ read-backslash ] }
|
||||||
|
{ CHAR: ! [ read-exclamation ] }
|
||||||
|
{ CHAR: : [ read-colon ] }
|
||||||
|
{ CHAR: [ [ read-bracket ] }
|
||||||
|
{ CHAR: { [ read-brace ] }
|
||||||
|
{ CHAR: ( [ read-paren ] }
|
||||||
|
{ CHAR: < [ read-acute ] }
|
||||||
|
{ CHAR: \s [ read-token-or-whitespace ] }
|
||||||
|
{ CHAR: \r [ read-token-or-whitespace ] }
|
||||||
|
{ CHAR: \n [ read-token-or-whitespace ] }
|
||||||
|
{ f [ f like ] }
|
||||||
|
} case
|
||||||
|
] [
|
||||||
|
f
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
: string>literals ( string -- sequence )
|
||||||
|
[ V{ } clone delimiter-stack ] dip '[
|
||||||
|
_ [ 0 ] dip [ lex-factor ] loop>array 2nip
|
||||||
|
] with-variable ;
|
||||||
|
|
||||||
|
: vocab>literals ( vocab -- sequence )
|
||||||
|
".private" ?tail drop
|
||||||
|
modern-source-path utf8 file-contents string>literals ;
|
||||||
|
|
||||||
|
: path>literals ( path -- sequence )
|
||||||
|
utf8 file-contents string>literals ;
|
||||||
|
|
||||||
|
: lex-core ( -- assoc )
|
||||||
|
core-bootstrap-vocabs [ [ vocab>literals ] [ nip ] recover ] map-zip ;
|
|
@ -0,0 +1,203 @@
|
||||||
|
! Copyright (C) 2016 Doug Coleman.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: accessors assocs fry kernel locals math math.private
|
||||||
|
sequences sequences.extras sequences.private unicode ;
|
||||||
|
IN: modern.slices
|
||||||
|
|
||||||
|
: matching-delimiter ( ch -- ch' )
|
||||||
|
H{
|
||||||
|
{ CHAR: ( CHAR: ) }
|
||||||
|
{ CHAR: [ CHAR: ] }
|
||||||
|
{ CHAR: { CHAR: } }
|
||||||
|
{ CHAR: < CHAR: > }
|
||||||
|
{ CHAR: : CHAR: ; }
|
||||||
|
} ?at drop ;
|
||||||
|
|
||||||
|
: matching-delimiter-string ( string -- string' )
|
||||||
|
[ matching-delimiter ] map ;
|
||||||
|
|
||||||
|
ERROR: unexpected-end n string ;
|
||||||
|
: nth-check-eof ( n string -- nth )
|
||||||
|
2dup ?nth [ 2nip ] [ unexpected-end ] if* ;
|
||||||
|
|
||||||
|
: peek-from ( n/f string -- ch )
|
||||||
|
over [ ?nth ] [ 2drop f ] if ;
|
||||||
|
|
||||||
|
! Allow eof
|
||||||
|
: next-char-from ( n/f string -- n'/f string ch/f )
|
||||||
|
over [
|
||||||
|
2dup ?nth [ [ 1 + ] 2dip ] [ f ] if*
|
||||||
|
] [
|
||||||
|
[ 2drop f ] [ nip ] 2bi f
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: prev-char-from-slice-end ( slice -- ch/f )
|
||||||
|
[ to>> 2 - ] [ seq>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: prev-char-from-slice ( slice -- ch/f )
|
||||||
|
[ from>> 1 - ] [ seq>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: next-char-from-slice ( slice -- ch/f )
|
||||||
|
[ to>> ] [ seq>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: char-before-slice ( slice -- ch/f )
|
||||||
|
[ from>> 1 - ] [ seq>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: char-after-slice ( slice -- ch/f )
|
||||||
|
[ to>> ] [ seq>> ] bi ?nth ;
|
||||||
|
|
||||||
|
: next-char-from* ( n/f string -- ch/f )
|
||||||
|
next-char-from 2nip ;
|
||||||
|
|
||||||
|
: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? )
|
||||||
|
[ find-from ] keep
|
||||||
|
pick [ drop t ] [ length -rot nip f ] if ; inline
|
||||||
|
|
||||||
|
: skip-blank-from ( n string -- n' string )
|
||||||
|
[ [ blank? not ] find-from* 2drop ] keep ; inline
|
||||||
|
|
||||||
|
: skip-til-eol-from ( n string -- n' string )
|
||||||
|
[ [ "\r\n" member? ] find-from* 2drop ] keep ; inline
|
||||||
|
|
||||||
|
! Don't include the whitespace in the slice
|
||||||
|
:: slice-til-whitespace ( n string -- n' string slice/f ch/f )
|
||||||
|
n string [ "\s\r\n" member? ] find-from :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch ; inline
|
||||||
|
|
||||||
|
:: slice-until' ( n string quot -- n' string slice/f ch/f )
|
||||||
|
n string quot find-from :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch ; inline
|
||||||
|
|
||||||
|
: slice-until ( n string quot -- n' string slice/f )
|
||||||
|
slice-until' drop ; inline
|
||||||
|
|
||||||
|
:: slice-til-not-whitespace ( n string -- n' string slice/f ch/f )
|
||||||
|
n [
|
||||||
|
n string [ "\s\r\n" member? not ] find-from :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch
|
||||||
|
] [
|
||||||
|
n string f f
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
: skip-whitespace ( n string -- n' string )
|
||||||
|
slice-til-not-whitespace 2drop ;
|
||||||
|
|
||||||
|
: empty-slice-end ( seq -- slice )
|
||||||
|
[ length dup ] [ ] bi <slice> ; inline
|
||||||
|
|
||||||
|
: empty-slice-from ( n seq -- slice )
|
||||||
|
dupd <slice> ; inline
|
||||||
|
|
||||||
|
:: slice-til-eol ( n string -- n' string slice/f ch/f )
|
||||||
|
n [
|
||||||
|
n string '[ "\r\n" member? ] find-from :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch
|
||||||
|
] [
|
||||||
|
n string string empty-slice-end f
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
:: merge-slice-til-eol-slash'' ( n string -- n' string slice/f ch/f )
|
||||||
|
n [
|
||||||
|
n string '[ "\r\n\\" member? ] find-from :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch
|
||||||
|
] [
|
||||||
|
n string string empty-slice-end f
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
: merge-slice-til-whitespace ( n string slice -- n' string slice' )
|
||||||
|
[ slice-til-whitespace drop ] dip merge-slices ;
|
||||||
|
|
||||||
|
: merge-slice-til-eol ( n string slice -- n' string slice' )
|
||||||
|
[ slice-til-eol drop ] dip merge-slices ;
|
||||||
|
|
||||||
|
: slice-between ( slice1 slice2 -- slice )
|
||||||
|
! ensure-same-underlying
|
||||||
|
slice-order-by-from
|
||||||
|
[ to>> ]
|
||||||
|
[ [ from>> 2dup < [ swap ] unless ] [ seq>> ] bi ] bi* <slice> ;
|
||||||
|
|
||||||
|
: slice-before ( slice -- slice' )
|
||||||
|
[ drop 0 ] [ from>> ] [ seq>> ] tri <slice> ;
|
||||||
|
|
||||||
|
: ?nth' ( n/f string/f -- obj/f )
|
||||||
|
over [ ?nth ] [ 2drop f ] if ;
|
||||||
|
|
||||||
|
:: merge-slice-til-eol-slash' ( n string slice -- n' string slice/f ch/f )
|
||||||
|
n string merge-slice-til-eol-slash'' :> ( n' string' slice' ch' )
|
||||||
|
ch' CHAR: \ = [
|
||||||
|
n' 1 + string' ?nth' "\r\n" member? [
|
||||||
|
n' 2 + string' slice slice' span-slices merge-slice-til-eol-slash'
|
||||||
|
] [
|
||||||
|
"omg" throw
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
n' string' slice slice' span-slices ch'
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
! Supports \ at eol (with no space after it)
|
||||||
|
: slice-til-eol-slash ( n string -- n' string slice/f ch/f )
|
||||||
|
2dup empty-slice-from merge-slice-til-eol-slash' ;
|
||||||
|
|
||||||
|
:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f )
|
||||||
|
n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch ; inline
|
||||||
|
|
||||||
|
: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f )
|
||||||
|
slice-til-separator-inclusive dup [
|
||||||
|
[ [ 1 - ] change-to ] dip
|
||||||
|
] when ;
|
||||||
|
|
||||||
|
:: slice-til-either ( n string tokens -- n'/f string slice/f ch )
|
||||||
|
n [
|
||||||
|
n string '[ tokens member? ] find-from
|
||||||
|
dup "\s\r\n" member? [
|
||||||
|
:> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch
|
||||||
|
] [
|
||||||
|
[ dup [ 1 + ] when ] dip :> ( n' ch )
|
||||||
|
n' string
|
||||||
|
n n' string ?<slice>
|
||||||
|
ch
|
||||||
|
] if
|
||||||
|
] [
|
||||||
|
f string f f
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
|
ERROR: subseq-expected-but-got-eof n string expected ;
|
||||||
|
|
||||||
|
:: slice-til-string ( n string search -- n' string payload end-string )
|
||||||
|
search string n subseq-start-from :> n'
|
||||||
|
n' [ n string search subseq-expected-but-got-eof ] unless
|
||||||
|
n' search length + string
|
||||||
|
n n' string ?<slice>
|
||||||
|
n' dup search length + string ?<slice> ;
|
||||||
|
|
||||||
|
: modify-from ( slice n -- slice' )
|
||||||
|
'[ from>> _ + ] [ to>> ] [ seq>> ] tri <slice> ;
|
||||||
|
|
||||||
|
: modify-to ( slice n -- slice' )
|
||||||
|
[ [ from>> ] [ to>> ] [ seq>> ] tri ] dip
|
||||||
|
swap [ + ] dip <slice> ;
|
||||||
|
|
||||||
|
! { CHAR: ] [ read-closing ] }
|
||||||
|
! { CHAR: } [ read-closing ] }
|
||||||
|
! { CHAR: ) [ read-closing ] }
|
||||||
|
: read-closing ( n string tok -- n string tok )
|
||||||
|
dup length 1 = [
|
||||||
|
-1 modify-to [ 1 - ] 2dip
|
||||||
|
] unless ;
|
||||||
|
|
Loading…
Reference in New Issue