modern: A bit of duplication but it all works?

The top vs nested parsing can be cleaned up with a flag but the stack shuffling has to be done....
modern-harvey2
Doug Coleman 2017-12-03 19:21:37 -06:00
parent 3a95591005
commit 650bff4793
1 changed files with 53 additions and 9 deletions

View File

@ -66,10 +66,11 @@ ERROR: lex-expected-but-got-eof n string expected ;
DEFER: section-close?
DEFER: upper-colon?
DEFER: lex-factor-nested
: lex-colon-until ( n string tag-sequence -- n' string payload )
'[
[
lex-factor-top dup f like [ , ] when* [
lex-factor-nested dup f like [ , ] when* [
dup [
dup { [ section-close? ] [ upper-colon? ] } 1|| [
drop f
@ -182,7 +183,7 @@ ERROR: unexpected-terminator n string slice ;
{
[ length 2 >= ]
[ ":" tail? ]
[ dup [ char: : = ] find drop head strict-upper? ]
[ dup [ char: \: = ] find drop head strict-upper? ]
} 1&& ;
: section-close? ( string -- ? )
@ -217,7 +218,7 @@ ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ;
{
{ [ dup strict-upper? ] [ read-til-semicolon ] }
{ [ dup ":" tail? ] [ dup ":" head? [ read-lowercase-colon ] unless ] } ! :foo: vs foo:
[ "here for some reason" throw ]
[ ]
} cond ;
: read-acute ( n string slice -- n' string acute )
@ -256,12 +257,30 @@ DEFER: lex-factor-top*
! If the slice is 0 width, we stopped on whitespace.
! Advance the index and read again!
: read-token-or-whitespace ( n string slice -- n' string slice/f )
: read-token-or-whitespace-top ( n string slice -- n' string slice/f )
dup length 0 = [ [ 1 + ] 2dip drop lex-factor-top ] when ;
: read-token-or-whitespace-nested ( n string slice -- n' string slice/f )
dup length 0 = [ [ 1 + ] 2dip drop lex-factor-nested ] when ;
! Inside a FOO: or a <FOO FOO>
: lex-factor-nested ( n/f string slice/f ch/f -- n'/f string literal )
: lex-factor-nested* ( n/f string slice/f ch/f -- n'/f string literal )
{
! Nested ``A: a B: b`` so rewind and let the parser get it top-level
{ char: \: [ merge-slice-til-whitespace rewind-slice f ] }
{ char: < [
! FOO: a b <BAR: ;BAR>
! FOO: a b <BAR BAR>
! FOO: a b <asdf>
! FOO: a b <asdf asdf>
! if we are in a FOO: and we hit a <BAR or <BAR:
! then end the FOO:
[ slice-til-whitespace drop ] dip span-slices
dup section-open? [ rewind-slice f ] when
] }
{ char: \\ [ read-backslash ] }
{ char: \[ [ read-bracket ] }
{ char: \{ [ read-brace ] }
@ -269,9 +288,9 @@ DEFER: lex-factor-top*
{ char: \] [ ] }
{ char: \} [ ] }
{ char: \) [ ] }
{ char: \s [ read-token-or-whitespace ] }
{ char: \r [ read-token-or-whitespace ] }
{ char: \n [ read-token-or-whitespace ] }
{ char: \s [ read-token-or-whitespace-nested ] }
{ char: \r [ read-token-or-whitespace-nested ] }
{ char: \n [ read-token-or-whitespace-nested ] }
{ char: \" [ read-string ] }
{ char: \! [ read-exclamation ] }
{ char: > [
@ -283,6 +302,12 @@ DEFER: lex-factor-top*
{ f [ ] }
} case ;
: lex-factor-nested ( n/f string -- n'/f string literal )
! skip-whitespace
"\"\\!:[{(]})<>\s\r\n" slice-til-either
lex-factor-nested* ; inline
: lex-factor-top* ( n/f string slice/f ch/f -- n'/f string literal )
{
{ char: \: [ merge-slice-til-whitespace read-colon ] }
@ -297,7 +322,26 @@ DEFER: lex-factor-top*
[ slice-til-whitespace drop ] dip span-slices
dup section-open? [ read-acute ] when
] }
[ lex-factor-nested ]
{ char: \\ [ read-backslash ] }
{ char: \[ [ read-bracket ] }
{ char: \{ [ read-brace ] }
{ char: \( [ read-paren ] }
{ char: \] [ ] }
{ char: \} [ ] }
{ char: \) [ ] }
{ char: \s [ read-token-or-whitespace-top ] }
{ char: \r [ read-token-or-whitespace-top ] }
{ char: \n [ read-token-or-whitespace-top ] }
{ char: \" [ read-string ] }
{ char: \! [ read-exclamation ] }
{ char: > [
[ [ char: > = not ] slice-until ] dip merge-slices
dup section-close? [
[ slice-til-whitespace drop ] dip ?span-slices
] unless
] }
{ f [ ] }
} case ;
: lex-factor-top ( n/f string -- n'/f string literal )