modern: use lexer instead of `n string`.

locals-and-roots
Doug Coleman 2016-06-19 11:47:06 -07:00
parent 721434cb44
commit dda8add101
2 changed files with 97 additions and 73 deletions
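
In outline: lexing words that previously threaded an explicit index and source string through the stack as ( n string -- n' string ... ) now take a single modern-lexer tuple, ( lexer -- ... ), whose n slot tracks the position. A rough caller-side sketch using names from the hunks below (the input string is arbitrary and only illustrative):

USING: modern modern.lexer ;

! old convention: supply the starting index and carry n/string along yourself
! 0 "foo: bar ;" lex-factor                ! n'/f string literal

! new convention: wrap the string once; the lexer tuple tracks its own position
! "foo: bar ;" <modern-lexer> lex-factor   ! literal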

@@ -1,7 +1,7 @@
 ! Copyright (C) 2016 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors constructors kernel math sequences
-sequences.extras slots.syntax ;
+sequences.extras slots.syntax unicode ;
 in: modern.lexer
 
 TUPLE: modern-lexer n string stack ;
@@ -11,6 +11,16 @@ CONSTRUCTOR: <modern-lexer> modern-lexer ( string -- obj )
 : >lexer< ( lexer -- n string ) slots[ n string ] ;
 
+: ?lexer-nth ( lexer -- obj )
+    >lexer< over [ ?nth ] [ 2drop f ] if ;
+
+ERROR: unexpected-end n string ;
+: nth-check-eof ( n string -- nth )
+    2dup ?nth [ 2nip ] [ unexpected-end ] if* ; inline
+
+: lexer-nth-check-eof ( lexer -- nth )
+    >lexer< nth-check-eof ;
+
 :: slice-til-either ( n string tokens -- n'/f string slice/f ch/f )
     n [
         n string '[ tokens member? ] find-from
@@ -115,3 +125,14 @@ ERROR: subseq-expected-but-got-eof n string expected ;
     lexer
     n' >>n drop
     n' string' payload closing ;
+
+: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? )
+    [ find-from ] keep
+    pick [ drop t ] [ length -rot nip f ] if ; inline
+
+: skip-blank-from ( n string -- n' string )
+    [ [ blank? not ] find-from* 2drop ] keep ; inline
+
+: skip-blanks ( lexer -- lexer )
+    dup >lexer< skip-blank-from drop >>n ; inline
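
The words added above are the tuple-based counterparts of the old index/string helpers. Their assumed behavior, sketched as comments (not part of the commit):

! "   x" <modern-lexer> skip-blanks          ! same lexer, n advanced past the leading blanks
! "abc"  <modern-lexer> ?lexer-nth           ! code point at n, or f when past the end
! ""     <modern-lexer> lexer-nth-check-eof  ! like ?lexer-nth, but throws unexpected-end at eof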

@@ -4,7 +4,7 @@ USING: accessors arrays assocs assocs.extras combinators
 combinators.short-circuit constructors continuations fry
 io.encodings.utf8 io.files kernel locals macros make math
 math.order modern.paths modern.slices multiline namespaces
-quotations sequences sequences.extras splitting
+quotations sequences sequences.extras splitting modern.lexer
 splitting.monotonic strings unicode generalizations ;
 
 in: modern
@@ -203,39 +203,37 @@ ERROR: mismatched-closing opening closing ;
     delimiter 1array >>seq
     delimiter >>underlying ; inline
 
-ERROR: long-opening-mismatch tag open n string ch ;
+ERROR: long-opening-mismatch tag open lexer ch ;
 
 ! (( )) [[ ]] {{ }}
-MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) )
+MACRO:: read-double-matched ( open-ch -- quot: ( lexer tag ch -- seq ) )
     open-ch dup matching-delimiter {
         [ drop 2 swap <string> ]
        [ drop 1string ]
        [ nip 2 swap <string> ]
    } 2cleave :> ( openstr2 openstr1 closestr2 )
 
-    |[ n string tag! ch |
+    |[ lexer tag! ch |
         ch {
             { char: = [
-                n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )
-                ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless
+                lexer openstr1 lex-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )
+                ch open-ch = [ tag openstr2 lexer ch long-opening-mismatch ] unless
                 opening matching-delimiter-string :> needle
-                n' string' needle slice-til-string :> ( n'' string'' payload closing )
-                n'' string
+                lexer needle lex-til-string :> ( n'' string'' payload closing )
                 payload closing tag opening double-matched-literal make-matched-literal
             ] }
             { open-ch [
                 tag 1 cut-slice* swap tag! 1 modify-to :> opening
-                n 1 + string closestr2 slice-til-string :> ( n' string' payload closing )
-                n' string
+                lexer [ 1 + ] change-n closestr2 lex-til-string :> ( n' string' payload closing )
                 payload closing tag opening double-matched-literal make-matched-literal
             ] }
-            [ [ tag openstr2 n string ] dip long-opening-mismatch ]
+            [ [ tag openstr2 lexer ] dip long-opening-mismatch ]
         } case
     ] ;
 
-: read-double-matched-paren ( n string tag ch -- n' string seq ) char: \( read-double-matched ;
-: read-double-matched-bracket ( n string tag ch -- n' string seq ) char: \[ read-double-matched ;
-: read-double-matched-brace ( n string tag ch -- n' string seq ) char: \{ read-double-matched ;
+: read-double-matched-paren ( lexer tag ch -- seq ) char: \( read-double-matched ;
+: read-double-matched-bracket ( lexer tag ch -- seq ) char: \[ read-double-matched ;
+: read-double-matched-brace ( lexer tag ch -- seq ) char: \{ read-double-matched ;
 
 defer: lex
 defer: lex-factor
@@ -249,11 +247,11 @@ ERROR: lex-expected-but-got-eof n string quot ;
 ERROR: unnestable-form n string obj ;
 
 ! For implementing [ { (
-: lex-until ( n string tags -- n' string payload closing )
+: lex-until ( lexer tags -- payload closing )
     ! 3 npick [ lex-expected-but-got-eof ] unless
     '[
         [
-            lex-factor [
+            _ lex-factor [
                 ! [ _ _ _ lex-expected-but-got-eof ] unless*
                 dup tag-literal? [
                     dup ,
@@ -266,38 +264,41 @@ ERROR: unnestable-form n string obj ;
         ] loop
     ] { } make unclip-last ; inline
 
-MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )
+MACRO:: read-matched ( ch -- quot: ( lexer tag -- slice' ) )
     ch dup matching-delimiter {
         [ drop "=" swap prefix ]
        [ nip 1string ]
    } 2cleave :> ( openstreq closestr1 ) ! [= ]
 
-    |[ n string tag |
-        n string tag
-        2over nth-check-eof {
+    |[ lexer tag |
+        lexer tag
+        over lexer-nth-check-eof {
             { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or ((
             { [ dup blank? ] [ drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip 1 cut-slice* single-matched-literal make-matched-literal ] } ! ( foo )
-            [ drop [ slice-til-whitespace drop ] dip span-slices make-tag-literal ] ! (foo)
+            [ drop [ lex-til-whitespace drop 2nip ] dip span-slices make-tag-literal ] ! (foo)
         } cond
     ] ;
 
-: read-bracket ( n string slice -- n' string slice' ) char: \[ read-matched ;
-: read-brace ( n string slice -- n' string slice' ) char: \{ read-matched ;
-: read-paren ( n string slice -- n' string slice' ) char: \( read-matched ;
+: read-bracket ( lexer slice -- slice' ) char: \[ read-matched ;
+: read-brace ( lexer slice -- slice' ) char: \{ read-matched ;
+: read-paren ( lexer slice -- slice' ) char: \( read-matched ;
 
-: read-string-payload ( n string -- n' string )
-    over [
-        { char: \\ char: \" } slice-til-separator-inclusive {
-            { f [ drop ] }
-            { char: \" [ drop ] }
-            { char: \\ [ drop next-char-from drop read-string-payload ] }
+:: read-string-payload ( lexer -- n' string slice )
+    lexer dup ?lexer-nth [
+        { char: \\ char: \" } lex-til-separator-inclusive :> ( n' string' slice ch )
+        ch {
+            { f [ n' string' slice ] }
+            { char: \" [ n' string' slice ] }
+            { char: \\ [ lexer [ 1 + ] change-n read-string-payload ] }
         } case
     ] [
-        string-expected-got-eof
+        lexer >lexer< f string-expected-got-eof
    ] if ;
 
-:: read-string ( n string tag -- n' string seq )
-    n string read-string-payload drop :> n'
-    n' string
+:: read-string ( lexer tag -- seq )
+    lexer n>> :> n
+    lexer read-string-payload :> ( n' string slice )
+    ! n' string
     n' [ n string string-expected-got-eof ] unless
     n n' 1 - string <slice>
     n' 1 - n' string <slice>
@@ -306,78 +307,79 @@ MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )
 ERROR: cannot-nest-upper-colon n string string' ;
 
-: read-upper-colon ( n string string' -- n' string obj )
+: read-upper-colon ( lexer string' -- obj )
     ! 4 npick 0 > [ cannot-nest-upper-colon ] when
     dup [ scoped-colon-name [ but-last ";" append ";" 2array ] [ ";" 1array ] if* lex-until ] dip
     1 cut-slice* uppercase-colon-literal make-matched-literal ;
 
-: read-lower-colon ( n string string' -- n' string obj )
+: read-lower-colon ( lexer string' -- obj )
     [ lex-factor dup ] dip 1 cut-slice*
     lowercase-colon-literal make-delimited-literal ;
 
 ! : foo: :foo foo:bar foo:BAR: foo:bar: :foo:
-: read-colon ( n string slice -- n' string colon )
-    merge-slice-til-whitespace {
+: read-colon ( lexer slice -- colon )
+    dupd merge-lex-til-whitespace {
         { [ dup length 1 = ] [ read-upper-colon ] }
         { [ dup [ char: \: = ] all? ] [ read-upper-colon ] }
-        { [ dup { [ ":" head? ] [ ":" tail? ] } 1&& ] [ make-tag-literal ] }
+        { [ dup { [ ":" head? ] [ ":" tail? ] } 1&& ] [ nip make-tag-literal ] }
         { [ dup ":" tail? ] [ dup scoped-upper? [ read-upper-colon ] [ read-lower-colon ] if ] }
-        { [ dup ":" head? ] [ make-tag-literal ] } ! :foo( ... )
-        [ make-tag-literal ]
+        { [ dup ":" head? ] [ nip make-tag-literal ] } ! :foo( ... )
+        [ nip make-tag-literal ]
     } cond ;
 
-: read-upper-less-than ( n string slice -- n' string less-than )
+: read-upper-less-than ( lexer slice -- less-than )
     dup [ scoped-less-than-name [ but-last ">" append 1array ] [ ">" 1array ] if* lex-until ] dip
     1 cut-slice* less-than-literal make-matched-literal ;
 
-: read-less-than ( n string slice -- n' string less-than )
-    merge-slice-til-whitespace {
-        { [ dup length 1 = ] [ make-tag-literal ] } ! "<"
-        { [ dup "<" tail? ] [ dup scoped-upper? [ read-upper-less-than ] [ make-tag-literal ] if ] } ! FOO< or foo<
-        [ make-tag-literal ]
+: read-less-than ( lexer slice -- less-than )
+    dupd merge-lex-til-whitespace {
+        { [ dup length 1 = ] [ nip make-tag-literal ] } ! "<"
+        { [ dup "<" tail? ] [ dup scoped-upper? [ read-upper-less-than ] [ nip make-tag-literal ] if ] } ! FOO< or foo<
+        [ nip make-tag-literal ]
     } cond ;
 
-: take-comment ( n string slice -- n' string comment )
-    2over ?nth char: \[ = [
-        [ 1 + ] 2dip 2over ?nth read-double-matched-bracket
+: take-comment ( lexer slice -- comment )
+    over ?lexer-nth char: \[ = [
+        [ [ 1 + ] change-n ] dip over ?lexer-nth read-double-matched-bracket
     ] [
-        [ slice-til-eol drop dup ] dip 1 cut-slice* line-comment-literal make-delimited-literal
+        [ lex-til-eol drop 2nip dup ] dip 1 cut-slice* line-comment-literal make-delimited-literal
     ] if ;
 
 ! Words like append! and suffix! are allowed for now.
-: read-exclamation ( n string slice -- n' string obj )
+: read-exclamation ( lexer slice -- obj )
     dup { [ "!" sequence= ] [ "#!" sequence= ] } 1||
-    [ take-comment ] [ merge-slice-til-whitespace make-tag-literal ] if ;
+    [ take-comment ] [ merge-lex-til-whitespace make-tag-literal ] if ;
 
-: read-backtick ( n string opening -- n' string obj )
+: read-backtick ( lexer opening -- obj )
     [
-        slice-til-whitespace drop
+        lex-til-whitespace drop 2nip
         dup
     ] dip 1 cut-slice* backtick-literal make-delimited-literal ;
 
 ERROR: backslash-expects-whitespace slice ;
 
-: read-backslash ( n string slice -- n' string obj )
-    2over peek-from blank? [
+: read-backslash ( lexer slice -- obj )
+    over ?lexer-nth blank? [
         ! \ foo, M\ foo
-        [ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* backslash-literal make-delimited-literal
+        [ skip-blanks lex-til-whitespace drop 2nip dup ] dip 1 cut-slice* backslash-literal make-delimited-literal
     ] [
         ! M\N
-        merge-slice-til-whitespace make-tag-literal
+        merge-lex-til-whitespace make-tag-literal
     ] if ;
 
 ! If the slice is 0 width, we stopped on whitespace.
 ! Advance the index and read again!
-: read-token-or-whitespace ( n string slice -- n' string slice )
-    [ [ 1 + ] dip lex-factor ]
-    [ make-tag-literal ] if-empty ;
+: read-token-or-whitespace ( lexer slice -- slice )
+    [ [ 1 + ] change-n lex-factor ]
+    [ nip make-tag-literal ] if-empty ;
 
 ERROR: mismatched-terminator n string slice ;
 
-: read-terminator ( n string slice -- n' string slice )
+: read-terminator ( lexer slice -- slice )
+    nip
     terminator-literal make-tag-class-literal ;
 
 : ?blank? ( ch/f -- blank/f )
@@ -386,7 +388,7 @@ ERROR: mismatched-terminator n string slice ;
 PRIVATE<
 ! work on underlying, index is on the @
 ! @foo
-: left-decorator? ( obj -- ? )
+: left-decorator? ( slice -- ? )
     {
         [ char-before-slice ?blank? ]
        [ next-char-from-slice ?blank? not ]
@@ -401,7 +403,8 @@ PRIVATE<
 PRIVATE>
 
-: read-decorator ( n string slice -- n' string obj )
+: read-decorator ( lexer slice -- obj )
+    nip
     {
         { [ dup left-decorator? ] [ t make-decorator-sentinel ] }
         ! { [ dup right-decorator? ] [
@@ -423,13 +426,13 @@ COMPILE<
 COMPILE>
 
 ! 0 "HI: ;" slice-til-either -> 3 "HI: ;" "HI:" CHAR: \:
-MACRO: rules>call-lexer ( seq -- quot: ( n/f string -- n'/f string literal ) )
+MACRO: rules>call-lexer ( seq -- quot: ( lexer string -- literal ) )
     [ lexer-rules>delimiters ]
     [
         lexer-rules>assoc
-        { f [ f like dup [ make-tag-literal ] when ] } suffix
+        { f [ nip f like dup [ make-tag-literal ] when ] } suffix
     ] bi
-    '[ _ slice-til-either _ case ] ;
+    '[ dup _ lex-til-either [ 2drop ] 2dip _ case ] ;
 
 CONSTANT: factor-lexing-rules {
     T{ line-comment-lexer { generator read-exclamation } { delimiter char: \! } }
@@ -454,11 +457,11 @@ CONSTANT: factor-lexing-rules {
     T{ whitespace-lexer { generator read-token-or-whitespace } { delimiter char: \n } }
 } ;
 
-: lex-factor ( n/f string -- n'/f string literal )
+: lex-factor ( lexer -- literal )
     factor-lexing-rules rules>call-lexer ;
 
 : string>literals ( string -- sequence )
-    [ 0 ] dip [ lex-factor ] loop>array 2nip postprocess-lexed ;
+    <modern-lexer> '[ _ lex-factor ] loop>array postprocess-lexed ;
 
 : path>literals ( path -- sequence )
     utf8 file-contents string>literals ;
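
The public entry points keep their old signatures; only the internal lexer-state plumbing changed. A hypothetical usage sketch (the path is a placeholder):

USING: modern ;

! "foo: bar ;" string>literals               ! sequence of lexed literal objects
! "path/to/source.factor" path>literals      ! utf8 file-contents, then string>literals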