From dda8add101904e848f0ab8aa9832c736304ca3a9 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sun, 19 Jun 2016 11:47:06 -0700 Subject: [PATCH] modern: use lexer instead of `n string`. --- core/modern/lexer/lexer.factor | 23 +++++- core/modern/modern.factor | 147 +++++++++++++++++---------------- 2 files changed, 97 insertions(+), 73 deletions(-) diff --git a/core/modern/lexer/lexer.factor b/core/modern/lexer/lexer.factor index c8ac378f75..13ed261bcc 100644 --- a/core/modern/lexer/lexer.factor +++ b/core/modern/lexer/lexer.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2016 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: accessors constructors kernel math sequences -sequences.extras slots.syntax ; +sequences.extras slots.syntax unicode ; in: modern.lexer TUPLE: modern-lexer n string stack ; @@ -11,6 +11,16 @@ CONSTRUCTOR: modern-lexer ( string -- obj ) : >lexer< ( lexer -- n string ) slots[ n string ] ; +: ?lexer-nth ( lexer -- obj ) + >lexer< over [ ?nth ] [ 2drop f ] if ; + +ERROR: unexpected-end n string ; +: nth-check-eof ( n string -- nth ) + 2dup ?nth [ 2nip ] [ unexpected-end ] if* ; inline + +: lexer-nth-check-eof ( lexer -- nth ) + >lexer< nth-check-eof ; + :: slice-til-either ( n string tokens -- n'/f string slice/f ch/f ) n [ n string '[ tokens member? ] find-from @@ -115,3 +125,14 @@ ERROR: subseq-expected-but-got-eof n string expected ; lexer n' >>n drop n' string' payload closing ; + + +: find-from* ( ... n seq quot: ( ... elt -- ... ? ) -- ... i elt ? ) + [ find-from ] keep + pick [ drop t ] [ length -rot nip f ] if ; inline + +: skip-blank-from ( n string -- n' string ) + [ [ blank? not ] find-from* 2drop ] keep ; inline + +: skip-blanks ( lexer -- lexer ) + dup >lexer< skip-blank-from drop >>n ; inline diff --git a/core/modern/modern.factor b/core/modern/modern.factor index d5fe7b02f2..601089c37e 100644 --- a/core/modern/modern.factor +++ b/core/modern/modern.factor @@ -4,7 +4,7 @@ USING: accessors arrays assocs assocs.extras combinators combinators.short-circuit constructors continuations fry io.encodings.utf8 io.files kernel locals macros make math math.order modern.paths modern.slices multiline namespaces -quotations sequences sequences.extras splitting +quotations sequences sequences.extras splitting modern.lexer splitting.monotonic strings unicode generalizations ; in: modern @@ -203,39 +203,37 @@ ERROR: mismatched-closing opening closing ; delimiter 1array >>seq delimiter >>underlying ; inline -ERROR: long-opening-mismatch tag open n string ch ; +ERROR: long-opening-mismatch tag open lexer ch ; ! (( )) [[ ]] {{ }} -MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) ) +MACRO:: read-double-matched ( open-ch -- quot: ( lexer tag ch -- seq ) ) open-ch dup matching-delimiter { [ drop 2 swap ] [ drop 1string ] [ nip 2 swap ] } 2cleave :> ( openstr2 openstr1 closestr2 ) - |[ n string tag! ch | + |[ lexer tag! ch | ch { { char: = [ - n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch ) - ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless + lexer openstr1 lex-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch ) + ch open-ch = [ tag openstr2 lexer ch long-opening-mismatch ] unless opening matching-delimiter-string :> needle - n' string' needle slice-til-string :> ( n'' string'' payload closing ) - n'' string + lexer needle lex-til-string :> ( n'' string'' payload closing ) payload closing tag opening double-matched-literal make-matched-literal ] } { open-ch [ tag 1 cut-slice* swap tag! 1 modify-to :> opening - n 1 + string closestr2 slice-til-string :> ( n' string' payload closing ) - n' string + lexer [ 1 + ] change-n closestr2 lex-til-string :> ( n' string' payload closing ) payload closing tag opening double-matched-literal make-matched-literal ] } - [ [ tag openstr2 n string ] dip long-opening-mismatch ] + [ [ tag openstr2 lexer ] dip long-opening-mismatch ] } case ] ; -: read-double-matched-paren ( n string tag ch -- n' string seq ) char: \( read-double-matched ; -: read-double-matched-bracket ( n string tag ch -- n' string seq ) char: \[ read-double-matched ; -: read-double-matched-brace ( n string tag ch -- n' string seq ) char: \{ read-double-matched ; +: read-double-matched-paren ( lexer tag ch -- seq ) char: \( read-double-matched ; +: read-double-matched-bracket ( lexer tag ch -- seq ) char: \[ read-double-matched ; +: read-double-matched-brace ( lexer tag ch -- seq ) char: \{ read-double-matched ; defer: lex defer: lex-factor @@ -249,11 +247,11 @@ ERROR: lex-expected-but-got-eof n string quot ; ERROR: unnestable-form n string obj ; ! For implementing [ { ( -: lex-until ( n string tags -- n' string payload closing ) +: lex-until ( lexer tags -- payload closing ) ! 3 npick [ lex-expected-but-got-eof ] unless '[ [ - lex-factor [ + _ lex-factor [ ! [ _ _ _ lex-expected-but-got-eof ] unless* dup tag-literal? [ dup , @@ -266,38 +264,41 @@ ERROR: unnestable-form n string obj ; ] loop ] { } make unclip-last ; inline -MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) ) +MACRO:: read-matched ( ch -- quot: ( lexer tag -- slice' ) ) ch dup matching-delimiter { [ drop "=" swap prefix ] [ nip 1string ] } 2cleave :> ( openstreq closestr1 ) ! [= ] - |[ n string tag | - n string tag - 2over nth-check-eof { - { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or (( - { [ dup blank? ] [ drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip 1 cut-slice* single-matched-literal make-matched-literal ] } ! ( foo ) - [ drop [ slice-til-whitespace drop ] dip span-slices make-tag-literal ] ! (foo) + + |[ lexer tag | + lexer tag + over lexer-nth-check-eof { + { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or (( + { [ dup blank? ] [ drop dup '[ _ matching-delimiter-string closestr1 2array lex-until ] dip 1 cut-slice* single-matched-literal make-matched-literal ] } ! ( foo ) + [ drop [ lex-til-whitespace drop 2nip ] dip span-slices make-tag-literal ] ! (foo) } cond ] ; -: read-bracket ( n string slice -- n' string slice' ) char: \[ read-matched ; -: read-brace ( n string slice -- n' string slice' ) char: \{ read-matched ; -: read-paren ( n string slice -- n' string slice' ) char: \( read-matched ; +: read-bracket ( lexer slice -- slice' ) char: \[ read-matched ; +: read-brace ( lexer slice -- slice' ) char: \{ read-matched ; +: read-paren ( lexer slice -- slice' ) char: \( read-matched ; -: read-string-payload ( n string -- n' string ) - over [ - { char: \\ char: \" } slice-til-separator-inclusive { - { f [ drop ] } - { char: \" [ drop ] } - { char: \\ [ drop next-char-from drop read-string-payload ] } +:: read-string-payload ( lexer -- n' string slice ) + lexer dup ?lexer-nth [ + { char: \\ char: \" } lex-til-separator-inclusive :> ( n' string' slice ch ) + ch { + { f [ n' string' slice ] } + { char: \" [ n' string' slice ] } + { char: \\ [ lexer [ 1 + ] change-n read-string-payload ] } } case ] [ - string-expected-got-eof + lexer >lexer< f string-expected-got-eof ] if ; -:: read-string ( n string tag -- n' string seq ) - n string read-string-payload drop :> n' - n' string +:: read-string ( lexer tag -- seq ) + lexer n>> :> n + lexer read-string-payload :> ( n' string slice ) + ! n' string n' [ n string string-expected-got-eof ] unless n n' 1 - string n' 1 - n' string @@ -306,78 +307,79 @@ MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) ) ERROR: cannot-nest-upper-colon n string string' ; -: read-upper-colon ( n string string' -- n' string obj ) +: read-upper-colon ( lexer string' -- obj ) ! 4 npick 0 > [ cannot-nest-upper-colon ] when dup [ scoped-colon-name [ but-last ";" append ";" 2array ] [ ";" 1array ] if* lex-until ] dip 1 cut-slice* uppercase-colon-literal make-matched-literal ; -: read-lower-colon ( n string string' -- n' string obj ) +: read-lower-colon ( lexer string' -- obj ) [ lex-factor dup ] dip 1 cut-slice* lowercase-colon-literal make-delimited-literal ; ! : foo: :foo foo:bar foo:BAR: foo:bar: :foo: -: read-colon ( n string slice -- n' string colon ) - merge-slice-til-whitespace { +: read-colon ( lexer slice -- colon ) + dupd merge-lex-til-whitespace { { [ dup length 1 = ] [ read-upper-colon ] } { [ dup [ char: \: = ] all? ] [ read-upper-colon ] } - { [ dup { [ ":" head? ] [ ":" tail? ] } 1&& ] [ make-tag-literal ] } + { [ dup { [ ":" head? ] [ ":" tail? ] } 1&& ] [ nip make-tag-literal ] } { [ dup ":" tail? ] [ dup scoped-upper? [ read-upper-colon ] [ read-lower-colon ] if ] } - { [ dup ":" head? ] [ make-tag-literal ] } ! :foo( ... ) - [ make-tag-literal ] + { [ dup ":" head? ] [ nip make-tag-literal ] } ! :foo( ... ) + [ nip make-tag-literal ] } cond ; -: read-upper-less-than ( n string slice -- n' string less-than ) +: read-upper-less-than ( lexer slice -- less-than ) dup [ scoped-less-than-name [ but-last ">" append 1array ] [ ">" 1array ] if* lex-until ] dip 1 cut-slice* less-than-literal make-matched-literal ; -: read-less-than ( n string slice -- n' string less-than ) - merge-slice-til-whitespace { - { [ dup length 1 = ] [ make-tag-literal ] } ! "<" - { [ dup "<" tail? ] [ dup scoped-upper? [ read-upper-less-than ] [ make-tag-literal ] if ] } ! FOO< or foo< - [ make-tag-literal ] +: read-less-than ( lexer slice -- less-than ) + dupd merge-lex-til-whitespace { + { [ dup length 1 = ] [ nip make-tag-literal ] } ! "<" + { [ dup "<" tail? ] [ dup scoped-upper? [ read-upper-less-than ] [ nip make-tag-literal ] if ] } ! FOO< or foo< + [ nip make-tag-literal ] } cond ; -: take-comment ( n string slice -- n' string comment ) - 2over ?nth char: \[ = [ - [ 1 + ] 2dip 2over ?nth read-double-matched-bracket +: take-comment ( lexer slice -- comment ) + over ?lexer-nth char: \[ = [ + [ [ 1 + ] change-n ] dip over ?lexer-nth read-double-matched-bracket ] [ - [ slice-til-eol drop dup ] dip 1 cut-slice* line-comment-literal make-delimited-literal + [ lex-til-eol drop 2nip dup ] dip 1 cut-slice* line-comment-literal make-delimited-literal ] if ; ! Words like append! and suffix! are allowed for now. -: read-exclamation ( n string slice -- n' string obj ) +: read-exclamation ( lexer slice -- obj ) dup { [ "!" sequence= ] [ "#!" sequence= ] } 1|| - [ take-comment ] [ merge-slice-til-whitespace make-tag-literal ] if ; + [ take-comment ] [ merge-lex-til-whitespace make-tag-literal ] if ; -: read-backtick ( n string opening -- n' string obj ) +: read-backtick ( lexer opening -- obj ) [ - slice-til-whitespace drop + lex-til-whitespace drop 2nip dup ] dip 1 cut-slice* backtick-literal make-delimited-literal ; ERROR: backslash-expects-whitespace slice ; -: read-backslash ( n string slice -- n' string obj ) - 2over peek-from blank? [ +: read-backslash ( lexer slice -- obj ) + over ?lexer-nth blank? [ ! \ foo, M\ foo - [ skip-blank-from slice-til-whitespace drop dup ] dip 1 cut-slice* backslash-literal make-delimited-literal + [ skip-blanks lex-til-whitespace drop 2nip dup ] dip 1 cut-slice* backslash-literal make-delimited-literal ] [ ! M\N - merge-slice-til-whitespace make-tag-literal + merge-lex-til-whitespace make-tag-literal ] if ; ! If the slice is 0 width, we stopped on whitespace. ! Advance the index and read again! -: read-token-or-whitespace ( n string slice -- n' string slice ) - [ [ 1 + ] dip lex-factor ] - [ make-tag-literal ] if-empty ; +: read-token-or-whitespace ( lexer slice -- slice ) + [ [ 1 + ] change-n lex-factor ] + [ nip make-tag-literal ] if-empty ; ERROR: mismatched-terminator n string slice ; -: read-terminator ( n string slice -- n' string slice ) +: read-terminator ( lexer slice -- slice ) + nip terminator-literal make-tag-class-literal ; : ?blank? ( ch/f -- blank/f ) @@ -386,7 +388,7 @@ ERROR: mismatched-terminator n string slice ; PRIVATE< ! work on underlying, index is on the @ ! @foo -: left-decorator? ( obj -- ? ) +: left-decorator? ( slice -- ? ) { [ char-before-slice ?blank? ] [ next-char-from-slice ?blank? not ] @@ -401,7 +403,8 @@ PRIVATE< PRIVATE> -: read-decorator ( n string slice -- n' string obj ) +: read-decorator ( lexer slice -- obj ) + nip { { [ dup left-decorator? ] [ t make-decorator-sentinel ] } ! { [ dup right-decorator? ] [ @@ -423,13 +426,13 @@ COMPILE< COMPILE> ! 0 "HI: ;" slice-til-either -> 3 "HI: ;" "HI:" CHAR: \: -MACRO: rules>call-lexer ( seq -- quot: ( n/f string -- n'/f string literal ) ) +MACRO: rules>call-lexer ( seq -- quot: ( lexer string -- literal ) ) [ lexer-rules>delimiters ] [ lexer-rules>assoc - { f [ f like dup [ make-tag-literal ] when ] } suffix + { f [ nip f like dup [ make-tag-literal ] when ] } suffix ] bi - '[ _ slice-til-either _ case ] ; + '[ dup _ lex-til-either [ 2drop ] 2dip _ case ] ; CONSTANT: factor-lexing-rules { T{ line-comment-lexer { generator read-exclamation } { delimiter char: \! } } @@ -454,11 +457,11 @@ CONSTANT: factor-lexing-rules { T{ whitespace-lexer { generator read-token-or-whitespace } { delimiter char: \n } } } ; -: lex-factor ( n/f string -- n'/f string literal ) +: lex-factor ( lexer -- literal ) factor-lexing-rules rules>call-lexer ; : string>literals ( string -- sequence ) - [ 0 ] dip [ lex-factor ] loop>array 2nip postprocess-lexed ; + '[ _ lex-factor ] loop>array postprocess-lexed ; : path>literals ( path -- sequence ) utf8 file-contents string>literals ;