factor/extra/modern/modern.factor

! Copyright (C) 2016 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays assocs combinators combinators.short-circuit
continuations fry io.encodings.utf8 io.files kernel locals make
math math.order modern.paths modern.slices sequences
sequences.extras sets splitting strings unicode vocabs.loader ;
IN: modern

ERROR: string-expected-got-eof n string ;
ERROR: long-opening-mismatch tag open n string ch ;

! (( )) [[ ]] {{ }}
MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) )
    open-ch dup matching-delimiter {
        [ drop 2 swap <string> ]
        [ drop 1string ]
        [ nip 2 swap <string> ]
    } 2cleave :> ( openstr2 openstr1 closestr2 )
    [| n string tag! ch |
        ch {
            { CHAR: = [
                tag 1 cut-slice* drop tag! ! tag of (=( is ( here, fix it
                n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )
                ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless
                opening matching-delimiter-string :> needle

                n' string' needle slice-til-string :> ( n'' string'' payload closing )
                n'' string
                tag opening payload closing 4array
            ] }
            { open-ch [
                tag 1 cut-slice* swap tag! 1 modify-to :> opening
                n 1 + string closestr2 slice-til-string :> ( n' string' payload closing )
                n' string
                tag opening payload closing 4array
            ] }
            [ [ tag openstr2 n string ] dip long-opening-mismatch ]
        } case
     ] ;

: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ;
: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ;
: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ;

DEFER: lex-factor-top
DEFER: lex-factor
ERROR: lex-expected-but-got-eof n string expected ;
! For implementing [ { (
: lex-until ( n string tag-sequence -- n' string payload )
    3dup '[
        [
            lex-factor-top dup f like [ , ] when* [
                dup [
                    ! } gets a chance, but then also full seq { } after recursion...
                    [ _ ] dip '[ _ sequence= ] any? not
                ] [
                    drop t ! loop again?
                ] if
            ] [
                _ _ _ lex-expected-but-got-eof
            ] if*
        ] loop
    ] { } make ;

DEFER: section-close?
DEFER: upper-colon?
DEFER: lex-factor-nested
: lex-colon-until ( n string tag-sequence -- n' string payload )
    '[
        [
            lex-factor-nested dup f like [ , ] when* [
                dup [
                    ! This is for ending COLON: forms like ``A: PRIVATE>``
                    dup section-close? [
                        drop f
                    ] [
                        ! } gets a chance, but then also full seq { } after recursion...
                        [ _ ] dip '[ _ sequence= ] any? not
                    ] if
                ] [
                    drop t ! loop again?
                ] if
            ] [
                f
            ] if*
        ] loop
    ] { } make ;

: split-double-dash ( seq -- seqs )
    dup [ { [ "--" sequence= ] } 1&& ] split-when
    dup length 1 > [ nip ] [ drop ] if ;

MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )
    ch dup matching-delimiter {
        [ drop "=" swap prefix ]
        [ nip 1string ]
    } 2cleave :> ( openstreq closestr1 )  ! [= ]
    [| n string tag |
        n string tag
        2over nth-check-eof {
            { [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or ((
            { [ dup blank? ] [
                drop dup '[ _ matching-delimiter-string closestr1 2array members lex-until ] dip
                swap unclip-last 3array ] } ! ( foo )
            [ drop [ slice-til-whitespace drop ] dip span-slices ]  ! (foo)
        } cond
    ] ;

: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ;
: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ;
: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ;
: read-string-payload ( n string -- n' string )
    over [
        { CHAR: \\ CHAR: \" } slice-til-separator-inclusive {
            { f [ drop ] }
            { CHAR: \" [ drop ] }
            { CHAR: \\ [ drop next-char-from drop read-string-payload ] }
        } case
    ] [
        string-expected-got-eof
    ] if ;

:: read-string ( n string tag -- n' string seq )
    n string read-string-payload drop :> n'
    n' string
    n' [ n string string-expected-got-eof ] unless
    n n' 1 - string <slice>
    n' 1 - n' string <slice>
    tag -rot 3array ;

: take-comment ( n string slice -- n' string comment )
    2over ?nth CHAR: [ = [
        [ 1 + ] 2dip 2over ?nth read-double-matched-bracket
    ] [
        [ slice-til-eol drop ] dip swap 2array
    ] if ;

: terminator? ( slice -- ? )
    {
        [ ";" sequence= ]
        [ "]" sequence= ]
        [ "}" sequence= ]
        [ ")" sequence= ]
    } 1|| ;

ERROR: expected-length-tokens n string length seq ;
: ensure-no-false ( n string seq -- n string seq )
    dup [ length 0 > ] all? [ [ length ] keep expected-length-tokens ] unless ;

ERROR: token-expected n string obj ;
ERROR: unexpected-terminator n string slice ;
: read-lowercase-colon ( n string slice -- n' string lowercase-colon )
    dup [ CHAR: : = ] count-tail
    '[
        _ [ lex-factor ] replicate ensure-no-false dup [ token-expected ] unless
        dup terminator? [ unexpected-terminator ] when
    ] dip swap 2array ;

: (strict-upper?) ( string -- ? )
    {
        ! All chars must...
        [
            [
                { [ CHAR: A CHAR: Z between? ] [ "':-\\#" member? ] } 1||
            ] all?
        ]
        ! At least one char must...
        [ [ { [ CHAR: A CHAR: Z between? ] [ CHAR: ' = ] } 1|| ] any? ]
    } 1&& ;

: strict-upper? ( string -- ? )
    { [ ":" sequence= ] [ (strict-upper?) ] } 1|| ;

! <A <A: but not <A>
: section-open? ( string -- ? )
    {
        [ "<" head? ]
        [ length 2 >= ]
        [ rest strict-upper? ]
        [ ">" tail? not ]
    } 1&& ;

: html-self-close? ( string -- ? )
    {
        [ "<" head? ]
        [ length 2 >= ]
        [ rest strict-upper? not ]
        [ [ blank? ] any? not ]
        [ "/>" tail? ]
    } 1&& ;

: html-full-open? ( string -- ? )
    {
        [ "<" head? ]
        [ length 2 >= ]
        [ second CHAR: / = not ]
        [ rest strict-upper? not ]
        [ [ blank? ] any? not ]
        [ ">" tail? ]
    } 1&& ;

: html-half-open? ( string -- ? )
    {
        [ "<" head? ]
        [ length 2 >= ]
        [ second CHAR: / = not ]
        [ rest strict-upper? not ]
        [ [ blank? ] any? not ]
        [ ">" tail? not ]
    } 1&& ;

: html-close? ( string -- ? )
    {
        [ "</" head? ]
        [ length 2 >= ]
        [ rest strict-upper? not ]
        [ [ blank? ] any? not ]
        [ ">" tail? ]
    } 1&& ;

: special-acute? ( string -- ? )
    {
        [ section-open? ]
        [ html-self-close? ]
        [ html-full-open? ]
        [ html-half-open? ]
        [ html-close? ]
    } 1|| ;

: upper-colon? ( string -- ? )
    dup { [ length 0 > ] [ [ CHAR: : = ] all? ] } 1&& [
        drop t
    ] [
        {
            [ length 2 >= ]
            [ "\\" head? not ] ! XXX: good?
            [ ":" tail? ]
            [ dup [ CHAR: : = ] find drop head strict-upper? ]
        } 1&&
    ] if ;

: section-close? ( string -- ? )
    {
        [ length 2 >= ]
        [ "\\" head? not ] ! XXX: good?
        [ ">" tail? ]
        [
            {
                [ but-last strict-upper? ]
                [ { [ ";" head? ] [ rest but-last strict-upper? ] } 1&& ]
            } 1||
        ]
    } 1&& ;

: read-til-semicolon ( n string slice -- n' string semi )
    dup '[ but-last ";" append ";" 2array { "--" ")" } append lex-colon-until ] dip
    swap
    ! What ended the FOO: .. ; form?
    ! Remove the ; from the payload if present
    ! XXX: probably can remove this, T: is dumb
    ! Also in stack effects ( T: int -- ) can be ended by -- and )
    dup ?last {
        { [ dup ";" sequence= ] [ drop unclip-last 3array ] }
        { [ dup ";" tail? ] [ drop unclip-last 3array ] }
        { [ dup "--" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
        { [ dup "]" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
        { [ dup "}" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
        { [ dup ")" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } ! (n*quot) breaks
        { [ dup section-close? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
        { [ dup upper-colon? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
        [ drop 2array ]
    } cond ;

ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ;
: read-colon ( n string slice -- n' string colon )
    {
        { [ dup strict-upper? ] [ read-til-semicolon ] }
        { [ dup ":" tail? ] [ dup ":" head? [ read-lowercase-colon ] unless ] } ! :foo: vs foo:
        [ ]
    } cond ;

: read-acute-html ( n string slice -- n' string acute )
    {
        ! <FOO <FOO:
        { [ dup section-open? ] [
            [
                matching-section-delimiter 1array lex-until
            ] keep swap unclip-last 3array
        ] }
        ! <foo/>
        { [ dup html-self-close? ] [
            ! do nothing special
        ] }
        ! <foo>
        { [ dup html-full-open? ] [
            dup [
                rest-slice
                dup ">" tail? [ but-last-slice ] when
                "</" ">" surround 1array lex-until unclip-last
            ] dip -rot 3array
        ] }
        ! <foo
        { [ dup html-half-open? ] [
            ! n seq slice
            [ { ">" "/>" } lex-until ] dip
            ! n seq slice2 slice
            over ">" sequence= [
                "</" ">" surround array '[ _ lex-until ] dip unclip-last
                -rot roll unclip-last [ 3array ] 2dip 3array
            ] [
                ! self-contained
                swap unclip-last 3array
            ] if
        ] }
        ! </foo>
        { [ dup html-close? ] [
            ! Do nothing
        ] }
        [ [ slice-til-whitespace drop ] dip span-slices ]
    } cond ;

: read-acute ( n string slice -- n' string acute )
    [ matching-section-delimiter 1array lex-until ] keep swap unclip-last 3array ;

! Words like append! and suffix! are allowed for now.
: read-exclamation ( n string slice -- n' string obj )
    dup { [ "!" sequence= ] [ "#!" sequence= ] } 1||
    [ take-comment ] [ merge-slice-til-whitespace ] if ;

ERROR: no-backslash-payload n string slice ;
: (read-backslash) ( n string slice -- n' string obj )
    merge-slice-til-whitespace dup "\\" tail? [
        ! \ foo, M\ foo
        dup [ CHAR: \\ = ] count-tail
        '[
            _ [ skip-blank-from slice-til-whitespace drop ] replicate
            ensure-no-false
            dup [ no-backslash-payload ] unless
        ] dip swap 2array
    ] when ;

DEFER: lex-factor-top*
: read-backslash ( n string slice -- n' string obj )
    ! foo\ so far, could be foo\bar{
    ! remove the \ and continue til delimiter/eof
    [ "\"!:[{(<>\s\r\n" slice-til-either ] dip swap [ span-slices ] dip
    over "\\" head? [
        drop
        ! \ foo
        dup [ CHAR: \\ = ] all? [ (read-backslash) ] [ merge-slice-til-whitespace ] if
    ] [
        ! foo\ or foo\bar (?)
        over "\\" tail? [ drop (read-backslash) ] [ lex-factor-top* ] if
    ] if ;

! If the slice is 0 width, we stopped on whitespace.
! Advance the index and read again!

: read-token-or-whitespace-top ( n string slice -- n' string slice/f )
    dup length 0 = [ [ 1 + ] 2dip drop lex-factor-top ] when ;

: read-token-or-whitespace-nested ( n string slice -- n' string slice/f )
    dup length 0 = [ [ 1 + ] 2dip drop lex-factor-nested ] when ;

: lex-factor-fallthrough ( n/f string slice/f ch/f -- n'/f string literal )
    {
        { CHAR: \ [ read-backslash ] }
        { CHAR: [ [ read-bracket ] }
        { CHAR: { [ read-brace ] }
        { CHAR: ( [ read-paren ] }
        { CHAR: ] [ ] }
        { CHAR: } [ ] }
        { CHAR: ) [ ] }
        { CHAR: " [ read-string ] }
        { CHAR: ! [ read-exclamation ] }
        { CHAR: > [
            [ [ CHAR: > = not ] slice-until ] dip merge-slices
            dup section-close? [
                [ slice-til-whitespace drop ] dip ?span-slices
            ] unless
        ] }
        { f [ ] }
    } case ;

! Inside a FOO: or a <FOO FOO>
: lex-factor-nested* ( n/f string slice/f ch/f -- n'/f string literal )
    {
        ! Nested ``A: a B: b`` so rewind and let the parser get it top-level
        { CHAR: : [
            ! A: B: then interrupt the current parser
            ! A: b: then keep going
            merge-slice-til-whitespace
            dup { [ upper-colon? ] [ ":" = ] } 1||
            ! dup upper-colon?
            [ rewind-slice f ]
            [ read-colon ] if
        ] }
        { CHAR: < [
            ! FOO: a b <BAR: ;BAR>
            ! FOO: a b <BAR BAR>
            ! FOO: a b <asdf>
            ! FOO: a b <asdf asdf>

            ! if we are in a FOO: and we hit a <BAR or <BAR:
            ! then end the FOO:
            ! Don't rewind for a <foo/> or <foo></foo>
            [ slice-til-whitespace drop ] dip span-slices
            dup section-open? [ rewind-slice f ] when
        ] }
        { CHAR: \s [ read-token-or-whitespace-nested ] }
        { CHAR: \r [ read-token-or-whitespace-nested ] }
        { CHAR: \n [ read-token-or-whitespace-nested ] }
        [ lex-factor-fallthrough ]
    } case ;

: lex-factor-nested ( n/f string -- n'/f string literal )
    ! skip-whitespace
    "\"\\!:[{(]})<>\s\r\n" slice-til-either
    lex-factor-nested* ; inline

: lex-factor-top* ( n/f string slice/f ch/f -- n'/f string literal )
    {
        { CHAR: : [ merge-slice-til-whitespace read-colon ] }
        { CHAR: < [
            ! FOO: a b <BAR: ;BAR>
            ! FOO: a b <BAR BAR>
            ! FOO: a b <asdf>
            ! FOO: a b <asdf asdf>

            ! if we are in a FOO: and we hit a <BAR or <BAR:
            ! then end the FOO:
            [ slice-til-whitespace drop ] dip span-slices
            ! read-acute-html
            dup section-open? [ read-acute ] when
        ] }

        { CHAR: \s [ read-token-or-whitespace-top ] }
        { CHAR: \r [ read-token-or-whitespace-top ] }
        { CHAR: \n [ read-token-or-whitespace-top ] }
        [ lex-factor-fallthrough ]
    } case ;

: lex-factor-top ( n/f string -- n'/f string literal )
    ! skip-whitespace
    "\"\\!:[{(]})<>\s\r\n" slice-til-either
    lex-factor-top* ; inline

ERROR: compound-syntax-disallowed n seq obj ;
: check-for-compound-syntax ( n/f seq obj -- n/f seq obj )
    dup length 1 > [ compound-syntax-disallowed ] when ;

: check-compound-loop ( n/f string -- n/f string ? )
    [ ] [ peek-from ] [ previous-from ] 2tri
    [ blank? ] bi@ or not ! no blanks between tokens
    pick and ; ! and a valid index

: lex-factor ( n/f string/f -- n'/f string literal/f )
    [
        ! Compound syntax loop
        [
            lex-factor-top f like [ , ] when*
            ! concatenated syntax ( a )[ a 1 + ]( b )
            check-compound-loop
        ] loop
    ] { } make
    check-for-compound-syntax
    ! concat ! "ALIAS: n*quot (n*quot)" string>literals ... breaks here
    ?first f like ;

: string>literals ( string -- sequence )
    [ 0 ] dip [
        [ lex-factor [ , ] when* over ] loop
    ] { } make 2nip ;

: vocab>literals ( vocab -- sequence )
    ".private" ?tail drop
    vocab-source-path utf8 file-contents string>literals ;

: path>literals ( path -- sequence )
    utf8 file-contents string>literals ;

: lex-paths ( vocabs -- assoc )
    [ [ path>literals ] [ nip ] recover ] map-zip ;

: lex-vocabs ( vocabs -- assoc )
    [ [ vocab>literals ] [ nip ] recover ] map-zip ;

: failed-lexing ( assoc -- assoc' ) [ nip array? ] assoc-reject ;

: lex-core ( -- assoc ) core-vocabs lex-vocabs ;
: lex-basis ( -- assoc ) basis-vocabs lex-vocabs ;
: lex-extra ( -- assoc ) extra-vocabs lex-vocabs ;
: lex-roots ( -- assoc ) lex-core lex-basis lex-extra 3append ;

: lex-docs ( -- assoc ) all-docs-paths lex-paths ;
: lex-tests ( -- assoc ) all-tests-paths lex-paths ;

: lex-all ( -- assoc )
    lex-roots lex-docs lex-tests 3append ;
modern: Add to extra/ some version of modern that has the modern.paths vocab. For travisci. 2018-07-04 19:47:14 -04:00			`! Copyright (C) 2016 Doug Coleman.`
			`! See http://factorcode.org/license.txt for BSD license.`
			`USING: arrays assocs combinators combinators.short-circuit`
			`continuations fry io.encodings.utf8 io.files kernel locals make`
			`math math.order modern.paths modern.slices sequences`
			`sequences.extras sets splitting strings unicode vocabs.loader ;`
			`IN: modern`

			`ERROR: string-expected-got-eof n string ;`
			`ERROR: long-opening-mismatch tag open n string ch ;`

			`! (( )) [[ ]] {{ }}`
			`MACRO:: read-double-matched ( open-ch -- quot: ( n string tag ch -- n' string seq ) )`
			`open-ch dup matching-delimiter {`
			`[ drop 2 swap <string> ]`
			`[ drop 1string ]`
			`[ nip 2 swap <string> ]`
			`} 2cleave :> ( openstr2 openstr1 closestr2 )`
			`[\| n string tag! ch \|`
			`ch {`
			`{ CHAR: = [`
			`tag 1 cut-slice* drop tag! ! tag of (=( is ( here, fix it`
			`n string openstr1 slice-til-separator-inclusive [ -1 modify-from ] dip :> ( n' string' opening ch )`
			`ch open-ch = [ tag openstr2 n string ch long-opening-mismatch ] unless`
			`opening matching-delimiter-string :> needle`

			`n' string' needle slice-til-string :> ( n'' string'' payload closing )`
			`n'' string`
			`tag opening payload closing 4array`
			`] }`
			`{ open-ch [`
			`tag 1 cut-slice* swap tag! 1 modify-to :> opening`
			`n 1 + string closestr2 slice-til-string :> ( n' string' payload closing )`
			`n' string`
			`tag opening payload closing 4array`
			`] }`
			`[ [ tag openstr2 n string ] dip long-opening-mismatch ]`
			`} case`
			`] ;`

			`: read-double-matched-paren ( n string tag ch -- n' string seq ) CHAR: ( read-double-matched ;`
			`: read-double-matched-bracket ( n string tag ch -- n' string seq ) CHAR: [ read-double-matched ;`
			`: read-double-matched-brace ( n string tag ch -- n' string seq ) CHAR: { read-double-matched ;`

			`DEFER: lex-factor-top`
			`DEFER: lex-factor`
			`ERROR: lex-expected-but-got-eof n string expected ;`
			`! For implementing [ { (`
			`: lex-until ( n string tag-sequence -- n' string payload )`
			`3dup '[`
			`[`
			`lex-factor-top dup f like [ , ] when* [`
			`dup [`
			`! } gets a chance, but then also full seq { } after recursion...`
			`[ _ ] dip '[ _ sequence= ] any? not`
			`] [`
			`drop t ! loop again?`
			`] if`
			`] [`
			`_ _ _ lex-expected-but-got-eof`
			`] if*`
			`] loop`
			`] { } make ;`

			`DEFER: section-close?`
			`DEFER: upper-colon?`
			`DEFER: lex-factor-nested`
			`: lex-colon-until ( n string tag-sequence -- n' string payload )`
			`'[`
			`[`
			`lex-factor-nested dup f like [ , ] when* [`
			`dup [`
			! This is for ending COLON: forms like ``A: PRIVATE>``
			`dup section-close? [`
			`drop f`
			`] [`
			`! } gets a chance, but then also full seq { } after recursion...`
			`[ _ ] dip '[ _ sequence= ] any? not`
			`] if`
			`] [`
			`drop t ! loop again?`
			`] if`
			`] [`
			`f`
			`] if*`
			`] loop`
			`] { } make ;`

			`: split-double-dash ( seq -- seqs )`
			`dup [ { [ "--" sequence= ] } 1&& ] split-when`
			`dup length 1 > [ nip ] [ drop ] if ;`

			`MACRO:: read-matched ( ch -- quot: ( n string tag -- n' string slice' ) )`
			`ch dup matching-delimiter {`
			`[ drop "=" swap prefix ]`
			`[ nip 1string ]`
			`} 2cleave :> ( openstreq closestr1 ) ! [= ]`
			`[\| n string tag \|`
			`n string tag`
			`2over nth-check-eof {`
			`{ [ dup openstreq member? ] [ ch read-double-matched ] } ! (=( or ((`
			`{ [ dup blank? ] [`
			`drop dup '[ _ matching-delimiter-string closestr1 2array members lex-until ] dip`
			`swap unclip-last 3array ] } ! ( foo )`
			`[ drop [ slice-til-whitespace drop ] dip span-slices ] ! (foo)`
			`} cond`
			`] ;`

			`: read-bracket ( n string slice -- n' string slice' ) CHAR: [ read-matched ;`
			`: read-brace ( n string slice -- n' string slice' ) CHAR: { read-matched ;`
			`: read-paren ( n string slice -- n' string slice' ) CHAR: ( read-matched ;`
			`: read-string-payload ( n string -- n' string )`
			`over [`
			`{ CHAR: \\ CHAR: \" } slice-til-separator-inclusive {`
			`{ f [ drop ] }`
			`{ CHAR: \" [ drop ] }`
			`{ CHAR: \\ [ drop next-char-from drop read-string-payload ] }`
			`} case`
			`] [`
			`string-expected-got-eof`
			`] if ;`

			`:: read-string ( n string tag -- n' string seq )`
			`n string read-string-payload drop :> n'`
			`n' string`
			`n' [ n string string-expected-got-eof ] unless`
			`n n' 1 - string <slice>`
			`n' 1 - n' string <slice>`
			`tag -rot 3array ;`

			`: take-comment ( n string slice -- n' string comment )`
			`2over ?nth CHAR: [ = [`
			`[ 1 + ] 2dip 2over ?nth read-double-matched-bracket`
			`] [`
			`[ slice-til-eol drop ] dip swap 2array`
			`] if ;`

			`: terminator? ( slice -- ? )`
			`{`
			`[ ";" sequence= ]`
			`[ "]" sequence= ]`
			`[ "}" sequence= ]`
			`[ ")" sequence= ]`
			`} 1\|\| ;`

			`ERROR: expected-length-tokens n string length seq ;`
			`: ensure-no-false ( n string seq -- n string seq )`
			`dup [ length 0 > ] all? [ [ length ] keep expected-length-tokens ] unless ;`

			`ERROR: token-expected n string obj ;`
			`ERROR: unexpected-terminator n string slice ;`
			`: read-lowercase-colon ( n string slice -- n' string lowercase-colon )`
			`dup [ CHAR: : = ] count-tail`
			`'[`
			`_ [ lex-factor ] replicate ensure-no-false dup [ token-expected ] unless`
			`dup terminator? [ unexpected-terminator ] when`
			`] dip swap 2array ;`

			`: (strict-upper?) ( string -- ? )`
			`{`
			`! All chars must...`
			`[`
			`[`
			`{ [ CHAR: A CHAR: Z between? ] [ "':-\\#" member? ] } 1\|\|`
			`] all?`
			`]`
			`! At least one char must...`
			`[ [ { [ CHAR: A CHAR: Z between? ] [ CHAR: ' = ] } 1\|\| ] any? ]`
			`} 1&& ;`

			`: strict-upper? ( string -- ? )`
			`{ [ ":" sequence= ] [ (strict-upper?) ] } 1\|\| ;`

			`! <A <A: but not <A>`
			`: section-open? ( string -- ? )`
			`{`
			`[ "<" head? ]`
			`[ length 2 >= ]`
			`[ rest strict-upper? ]`
			`[ ">" tail? not ]`
			`} 1&& ;`

			`: html-self-close? ( string -- ? )`
			`{`
			`[ "<" head? ]`
			`[ length 2 >= ]`
			`[ rest strict-upper? not ]`
			`[ [ blank? ] any? not ]`
			`[ "/>" tail? ]`
			`} 1&& ;`

			`: html-full-open? ( string -- ? )`
			`{`
			`[ "<" head? ]`
			`[ length 2 >= ]`
			`[ second CHAR: / = not ]`
			`[ rest strict-upper? not ]`
			`[ [ blank? ] any? not ]`
			`[ ">" tail? ]`
			`} 1&& ;`

			`: html-half-open? ( string -- ? )`
			`{`
			`[ "<" head? ]`
			`[ length 2 >= ]`
			`[ second CHAR: / = not ]`
			`[ rest strict-upper? not ]`
			`[ [ blank? ] any? not ]`
			`[ ">" tail? not ]`
			`} 1&& ;`

			`: html-close? ( string -- ? )`
			`{`
			`[ "</" head? ]`
			`[ length 2 >= ]`
			`[ rest strict-upper? not ]`
			`[ [ blank? ] any? not ]`
			`[ ">" tail? ]`
			`} 1&& ;`

			`: special-acute? ( string -- ? )`
			`{`
			`[ section-open? ]`
			`[ html-self-close? ]`
			`[ html-full-open? ]`
			`[ html-half-open? ]`
			`[ html-close? ]`
			`} 1\|\| ;`

			`: upper-colon? ( string -- ? )`
			`dup { [ length 0 > ] [ [ CHAR: : = ] all? ] } 1&& [`
			`drop t`
			`] [`
			`{`
			`[ length 2 >= ]`
			`[ "\\" head? not ] ! XXX: good?`
			`[ ":" tail? ]`
			`[ dup [ CHAR: : = ] find drop head strict-upper? ]`
			`} 1&&`
			`] if ;`

			`: section-close? ( string -- ? )`
			`{`
			`[ length 2 >= ]`
			`[ "\\" head? not ] ! XXX: good?`
			`[ ">" tail? ]`
			`[`
			`{`
			`[ but-last strict-upper? ]`
			`[ { [ ";" head? ] [ rest but-last strict-upper? ] } 1&& ]`
			`} 1\|\|`
			`]`
			`} 1&& ;`

			`: read-til-semicolon ( n string slice -- n' string semi )`
			`dup '[ but-last ";" append ";" 2array { "--" ")" } append lex-colon-until ] dip`
			`swap`
			`! What ended the FOO: .. ; form?`
			`! Remove the ; from the payload if present`
			`! XXX: probably can remove this, T: is dumb`
			`! Also in stack effects ( T: int -- ) can be ended by -- and )`
			`dup ?last {`
			`{ [ dup ";" sequence= ] [ drop unclip-last 3array ] }`
			`{ [ dup ";" tail? ] [ drop unclip-last 3array ] }`
			`{ [ dup "--" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }`
			`{ [ dup "]" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }`
			`{ [ dup "}" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }`
			`{ [ dup ")" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } ! (n*quot) breaks`
			`{ [ dup section-close? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }`
			`{ [ dup upper-colon? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }`
			`[ drop 2array ]`
			`} cond ;`

			`ERROR: colon-word-must-be-all-uppercase-or-lowercase n string word ;`
			`: read-colon ( n string slice -- n' string colon )`
			`{`
			`{ [ dup strict-upper? ] [ read-til-semicolon ] }`
			`{ [ dup ":" tail? ] [ dup ":" head? [ read-lowercase-colon ] unless ] } ! :foo: vs foo:`
			`[ ]`
			`} cond ;`

			`: read-acute-html ( n string slice -- n' string acute )`
			`{`
			`! <FOO <FOO:`
			`{ [ dup section-open? ] [`
			`[`
			`matching-section-delimiter 1array lex-until`
			`] keep swap unclip-last 3array`
			`] }`
			`! <foo/>`
			`{ [ dup html-self-close? ] [`
			`! do nothing special`
			`] }`
			`! <foo>`
			`{ [ dup html-full-open? ] [`
			`dup [`
			`rest-slice`
			`dup ">" tail? [ but-last-slice ] when`
			`"</" ">" surround 1array lex-until unclip-last`
			`] dip -rot 3array`
			`] }`
			`! <foo`
			`{ [ dup html-half-open? ] [`
			`! n seq slice`
			`[ { ">" "/>" } lex-until ] dip`
			`! n seq slice2 slice`
			`over ">" sequence= [`
			`"</" ">" surround array '[ _ lex-until ] dip unclip-last`
			`-rot roll unclip-last [ 3array ] 2dip 3array`
			`] [`
			`! self-contained`
			`swap unclip-last 3array`
			`] if`
			`] }`
			`! </foo>`
			`{ [ dup html-close? ] [`
			`! Do nothing`
			`] }`
			`[ [ slice-til-whitespace drop ] dip span-slices ]`
			`} cond ;`

			`: read-acute ( n string slice -- n' string acute )`
			`[ matching-section-delimiter 1array lex-until ] keep swap unclip-last 3array ;`

			`! Words like append! and suffix! are allowed for now.`
			`: read-exclamation ( n string slice -- n' string obj )`
			`dup { [ "!" sequence= ] [ "#!" sequence= ] } 1\|\|`
			`[ take-comment ] [ merge-slice-til-whitespace ] if ;`

			`ERROR: no-backslash-payload n string slice ;`
			`: (read-backslash) ( n string slice -- n' string obj )`
			`merge-slice-til-whitespace dup "\\" tail? [`
			`! \ foo, M\ foo`
			`dup [ CHAR: \\ = ] count-tail`
			`'[`
			`_ [ skip-blank-from slice-til-whitespace drop ] replicate`
			`ensure-no-false`
			`dup [ no-backslash-payload ] unless`
			`] dip swap 2array`
			`] when ;`

			`DEFER: lex-factor-top*`
			`: read-backslash ( n string slice -- n' string obj )`
			`! foo\ so far, could be foo\bar{`
			`! remove the \ and continue til delimiter/eof`
			`[ "\"!:[{(<>\s\r\n" slice-til-either ] dip swap [ span-slices ] dip`
			`over "\\" head? [`
			`drop`
			`! \ foo`
			`dup [ CHAR: \\ = ] all? [ (read-backslash) ] [ merge-slice-til-whitespace ] if`
			`] [`
			`! foo\ or foo\bar (?)`
			`over "\\" tail? [ drop (read-backslash) ] [ lex-factor-top* ] if`
			`] if ;`

			`! If the slice is 0 width, we stopped on whitespace.`
			`! Advance the index and read again!`

			`: read-token-or-whitespace-top ( n string slice -- n' string slice/f )`
			`dup length 0 = [ [ 1 + ] 2dip drop lex-factor-top ] when ;`

			`: read-token-or-whitespace-nested ( n string slice -- n' string slice/f )`
			`dup length 0 = [ [ 1 + ] 2dip drop lex-factor-nested ] when ;`

			`: lex-factor-fallthrough ( n/f string slice/f ch/f -- n'/f string literal )`
			`{`
			`{ CHAR: \ [ read-backslash ] }`
			`{ CHAR: [ [ read-bracket ] }`
			`{ CHAR: { [ read-brace ] }`
			`{ CHAR: ( [ read-paren ] }`
			`{ CHAR: ] [ ] }`
			`{ CHAR: } [ ] }`
			`{ CHAR: ) [ ] }`
			`{ CHAR: " [ read-string ] }`
			`{ CHAR: ! [ read-exclamation ] }`
			`{ CHAR: > [`
			`[ [ CHAR: > = not ] slice-until ] dip merge-slices`
			`dup section-close? [`
			`[ slice-til-whitespace drop ] dip ?span-slices`
			`] unless`
			`] }`
			`{ f [ ] }`
			`} case ;`

			`! Inside a FOO: or a <FOO FOO>`
			`: lex-factor-nested* ( n/f string slice/f ch/f -- n'/f string literal )`
			`{`
			! Nested ``A: a B: b`` so rewind and let the parser get it top-level
			`{ CHAR: : [`
			`! A: B: then interrupt the current parser`
			`! A: b: then keep going`
			`merge-slice-til-whitespace`
			`dup { [ upper-colon? ] [ ":" = ] } 1\|\|`
			`! dup upper-colon?`
			`[ rewind-slice f ]`
			`[ read-colon ] if`
			`] }`
			`{ CHAR: < [`
			`! FOO: a b <BAR: ;BAR>`
			`! FOO: a b <BAR BAR>`
			`! FOO: a b <asdf>`
			`! FOO: a b <asdf asdf>`

			`! if we are in a FOO: and we hit a <BAR or <BAR:`
			`! then end the FOO:`
			`! Don't rewind for a <foo/> or <foo></foo>`
			`[ slice-til-whitespace drop ] dip span-slices`
			`dup section-open? [ rewind-slice f ] when`
			`] }`
			`{ CHAR: \s [ read-token-or-whitespace-nested ] }`
			`{ CHAR: \r [ read-token-or-whitespace-nested ] }`
			`{ CHAR: \n [ read-token-or-whitespace-nested ] }`
			`[ lex-factor-fallthrough ]`
			`} case ;`

			`: lex-factor-nested ( n/f string -- n'/f string literal )`
			`! skip-whitespace`
			`"\"\\!:[{(]})<>\s\r\n" slice-til-either`
			`lex-factor-nested* ; inline`

			`: lex-factor-top* ( n/f string slice/f ch/f -- n'/f string literal )`
			`{`
			`{ CHAR: : [ merge-slice-til-whitespace read-colon ] }`
			`{ CHAR: < [`
			`! FOO: a b <BAR: ;BAR>`
			`! FOO: a b <BAR BAR>`
			`! FOO: a b <asdf>`
			`! FOO: a b <asdf asdf>`

			`! if we are in a FOO: and we hit a <BAR or <BAR:`
			`! then end the FOO:`
			`[ slice-til-whitespace drop ] dip span-slices`
			`! read-acute-html`
			`dup section-open? [ read-acute ] when`
			`] }`

			`{ CHAR: \s [ read-token-or-whitespace-top ] }`
			`{ CHAR: \r [ read-token-or-whitespace-top ] }`
			`{ CHAR: \n [ read-token-or-whitespace-top ] }`
			`[ lex-factor-fallthrough ]`
			`} case ;`

			`: lex-factor-top ( n/f string -- n'/f string literal )`
			`! skip-whitespace`
			`"\"\\!:[{(]})<>\s\r\n" slice-til-either`
			`lex-factor-top* ; inline`

			`ERROR: compound-syntax-disallowed n seq obj ;`
			`: check-for-compound-syntax ( n/f seq obj -- n/f seq obj )`
			`dup length 1 > [ compound-syntax-disallowed ] when ;`

			`: check-compound-loop ( n/f string -- n/f string ? )`
			`[ ] [ peek-from ] [ previous-from ] 2tri`
			`[ blank? ] bi@ or not ! no blanks between tokens`
			`pick and ; ! and a valid index`

			`: lex-factor ( n/f string/f -- n'/f string literal/f )`
			`[`
			`! Compound syntax loop`
			`[`
			`lex-factor-top f like [ , ] when*`
			`! concatenated syntax ( a )[ a 1 + ]( b )`
			`check-compound-loop`
			`] loop`
			`] { } make`
			`check-for-compound-syntax`
			`! concat ! "ALIAS: nquot (nquot)" string>literals ... breaks here`
			`?first f like ;`

			`: string>literals ( string -- sequence )`
			`[ 0 ] dip [`
			`[ lex-factor [ , ] when* over ] loop`
			`] { } make 2nip ;`

			`: vocab>literals ( vocab -- sequence )`
			`".private" ?tail drop`
			`vocab-source-path utf8 file-contents string>literals ;`

			`: path>literals ( path -- sequence )`
			`utf8 file-contents string>literals ;`

			`: lex-paths ( vocabs -- assoc )`
			`[ [ path>literals ] [ nip ] recover ] map-zip ;`

			`: lex-vocabs ( vocabs -- assoc )`
			`[ [ vocab>literals ] [ nip ] recover ] map-zip ;`

			`: failed-lexing ( assoc -- assoc' ) [ nip array? ] assoc-reject ;`

modern: core-vocabs should not return the broken test vocabs. 2018-07-04 20:05:37 -04:00			`: lex-core ( -- assoc ) core-vocabs lex-vocabs ;`
modern: Add to extra/ some version of modern that has the modern.paths vocab. For travisci. 2018-07-04 19:47:14 -04:00			`: lex-basis ( -- assoc ) basis-vocabs lex-vocabs ;`
			`: lex-extra ( -- assoc ) extra-vocabs lex-vocabs ;`
			`: lex-roots ( -- assoc ) lex-core lex-basis lex-extra 3append ;`

			`: lex-docs ( -- assoc ) all-docs-paths lex-paths ;`
			`: lex-tests ( -- assoc ) all-tests-paths lex-paths ;`

			`: lex-all ( -- assoc )`
			`lex-roots lex-docs lex-tests 3append ;`