factor: Add more parser guts.

2018-08-10 18:01:19 -05:00 · 2018-08-10 18:01:19 -05:00 · cbfdf2cfa8
parent 4bcae2590c
commit cbfdf2cfa8
8 changed files with 206 additions and 132 deletions
--- a/core/kernel/kernel.factor
+++ b/core/kernel/kernel.factor
@ -531,3 +531,5 @@ PRIVATE>
 : get-retainstack ( -- array )
    context retainstack-for ; inline
 : no-op ( obj -- obj ) ;
--- a/core/lexer/lexer.factor
+++ b/core/lexer/lexer.factor
@ -84,8 +84,8 @@ M: lexer skip-blank
 GENERIC: skip-word ( lexer -- )
-: find-container-delimiter ( i str -- n/f )
+: find-container-delimiter ( i str delim-str -- n/f )
-    2dup [ "[" member? ] find-from [
+    [ 2dup ] dip '[ _ member? ] find-from [
        [ swap subseq [ ch'= = ] all? ] keep and
    ] [
        3drop f
@ -93,11 +93,19 @@ GENERIC: skip-word ( lexer -- )
 M: lexer skip-word
    [
-        2dup [ " \"[" member? ] find-from
+        2dup [ " \"[{(" member? ] find-from
        {
            { ch'\" [ 2nip 1 + ] }
-            { ch'\[  [
+            { ch'\[ [
-                1 + over find-container-delimiter
+                1 + over "[" find-container-delimiter
                dup [ 2nip 1 + ] [ drop f skip ] if
            ] }
            { ch'\{ [
                1 + over "{" find-container-delimiter
                dup [ 2nip 1 + ] [ drop f skip ] if
            ] }
            { ch'\( [
                1 + over "(" find-container-delimiter
                dup [ 2nip 1 + ] [ drop f skip ] if
            ] }
            [ 2drop f skip ]
@ -140,8 +148,8 @@ DEFER: parse-token
 : unescape-token ( string -- string' )
    dup length 1 = [ "\\" ?head drop ] unless ;
-: unhashtag-token ( string -- string' )
+: unhashtag-token ( string -- string' ? )
-    dup length 1 = [ "#" ?head [ drop f ] when ] unless ;
+    dup length 1 = [ f ] [ "#" ?head >boolean ] if ;
 : unescape-tokens ( seq -- seq' )
    [ unescape-token ] map ;
@ -149,49 +157,12 @@ DEFER: parse-token
 : parse-token ( lexer -- str/f )
    dup parse-raw [ skip-comments ] [ drop f ] if* ;
-: ?scan-token ( -- str/f ) lexer get parse-token unescape-token unhashtag-token ;
+: ?scan-token ( -- str/f ) lexer get parse-token unescape-token ;
 PREDICATE: unexpected-eof < unexpected got>> not ;
 : throw-unexpected-eof ( word -- * ) f unexpected ;
 : (strict-single-quote?) ( string -- ? )
    "'" split1
    [ "'" head? not ]
    [
        [ length 0 > ]
        [
            ! ch'\'
            [ "\\'" tail? ] [ "'" tail? not ] bi or
        ] bi and
    ] bi* and ;
 : strict-single-quote? ( string -- ? )
    dup (strict-single-quote?)
    [ "'[" sequence= not ] [ drop f ] if ;
 : strict-lower-colon? ( string -- ? )
    [ ch'\: = ] cut-tail
    [
        [ length 0 > ] [
            [ [ ch'a ch'z between? ] [ "-" member? ] bi or ] all?
        ] bi and ]
    [ length 0 > ] bi* and ;
 : (strict-upper-colon?) ( string -- ? )
    ! All chars must...
    [
        [
            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
        ] all?
    ]
    ! At least one char must...
    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
 : strict-upper-colon? ( string -- ? )
    [ [ ch'\: = ] all? ]
    [ (strict-upper-colon?) ] bi or ;
 : scan-token ( -- str )
    ?scan-token [ "token" throw-unexpected-eof ] unless* ;
--- a/core/multiline/multiline.factor
+++ b/core/multiline/multiline.factor
@ -31,19 +31,3 @@ PRIVATE>
 : parse-multiline-string0 ( end-text -- str )
    lexer get 0 (parse-multiline-string) ;
 ! SYNTAX: \[[ "]]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[=[ "]=]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[==[ "]==]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[===[ "]===]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[====[ "]====]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[=====[ "]=====]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \[======[ "]======]" parse-multiline-string0 suffix! ;
 ! SYNTAX: \![[ "]]" parse-multiline-string0 drop ;
 ! SYNTAX: \![=[ "]=]" parse-multiline-string0 drop ;
 ! SYNTAX: \![==[ "]==]" parse-multiline-string0 drop ;
 ! SYNTAX: \![===[ "]===]" parse-multiline-string0 drop ;
 ! SYNTAX: \![====[ "]====]" parse-multiline-string0 drop ;
 ! SYNTAX: \![=====[ "]=====]" parse-multiline-string0 drop ;
 ! SYNTAX: \![======[ "]======]" parse-multiline-string0 drop ;
--- a/core/parser/parser.factor
+++ b/core/parser/parser.factor
@ -71,7 +71,18 @@ DEFER: scan-object
 : string>new-parser ( string -- string/obj ? )
    {
        ! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] }
        ! { [ dup strict-upper-colon? ] [ parse-upper-colon t ] }
        ! { [ dup strict-section? ] [ parse-section t ] }
        ! { [ dup strict-named-section? ] [ parse-named-section t ] }
        { [ dup strict-single-quote? ] [ parse-single-quote t ] }
        ! { [ dup strict-double-quote? ] [ parse-double-quote t ] }
        ! { [ dup strict-bracket-container? ] [ parse-bracket-container t ] }
        ! { [ dup strict-brace-container? ] [ parse-brace-container t ] }
        ! { [ dup strict-paren-container? ] [ parse-paren-container t ] }
        ! { [ dup strict-bracket? ] [ parse-bracket t ] }
        ! { [ dup strict-brace? ] [ parse-brace t ] }
        ! { [ dup strict-paren? ] [ parse-paren t ] }
        [ f ]
    } cond ;
@ -144,8 +155,12 @@ ERROR: classoid-expected object ;
    } cond ;
 : parse-until-step ( accum end -- accum ? )
-    ?scan-token string>new-parser
+    ?scan-token
-    [ nip suffix! t ] [ (parse-until-step) ] if ;
+    unhashtag-token
    [
        string>new-parser
        [ nip suffix! t ] [ (parse-until-step) ] if
    ] dip [ over pop* ] [ ] if ;
 : (parse-until) ( accum end -- accum )
    [ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ;
--- a/core/strings/parser/parser.factor
+++ b/core/strings/parser/parser.factor
@ -159,3 +159,10 @@ PRIVATE>
            lexer get (parse-string)
        ] keep unescape-string
    ] rewind-lexer-on-error ;
 : lookup-char ( char -- obj )
    {
        { [ dup length 1 = ] [ first ] }
        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
        [ name>char-hook get ( name -- char ) call-effect ]
    } cond ;
--- a/core/syntax/modern/modern.factor
+++ b/core/syntax/modern/modern.factor
@ -1,56 +1,49 @@
 ! Copyright (C) 2018 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: assocs combinators kernel namespaces sequences splitting
+USING: arrays assocs combinators kernel math math.order
-strings strings.parser ;
+multiline namespaces sequences splitting strings strings.parser ;
 IN: syntax.modern
-INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
+: matching-delimiter ( ch -- ch' )
    H{
        { ch'\( ch'\) }
        { ch'\[ ch'\] }
        { ch'\{ ch'\} }
        { ch'< ch'> }
        { ch'\: ch'\; }
    } ?at drop ;
 : matching-delimiter-string ( string -- string' )
    [ matching-delimiter ] map ;
 INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: section-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: named-section-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ]
-: define-single-quote-word ( word def -- ) swap lower-colon-definitions get set-at ;
+: set-lower-colon-word ( word name -- ) lower-colon-definitions get set-at ;
-: define-lower-colon-word ( word def -- ) swap lower-colon-definitions get set-at ;
+: set-upper-colon-word ( word name -- ) upper-colon-definitions get set-at ;
-: define-upper-colon-word ( word def -- ) swap upper-colon-definitions get set-at ;
+: set-section-word ( word name -- ) section-definitions get set-at ;
-: define-double-quote-word ( word def -- ) swap double-quote-definitions get set-at ;
+: set-named-section-word ( word name -- ) named-section-definitions get set-at ;
-: define-bracket-container-word ( word def -- ) swap bracket-container-definitions get set-at ;
+: set-single-quote-word ( word name -- ) single-quote-definitions get set-at ;
-: define-brace-container-word ( word def -- ) swap brace-container-definitions get set-at ;
+: set-double-quote-word ( word name -- ) double-quote-definitions get set-at ;
-: define-paren-container-word ( word def -- ) swap paren-container-definitions get set-at ;
+: set-bracket-container-word ( word name -- ) bracket-container-definitions get set-at ;
 : set-brace-container-word ( word name -- ) brace-container-definitions get set-at ;
 : set-paren-container-word ( word name -- ) paren-container-definitions get set-at ;
-GENERIC: lower-colon>object ( obj -- obj' )
+: set-container-word ( word def -- )
 GENERIC: double-quote>object ( obj -- obj' )
 GENERIC: bracket-container>object ( obj -- obj' )
 GENERIC: brace-container>object ( obj -- obj' )
 GENERIC: paren-container>object ( obj -- obj' )
 ![[
    SYNTAX: LOWER-COLON:
    scan-new-class
    [ ]
    [ tuple { "object" } define-tuple-class ]
    [ '[ _ boa suffix! ] define-lower-colon-word ] tri ;
 ]]
 ERROR: no-single-quote-word payload word ;
 : handle-single-quote ( pair -- obj )
    first2 swap single-quote-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-single-quote-word ] if ;
 : ch>object ( ch -- obj )
    {
-        { [ dup length 1 = ] [ first ] }
+        [ set-single-quote-word ]
-        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
+        [ set-double-quote-word ]
-        [ name>char-hook get ( name -- char ) call-effect ]
+        [ set-bracket-container-word ]
-    } cond ;
+        [ set-brace-container-word ]
-
+        [ set-paren-container-word ]
-\ ch>object "ch" single-quote-definitions get set-at
+    } 2cleave ;
 ERROR: no-lower-colon-word payload word ;
 : handle-lower-colon ( pair -- obj )
@ -58,5 +51,132 @@ ERROR: no-lower-colon-word payload word ;
    [ execute( obj -- obj' ) ]
    [ no-lower-colon-word ] if ;
-: no-op ( obj -- obj' ) ;
+ERROR: no-single-quote-word payload word ;
-\ no-op "data-stack" lower-colon-definitions get set-at
+: handle-single-quote ( pair -- obj )
    first2 swap single-quote-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-single-quote-word ] if ;
 ERROR: no-section-word payload word ;
 : handle-section ( pair -- obj )
    first2 swap section-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-section-word ] if ;
 ERROR: no-named-section-word payload word ;
 : handle-named-section ( pair -- obj )
    first2 swap named-section-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-named-section-word ] if ;
 ERROR: no-double-quote-word payload word ;
 : handle-double-quote ( pair -- obj )
    first2 swap double-quote-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-double-quote-word ] if ;
 ERROR: no-bracket-container-word payload word ;
 : handle-bracket-container ( pair -- obj )
    first2 swap bracket-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-bracket-container-word ] if ;
 ERROR: no-brace-container-word payload word ;
 : handle-brace-container ( pair -- obj )
    first2 swap brace-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-brace-container-word ] if ;
 ERROR: no-paren-container-word payload word ;
 : handle-paren-container ( pair -- obj )
    first2 swap paren-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-paren-container-word ] if ;
 : lower-char? ( str -- ? ) [ ch'a ch'z between? ] [ ch'- = ] bi or ;
 : upper-char? ( str -- ? ) [ ch'A ch'Z between? ] [ ch'- = ] bi or ;
 : strict-lower-colon? ( string -- ? )
    [ ch'\: = ] cut-tail
    [
        [ length 0 > ] [ [ lower-char? ] all? ] bi and
    ] [ length 0 > ] bi* and ;
 : (strict-upper-colon?) ( string -- ? )
    ! All chars must...
    [
        [
            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
        ] all?
    ]
    ! At least one char must...
    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
 : strict-upper-colon? ( string -- ? )
    [ [ ch'\: = ] all? ]
    [ (strict-upper-colon?) ] bi or ;
 : strict-section-word? ( string -- ? )
    [ "<" head? ]
    [ rest [ upper-char? ] all? ] bi and ;
 : strict-named-section-word? ( string -- ? )
    [ "<" head? ]
    [ ":" tail? ]
    [ rest but-last [ upper-char? ] all? ] tri and and ;
 : (strict-single-quote?) ( string -- ? )
    "'" split1
    [ "'" head? not ]
    [
        [ length 0 > ]
        [
            ! ch'\'
            [ "\\'" tail? ] [ "'" tail? not ] bi or
        ] bi and
    ] bi* and ;
 : strict-single-quote? ( string -- ? )
    dup (strict-single-quote?)
    [ "'[" sequence= not ] [ drop f ] if ;
 : strict-double-quote? ( string -- ? ) ?last ch'\" = ;
 : strict-container? ( string open-str -- ? )
    [ split1 ] [ split1 ] bi
    [ ]
    [ [ ch'= = ] all? ]
    [ "" = ] tri* and and ;
 : strict-bracket-container? ( string -- ? ) "[" strict-container? ;
 : strict-brace-container? ( string -- ? ) "{" strict-container? ;
 : strict-paren-container? ( string -- ? ) "(" strict-container? ;
 : container-tag ( string open-str -- string' ) split1 drop ;
 : double-quote-tag ( string -- string' ) "\"" split1 drop ;
 : bracket-container-tag ( string -- string' ) "[" container-tag ;
 : brace-container-tag ( string -- string' ) "{" container-tag ;
 : paren-container-tag ( string -- string' ) "(" container-tag ;
 : parse-bracket-container ( string -- string' )
    "[" split1 "[" prepend matching-delimiter-string
    parse-multiline-string0 2array handle-bracket-container ;
 : parse-brace-container ( string -- string' )
    "{" split1 "{" prepend matching-delimiter-string
    parse-multiline-string0 2array handle-brace-container ;
 : parse-paren-container ( string -- string' )
    "(" split1 "(" prepend matching-delimiter-string
    parse-multiline-string0 2array handle-paren-container ;
 \ lookup-char "ch" set-container-word
 \ no-op "data-stack" set-lower-colon-word
 ! USE: urls \ >url "url" set-container-word
--- a/core/syntax/syntax.factor
+++ b/core/syntax/syntax.factor
@ -128,11 +128,8 @@ IN: bootstrap.syntax
    "f" [ f suffix! ] define-core-syntax
    "char:" [
-        lexer get parse-raw [ "token" throw-unexpected-eof ] unless* {
+        lexer get parse-raw [ "token" throw-unexpected-eof ] unless*
-            { [ dup length 1 = ] [ first ] }
+        lookup-char suffix!
            { [ "\\" ?head ] [ next-escape >string "" assert= ] }
            [ name>char-hook get call( name -- char ) ]
        } cond suffix!
    ] define-core-syntax
    "\"" [ parse-string suffix! ] define-core-syntax
@ -183,16 +180,6 @@ IN: bootstrap.syntax
        scan-new-word [ define-symbol ] keep scan-object '[ _ _ initialize ] append!
    ] define-core-syntax
 ![[
    "INITIALIZED-SYMBOL:" [
        scan-new-word [ define-symbol ]
        [
            name>> "initialize-" prepend create-word-in dup reset-generic
            scan-object dupd [ initialize ] curry curry ( -- ) define-declared
        ] bi
    ] define-core-syntax
 ]]
    "SYMBOL:" [
        scan-new-word define-symbol
    ] define-core-syntax
--- a/extra/modern/slices/slices.factor
+++ b/extra/modern/slices/slices.factor
@ -7,18 +7,6 @@ IN: modern.slices
 : >strings ( seq -- str )
    [ dup slice? [ >string ] when ] deep-map ;
 : matching-delimiter ( ch -- ch' )
    H{
        { ch'\( ch'\) }
        { ch'\[ ch'\] }
        { ch'\{ ch'\} }
        { ch'< ch'> }
        { ch'\: ch'\; }
    } ?at drop ;
 : matching-delimiter-string ( string -- string' )
    [ matching-delimiter ] map ;
 : matching-section-delimiter ( string -- string' )
    dup ":" tail? [
        rest but-last ";" ">" surround