From cbfdf2cfa89baebbb1a8e49c3cc3c1f9838842d6 Mon Sep 17 00:00:00 2001
From: Doug Coleman
Date: Fri, 10 Aug 2018 18:01:19 -0500
Subject: [PATCH] factor: Add more parser guts.

---
 core/kernel/kernel.factor         |   2 +
 core/lexer/lexer.factor           |  61 +++------
 core/multiline/multiline.factor   |  16 ---
 core/parser/parser.factor         |  19 ++-
 core/strings/parser/parser.factor |   7 +
 core/syntax/modern/modern.factor  | 204 ++++++++++++++++++++++++------
 core/syntax/syntax.factor         |  17 +--
 extra/modern/slices/slices.factor |  12 --
 8 files changed, 206 insertions(+), 132 deletions(-)

diff --git a/core/kernel/kernel.factor b/core/kernel/kernel.factor
index 3d7010183a..ec199b29dd 100644
--- a/core/kernel/kernel.factor
+++ b/core/kernel/kernel.factor
@@ -531,3 +531,5 @@ PRIVATE>
 
 : get-retainstack ( -- array )
     context retainstack-for ; inline
+
+: no-op ( obj -- obj ) ;
\ No newline at end of file
diff --git a/core/lexer/lexer.factor b/core/lexer/lexer.factor
index 7ffde33f1c..58bf8ff342 100644
--- a/core/lexer/lexer.factor
+++ b/core/lexer/lexer.factor
@@ -84,8 +84,8 @@ M: lexer skip-blank
 
 GENERIC: skip-word ( lexer -- )
 
-: find-container-delimiter ( i str -- n/f )
-    2dup [ "[" member? ] find-from [
+: find-container-delimiter ( i str delim-str -- n/f )
+    [ 2dup ] dip '[ _ member? ] find-from [
         [ swap subseq [ ch'= = ] all? ] keep and
     ] [
         3drop f
@@ -93,11 +93,19 @@ GENERIC: skip-word ( lexer -- )
 
 M: lexer skip-word
     [
-        2dup [ " \"[" member? ] find-from
+        2dup [ " \"[{(" member? ] find-from
         {
             { ch'\" [ 2nip 1 + ] }
-            { ch'\[ [
-                1 + over find-container-delimiter
+            { ch'\[ [
+                1 + over "[" find-container-delimiter
+                dup [ 2nip 1 + ] [ drop f skip ] if
+            ] }
+            { ch'\{ [
+                1 + over "{" find-container-delimiter
+                dup [ 2nip 1 + ] [ drop f skip ] if
+            ] }
+            { ch'\( [
+                1 + over "(" find-container-delimiter
                 dup [ 2nip 1 + ] [ drop f skip ] if
             ] }
             [ 2drop f skip ]
@@ -140,8 +148,8 @@ DEFER: parse-token
 : unescape-token ( string -- string' )
     dup length 1 = [ "\\" ?head drop ] unless ;
 
-: unhashtag-token ( string -- string' )
-    dup length 1 = [ "#" ?head [ drop f ] when ] unless ;
+: unhashtag-token ( string -- string' ? )
+    dup length 1 = [ f ] [ "#" ?head >boolean ] if ;
 
 : unescape-tokens ( seq -- seq' )
     [ unescape-token ] map ;
@@ -149,49 +157,12 @@ DEFER: parse-token
 : parse-token ( lexer -- str/f )
     dup parse-raw [ skip-comments ] [ drop f ] if* ;
 
-: ?scan-token ( -- str/f ) lexer get parse-token unescape-token unhashtag-token ;
+: ?scan-token ( -- str/f ) lexer get parse-token unescape-token ;
 
 PREDICATE: unexpected-eof < unexpected got>> not ;
 
 : throw-unexpected-eof ( word -- * ) f unexpected ;
 
-: (strict-single-quote?) ( string -- ? )
-    "'" split1
-    [ "'" head? not ]
-    [
-        [ length 0 > ]
-        [
-            ! ch'\'
-            [ "\\'" tail? ] [ "'" tail? not ] bi or
-        ] bi and
-    ] bi* and ;
-
-: strict-single-quote? ( string -- ? )
-    dup (strict-single-quote?)
-    [ "'[" sequence= not ] [ drop f ] if ;
-
-: strict-lower-colon? ( string -- ? )
-    [ ch'\: = ] cut-tail
-    [
-        [ length 0 > ] [
-            [ [ ch'a ch'z between? ] [ "-" member? ] bi or ] all?
-        ] bi and ]
-    [ length 0 > ] bi* and ;
-
-: (strict-upper-colon?) ( string -- ? )
-    ! All chars must...
-    [
-        [
-            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
-        ] all?
-    ]
-    ! At least one char must...
-    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
-
-: strict-upper-colon? ( string -- ? )
-    [ [ ch'\: = ] all? ]
-    [ (strict-upper-colon?) ] bi or ;
-
 : scan-token ( -- str )
     ?scan-token [ "token" throw-unexpected-eof ] unless* ;
 
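! Illustration (not part of the patch): find-container-delimiter now takes the
! opening delimiter as a string, so "[", "{" and "(" all reuse the same rule:
! the text between the opener and the next delimiter must be a run of "="
! signs, as in "[==[". A rough standalone sketch of just that check, using the
! stock CHAR: syntax instead of the patch's ch' literals; the vocabulary and
! word names here are made up for the example.
USING: sequences ;
IN: sketches.container-delimiters

: delimiter-tag-run? ( from to string -- ? )
    ! t when every character in [from,to) is '=', the same shape as the
    ! [ swap subseq [ ch'= = ] all? ] test inside find-container-delimiter.
    subseq [ CHAR: = = ] all? ;

! 1 3 "[==[ body" delimiter-tag-run? .   ! t, so "[==[" opens a container
! 1 3 "[x=[ body" delimiter-tag-run? .   ! f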
diff --git a/core/multiline/multiline.factor b/core/multiline/multiline.factor
index 71ff84fc08..1d1323d3c6 100644
--- a/core/multiline/multiline.factor
+++ b/core/multiline/multiline.factor
@@ -31,19 +31,3 @@ PRIVATE>
 
 : parse-multiline-string0 ( end-text -- str )
     lexer get 0 (parse-multiline-string) ;
-
-! SYNTAX: \[[ "]]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[=[ "]=]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[==[ "]==]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[===[ "]===]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[====[ "]====]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[=====[ "]=====]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[======[ "]======]" parse-multiline-string0 suffix! ;
-
-! SYNTAX: \![[ "]]" parse-multiline-string0 drop ;
-! SYNTAX: \![=[ "]=]" parse-multiline-string0 drop ;
-! SYNTAX: \![==[ "]==]" parse-multiline-string0 drop ;
-! SYNTAX: \![===[ "]===]" parse-multiline-string0 drop ;
-! SYNTAX: \![====[ "]====]" parse-multiline-string0 drop ;
-! SYNTAX: \![=====[ "]=====]" parse-multiline-string0 drop ;
-! SYNTAX: \![======[ "]======]" parse-multiline-string0 drop ;
diff --git a/core/parser/parser.factor b/core/parser/parser.factor
index fe502dfbc5..6e6373d6b6 100644
--- a/core/parser/parser.factor
+++ b/core/parser/parser.factor
@@ -71,7 +71,18 @@ DEFER: scan-object
 
 : string>new-parser ( string -- string/obj ? )
     {
         ! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] }
+        ! { [ dup strict-upper-colon? ] [ parse-upper-colon t ] }
+        ! { [ dup strict-section? ] [ parse-section t ] }
+        ! { [ dup strict-named-section? ] [ parse-named-section t ] }
         { [ dup strict-single-quote? ] [ parse-single-quote t ] }
+        ! { [ dup strict-double-quote? ] [ parse-double-quote t ] }
+        ! { [ dup strict-bracket-container? ] [ parse-bracket-container t ] }
+        ! { [ dup strict-brace-container? ] [ parse-brace-container t ] }
+        ! { [ dup strict-paren-container? ] [ parse-paren-container t ] }
+
+        ! { [ dup strict-bracket? ] [ parse-bracket t ] }
+        ! { [ dup strict-brace? ] [ parse-brace t ] }
+        ! { [ dup strict-paren? ] [ parse-paren t ] }
         [ f ]
     } cond ;
@@ -144,8 +155,12 @@ ERROR: classoid-expected object ;
     } cond ;
 
 : parse-until-step ( accum end -- accum ? )
-    ?scan-token string>new-parser
-    [ nip suffix! t ] [ (parse-until-step) ] if ;
+    ?scan-token
+    unhashtag-token
+    [
+        string>new-parser
+        [ nip suffix! t ] [ (parse-until-step) ] if
+    ] dip [ over pop* ] [ ] if ;
 
 : (parse-until) ( accum end -- accum )
     [ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ;
diff --git a/core/strings/parser/parser.factor b/core/strings/parser/parser.factor
index bb0deba403..caf080e36e 100644
--- a/core/strings/parser/parser.factor
+++ b/core/strings/parser/parser.factor
@@ -159,3 +159,10 @@ PRIVATE>
             lexer get (parse-string)
         ] keep unescape-string
     ] rewind-lexer-on-error ;
+
+: lookup-char ( char -- obj )
+    {
+        { [ dup length 1 = ] [ first ] }
+        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
+        [ name>char-hook get ( name -- char ) call-effect ]
+    } cond ;
\ No newline at end of file
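! Illustration (not part of the patch): the intended behavior of the new
! lookup-char word defined above, assuming the patched strings.parser vocab is
! loaded. Only the single-character branch is shown with a concrete result;
! backslash tokens go through next-escape and longer names go through
! name>char-hook, whose results depend on the hook that is installed.
USING: prettyprint strings.parser ;

"a" lookup-char .    ! 97, a one-character token is its own code point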
diff --git a/core/syntax/modern/modern.factor b/core/syntax/modern/modern.factor
index fc20faea0c..745c58c5c1 100644
--- a/core/syntax/modern/modern.factor
+++ b/core/syntax/modern/modern.factor
@@ -1,56 +1,49 @@
 ! Copyright (C) 2018 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: assocs combinators kernel namespaces sequences splitting
-strings strings.parser ;
+USING: arrays assocs combinators kernel math math.order
+multiline namespaces sequences splitting strings strings.parser ;
 IN: syntax.modern
 
-INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
+: matching-delimiter ( ch -- ch' )
+    H{
+        { ch'\( ch'\) }
+        { ch'\[ ch'\] }
+        { ch'\{ ch'\} }
+        { ch'< ch'> }
+        { ch'\: ch'\; }
+    } ?at drop ;
+
+: matching-delimiter-string ( string -- string' )
+    [ matching-delimiter ] map ;
+
 INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: section-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: named-section-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ]
 
-: define-single-quote-word ( word def -- ) swap lower-colon-definitions get set-at ;
-: define-lower-colon-word ( word def -- ) swap lower-colon-definitions get set-at ;
-: define-upper-colon-word ( word def -- ) swap upper-colon-definitions get set-at ;
-: define-double-quote-word ( word def -- ) swap double-quote-definitions get set-at ;
-: define-bracket-container-word ( word def -- ) swap bracket-container-definitions get set-at ;
-: define-brace-container-word ( word def -- ) swap brace-container-definitions get set-at ;
-: define-paren-container-word ( word def -- ) swap paren-container-definitions get set-at ;
+: set-lower-colon-word ( word name -- ) lower-colon-definitions get set-at ;
+: set-upper-colon-word ( word name -- ) upper-colon-definitions get set-at ;
+: set-section-word ( word name -- ) section-definitions get set-at ;
+: set-named-section-word ( word name -- ) named-section-definitions get set-at ;
+: set-single-quote-word ( word name -- ) single-quote-definitions get set-at ;
+: set-double-quote-word ( word name -- ) double-quote-definitions get set-at ;
+: set-bracket-container-word ( word name -- ) bracket-container-definitions get set-at ;
+: set-brace-container-word ( word name -- ) brace-container-definitions get set-at ;
+: set-paren-container-word ( word name -- ) paren-container-definitions get set-at ;
 
-GENERIC: lower-colon>object ( obj -- obj' )
-GENERIC: double-quote>object ( obj -- obj' )
-GENERIC: bracket-container>object ( obj -- obj' )
-GENERIC: brace-container>object ( obj -- obj' )
-GENERIC: paren-container>object ( obj -- obj' )
-
-![[
-    SYNTAX: LOWER-COLON:
-    scan-new-class
-    [ ]
-    [ tuple { "object" } define-tuple-class ]
-    [ '[ _ boa suffix! ] define-lower-colon-word ] tri ;
-]]
-
-
-ERROR: no-single-quote-word payload word ;
-: handle-single-quote ( pair -- obj )
-    first2 swap single-quote-definitions get ?at
-    [ execute( obj -- obj' ) ]
-    [ no-single-quote-word ] if ;
-
-: ch>object ( ch -- obj )
+: set-container-word ( word def -- )
     {
-        { [ dup length 1 = ] [ first ] }
-        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
-        [ name>char-hook get ( name -- char ) call-effect ]
-    } cond ;
-
-\ ch>object "ch" single-quote-definitions get set-at
-
-
+        [ set-single-quote-word ]
+        [ set-double-quote-word ]
+        [ set-bracket-container-word ]
+        [ set-brace-container-word ]
+        [ set-paren-container-word ]
+    } 2cleave ;
 
 ERROR: no-lower-colon-word payload word ;
 : handle-lower-colon ( pair -- obj )
@@ -58,5 +51,132 @@ ERROR: no-lower-colon-word payload word ;
     [ execute( obj -- obj' ) ]
     [ no-lower-colon-word ] if ;
 
-: no-op ( obj -- obj' ) ;
-\ no-op "data-stack" lower-colon-definitions get set-at
+ERROR: no-single-quote-word payload word ;
+: handle-single-quote ( pair -- obj )
+    first2 swap single-quote-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-single-quote-word ] if ;
+
+ERROR: no-section-word payload word ;
+: handle-section ( pair -- obj )
+    first2 swap section-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-section-word ] if ;
+
+ERROR: no-named-section-word payload word ;
+: handle-named-section ( pair -- obj )
+    first2 swap named-section-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-named-section-word ] if ;
+
+ERROR: no-double-quote-word payload word ;
+: handle-double-quote ( pair -- obj )
+    first2 swap double-quote-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-double-quote-word ] if ;
+
+ERROR: no-bracket-container-word payload word ;
+: handle-bracket-container ( pair -- obj )
+    first2 swap bracket-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-bracket-container-word ] if ;
+
+ERROR: no-brace-container-word payload word ;
+: handle-brace-container ( pair -- obj )
+    first2 swap brace-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-brace-container-word ] if ;
+
+ERROR: no-paren-container-word payload word ;
+: handle-paren-container ( pair -- obj )
+    first2 swap paren-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-paren-container-word ] if ;
+
+
+: lower-char? ( str -- ? ) [ ch'a ch'z between? ] [ ch'- = ] bi or ;
+: upper-char? ( str -- ? ) [ ch'A ch'Z between? ] [ ch'- = ] bi or ;
+
+
+: strict-lower-colon? ( string -- ? )
+    [ ch'\: = ] cut-tail
+    [
+        [ length 0 > ] [ [ lower-char? ] all? ] bi and
+    ] [ length 0 > ] bi* and ;
+
+
+
+
+: (strict-upper-colon?) ( string -- ? )
+    ! All chars must...
+    [
+        [
+            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
+        ] all?
+    ]
+    ! At least one char must...
+    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
+
+: strict-upper-colon? ( string -- ? )
+    [ [ ch'\: = ] all? ]
+    [ (strict-upper-colon?) ] bi or ;
+
+
+: strict-section-word? ( string -- ? )
+    [ "<" head? ]
+    [ rest [ upper-char? ] all? ] bi and ;
+
+: strict-named-section-word? ( string -- ? )
+    [ "<" head? ]
+    [ ":" tail? ]
+    [ rest but-last [ upper-char? ] all? ] tri and and ;
+
+: (strict-single-quote?) ( string -- ? )
+    "'" split1
+    [ "'" head? not ]
+    [
+        [ length 0 > ]
+        [
+            ! ch'\'
+            [ "\\'" tail? ] [ "'" tail? not ] bi or
+        ] bi and
+    ] bi* and ;
+
+: strict-single-quote? ( string -- ? )
+    dup (strict-single-quote?)
+    [ "'[" sequence= not ] [ drop f ] if ;
+
+: strict-double-quote? ( string -- ? ) ?last ch'\" = ;
+
+: strict-container? ( string open-str -- ? )
+    [ split1 ] [ split1 ] bi
+    [ ]
+    [ [ ch'= = ] all? ]
+    [ "" = ] tri* and and ;
+
+: strict-bracket-container? ( string -- ? ) "[" strict-container? ;
+: strict-brace-container? ( string -- ? ) "{" strict-container? ;
+: strict-paren-container? ( string -- ? ) "(" strict-container? ;
+
+: container-tag ( string open-str -- string' ) split1 drop ;
+: double-quote-tag ( string -- string' ) "\"" split1 drop ;
+: bracket-container-tag ( string -- string' ) "[" container-tag ;
+: brace-container-tag ( string -- string' ) "{" container-tag ;
+: paren-container-tag ( string -- string' ) "(" container-tag ;
+
+: parse-bracket-container ( string -- string' )
+    "[" split1 "[" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-bracket-container ;
+
+: parse-brace-container ( string -- string' )
+    "{" split1 "{" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-brace-container ;
+
+: parse-paren-container ( string -- string' )
+    "(" split1 "(" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-paren-container ;
+
+
+\ lookup-char "ch" set-container-word
+\ no-op "data-stack" set-lower-colon-word
+! USE: urls \ >url "url" set-container-word
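! Illustration (not part of the patch): rough expectations for the strict-*
! classifiers added above, written as tools.test checks. The syntax.modern
! vocabulary name comes from the IN: line of this file; the expected values
! are the editor's reading of the predicates, not output from a real run.
USING: syntax.modern tools.test ;

{ t } [ "char:"   strict-lower-colon?  ] unit-test   ! lower-case name plus colon
{ f } [ "CHAR:"   strict-lower-colon?  ] unit-test   ! upper-case is not lower-colon
{ t } [ "SYMBOL:" strict-upper-colon?  ] unit-test
{ t } [ "ch'a"    strict-single-quote? ] unit-test   ! tick form, but not "'["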
diff --git a/core/syntax/syntax.factor b/core/syntax/syntax.factor
index 72007f9c10..43f1264920 100644
--- a/core/syntax/syntax.factor
+++ b/core/syntax/syntax.factor
@@ -128,11 +128,8 @@ IN: bootstrap.syntax
     "f" [ f suffix! ] define-core-syntax
 
     "char:" [
-        lexer get parse-raw [ "token" throw-unexpected-eof ] unless* {
-            { [ dup length 1 = ] [ first ] }
-            { [ "\\" ?head ] [ next-escape >string "" assert= ] }
-            [ name>char-hook get call( name -- char ) ]
-        } cond suffix!
+        lexer get parse-raw [ "token" throw-unexpected-eof ] unless*
+        lookup-char suffix!
     ] define-core-syntax
 
     "\"" [ parse-string suffix! ] define-core-syntax
@@ -183,16 +180,6 @@ IN: bootstrap.syntax
         scan-new-word [ define-symbol ] keep
         scan-object '[ _ _ initialize ] append!
     ] define-core-syntax
-![[
-    "INITIALIZED-SYMBOL:" [
-        scan-new-word [ define-symbol ]
-        [
-            name>> "initialize-" prepend create-word-in dup reset-generic
-            scan-object dupd [ initialize ] curry curry ( -- ) define-declared
-        ] bi
-    ] define-core-syntax
-]]
-
     "SYMBOL:" [
         scan-new-word define-symbol
     ] define-core-syntax
diff --git a/extra/modern/slices/slices.factor b/extra/modern/slices/slices.factor
index 9406f49403..41e86fcab1 100644
--- a/extra/modern/slices/slices.factor
+++ b/extra/modern/slices/slices.factor
@@ -7,18 +7,6 @@ IN: modern.slices
 : >strings ( seq -- str )
     [ dup slice? [ >string ] when ] deep-map ;
 
-: matching-delimiter ( ch -- ch' )
-    H{
-        { ch'\( ch'\) }
-        { ch'\[ ch'\] }
-        { ch'\{ ch'\} }
-        { ch'< ch'> }
-        { ch'\: ch'\; }
-    } ?at drop ;
-
-: matching-delimiter-string ( string -- string' )
-    [ matching-delimiter ] map ;
-
 : matching-section-delimiter ( string -- string' )
     dup ":" tail? [ rest but-last ";" ">" surround
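! Illustration (not part of the patch): registering another container word the
! same way the patch registers lookup-char under the "ch" tag. This merely
! uncomments the USE: urls line left at the end of modern.factor above; >url
! and the urls vocabulary already exist, but wiring them in here is only a
! sketch of the intended usage.
USING: syntax.modern urls ;

\ >url "url" set-container-word
! A pair tagged "url" is then looked up in each *-definitions table by the
! corresponding handle-* word and executed with the payload, e.g.
! { "url" "http://factorcode.org" } handle-single-quote => URL" http://factorcode.org"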