From cbfdf2cfa89baebbb1a8e49c3cc3c1f9838842d6 Mon Sep 17 00:00:00 2001
From: Doug Coleman
Date: Fri, 10 Aug 2018 18:01:19 -0500
Subject: [PATCH] factor: Add more parser guts.

---
 core/kernel/kernel.factor         |   2 +
 core/lexer/lexer.factor           |  61 +++------
 core/multiline/multiline.factor   |  16 ---
 core/parser/parser.factor         |  19 ++-
 core/strings/parser/parser.factor |   7 +
 core/syntax/modern/modern.factor  | 204 ++++++++++++++++++++++++------
 core/syntax/syntax.factor         |  17 +--
 extra/modern/slices/slices.factor |  12 --
 8 files changed, 206 insertions(+), 132 deletions(-)

diff --git a/core/kernel/kernel.factor b/core/kernel/kernel.factor
index 3d7010183a..ec199b29dd 100644
--- a/core/kernel/kernel.factor
+++ b/core/kernel/kernel.factor
@@ -531,3 +531,5 @@ PRIVATE>
 
 : get-retainstack ( -- array )
     context retainstack-for ; inline
+
+: no-op ( obj -- obj ) ;
\ No newline at end of file
diff --git a/core/lexer/lexer.factor b/core/lexer/lexer.factor
index 7ffde33f1c..58bf8ff342 100644
--- a/core/lexer/lexer.factor
+++ b/core/lexer/lexer.factor
@@ -84,8 +84,8 @@ M: lexer skip-blank
 
 GENERIC: skip-word ( lexer -- )
 
-: find-container-delimiter ( i str -- n/f )
-    2dup [ "[" member? ] find-from [
+: find-container-delimiter ( i str delim-str -- n/f )
+    [ 2dup ] dip '[ _ member? ] find-from [
         [ swap subseq [ ch'= = ] all? ] keep and
     ] [
         3drop f
@@ -93,11 +93,19 @@ GENERIC: skip-word ( lexer -- )
 
 M: lexer skip-word
     [
-        2dup [ " \"[" member? ] find-from
+        2dup [ " \"[{(" member? ] find-from
         {
             { ch'\" [ 2nip 1 + ] }
-            { ch'\[ [
-                1 + over find-container-delimiter
+            { ch'\[ [
+                1 + over "[" find-container-delimiter
+                dup [ 2nip 1 + ] [ drop f skip ] if
+            ] }
+            { ch'\{ [
+                1 + over "{" find-container-delimiter
+                dup [ 2nip 1 + ] [ drop f skip ] if
+            ] }
+            { ch'\( [
+                1 + over "(" find-container-delimiter
                 dup [ 2nip 1 + ] [ drop f skip ] if
             ] }
             [ 2drop f skip ]
@@ -140,8 +148,8 @@ DEFER: parse-token
 : unescape-token ( string -- string' )
     dup length 1 = [ "\\" ?head drop ] unless ;
 
-: unhashtag-token ( string -- string' )
-    dup length 1 = [ "#" ?head [ drop f ] when ] unless ;
+: unhashtag-token ( string -- string' ? )
+    dup length 1 = [ f ] [ "#" ?head >boolean ] if ;
 
 : unescape-tokens ( seq -- seq' )
     [ unescape-token ] map ;
@@ -149,49 +157,12 @@ DEFER: parse-token
 : parse-token ( lexer -- str/f )
     dup parse-raw [ skip-comments ] [ drop f ] if* ;
 
-: ?scan-token ( -- str/f ) lexer get parse-token unescape-token unhashtag-token ;
+: ?scan-token ( -- str/f ) lexer get parse-token unescape-token ;
 
 PREDICATE: unexpected-eof < unexpected got>> not ;
 
 : throw-unexpected-eof ( word -- * ) f unexpected ;
 
-: (strict-single-quote?) ( string -- ? )
-    "'" split1
-    [ "'" head? not ]
-    [
-        [ length 0 > ]
-        [
-            ! ch'\'
-            [ "\\'" tail? ] [ "'" tail? not ] bi or
-        ] bi and
-    ] bi* and ;
-
-: strict-single-quote? ( string -- ? )
-    dup (strict-single-quote?)
-    [ "'[" sequence= not ] [ drop f ] if ;
-
-: strict-lower-colon? ( string -- ? )
-    [ ch'\: = ] cut-tail
-    [
-        [ length 0 > ] [
-            [ [ ch'a ch'z between? ] [ "-" member? ] bi or ] all?
-        ] bi and ]
-    [ length 0 > ] bi* and ;
-
-: (strict-upper-colon?) ( string -- ? )
-    ! All chars must...
-    [
-        [
-            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
-        ] all?
-    ]
-    ! At least one char must...
-    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
-
-: strict-upper-colon? ( string -- ? )
-    [ [ ch'\: = ] all? ]
-    [ (strict-upper-colon?) ] bi or ;
-
 : scan-token ( -- str )
     ?scan-token [ "token" throw-unexpected-eof ] unless* ;
 
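! Illustration (not part of the patch): find-container-delimiter now takes the
! opening delimiter as a string, so "[", "{" and "(" all reuse the same rule:
! the text between the opener and the next delimiter must be a run of "="
! signs, as in "[==[". A rough standalone sketch of just that check, using the
! stock CHAR: syntax instead of the patch's ch' literals; the vocabulary and
! word names here are made up for the example.
USING: sequences ;
IN: sketches.container-delimiters

: delimiter-tag-run? ( from to string -- ? )
    ! t when every character in [from,to) is '=', the same shape as the
    ! [ swap subseq [ ch'= = ] all? ] test inside find-container-delimiter.
    subseq [ CHAR: = = ] all? ;

! 1 3 "[==[ body" delimiter-tag-run? .   ! t, so "[==[" opens a container
! 1 3 "[x=[ body" delimiter-tag-run? .   ! f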
diff --git a/core/multiline/multiline.factor b/core/multiline/multiline.factor
index 71ff84fc08..1d1323d3c6 100644
--- a/core/multiline/multiline.factor
+++ b/core/multiline/multiline.factor
@@ -31,19 +31,3 @@ PRIVATE>
 
 : parse-multiline-string0 ( end-text -- str )
     lexer get 0 (parse-multiline-string) ;
-
-! SYNTAX: \[[ "]]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[=[ "]=]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[==[ "]==]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[===[ "]===]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[====[ "]====]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[=====[ "]=====]" parse-multiline-string0 suffix! ;
-! SYNTAX: \[======[ "]======]" parse-multiline-string0 suffix! ;
-
-! SYNTAX: \![[ "]]" parse-multiline-string0 drop ;
-! SYNTAX: \![=[ "]=]" parse-multiline-string0 drop ;
-! SYNTAX: \![==[ "]==]" parse-multiline-string0 drop ;
-! SYNTAX: \![===[ "]===]" parse-multiline-string0 drop ;
-! SYNTAX: \![====[ "]====]" parse-multiline-string0 drop ;
-! SYNTAX: \![=====[ "]=====]" parse-multiline-string0 drop ;
-! SYNTAX: \![======[ "]======]" parse-multiline-string0 drop ;
diff --git a/core/parser/parser.factor b/core/parser/parser.factor
index fe502dfbc5..6e6373d6b6 100644
--- a/core/parser/parser.factor
+++ b/core/parser/parser.factor
@@ -71,7 +71,18 @@ DEFER: scan-object
 
 : string>new-parser ( string -- string/obj ? )
     {
         ! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] }
+        ! { [ dup strict-upper-colon? ] [ parse-upper-colon t ] }
+        ! { [ dup strict-section? ] [ parse-section t ] }
+        ! { [ dup strict-named-section? ] [ parse-named-section t ] }
         { [ dup strict-single-quote? ] [ parse-single-quote t ] }
+        ! { [ dup strict-double-quote? ] [ parse-double-quote t ] }
+        ! { [ dup strict-bracket-container? ] [ parse-bracket-container t ] }
+        ! { [ dup strict-brace-container? ] [ parse-brace-container t ] }
+        ! { [ dup strict-paren-container? ] [ parse-paren-container t ] }
+
+        ! { [ dup strict-bracket? ] [ parse-bracket t ] }
+        ! { [ dup strict-brace? ] [ parse-brace t ] }
+        ! { [ dup strict-paren? ] [ parse-paren t ] }
         [ f ]
     } cond ;
@@ -144,8 +155,12 @@ ERROR: classoid-expected object ;
     } cond ;
 
 : parse-until-step ( accum end -- accum ? )
-    ?scan-token string>new-parser
-    [ nip suffix! t ] [ (parse-until-step) ] if ;
+    ?scan-token
+    unhashtag-token
+    [
+        string>new-parser
+        [ nip suffix! t ] [ (parse-until-step) ] if
+    ] dip [ over pop* ] [ ] if ;
 
 : (parse-until) ( accum end -- accum )
     [ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ;
diff --git a/core/strings/parser/parser.factor b/core/strings/parser/parser.factor
index bb0deba403..caf080e36e 100644
--- a/core/strings/parser/parser.factor
+++ b/core/strings/parser/parser.factor
@@ -159,3 +159,10 @@ PRIVATE>
             lexer get (parse-string)
         ] keep unescape-string
     ] rewind-lexer-on-error ;
+
+: lookup-char ( char -- obj )
+    {
+        { [ dup length 1 = ] [ first ] }
+        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
+        [ name>char-hook get ( name -- char ) call-effect ]
+    } cond ;
\ No newline at end of file
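! Illustration (not part of the patch): the intended behavior of the new
! lookup-char word defined above, assuming the patched strings.parser vocab is
! loaded. Only the single-character branch is shown with a concrete result;
! backslash tokens go through next-escape and longer names go through
! name>char-hook, whose results depend on the hook that is installed.
USING: prettyprint strings.parser ;

"a" lookup-char .    ! 97, a one-character token is its own code point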
diff --git a/core/syntax/modern/modern.factor b/core/syntax/modern/modern.factor
index fc20faea0c..745c58c5c1 100644
--- a/core/syntax/modern/modern.factor
+++ b/core/syntax/modern/modern.factor
@@ -1,56 +1,49 @@
 ! Copyright (C) 2018 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: assocs combinators kernel namespaces sequences splitting
-strings strings.parser ;
+USING: arrays assocs combinators kernel math math.order
+multiline namespaces sequences splitting strings strings.parser ;
 IN: syntax.modern
 
-INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
+: matching-delimiter ( ch -- ch' )
+    H{
+        { ch'\( ch'\) }
+        { ch'\[ ch'\] }
+        { ch'\{ ch'\} }
+        { ch'< ch'> }
+        { ch'\: ch'\; }
+    } ?at drop ;
+
+: matching-delimiter-string ( string -- string' )
+    [ matching-delimiter ] map ;
+
 INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: section-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: named-section-definitions [ H{ } clone ]
+INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ]
 INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ]
 
-: define-single-quote-word ( word def -- ) swap lower-colon-definitions get set-at ;
-: define-lower-colon-word ( word def -- ) swap lower-colon-definitions get set-at ;
-: define-upper-colon-word ( word def -- ) swap upper-colon-definitions get set-at ;
-: define-double-quote-word ( word def -- ) swap double-quote-definitions get set-at ;
-: define-bracket-container-word ( word def -- ) swap bracket-container-definitions get set-at ;
-: define-brace-container-word ( word def -- ) swap brace-container-definitions get set-at ;
-: define-paren-container-word ( word def -- ) swap paren-container-definitions get set-at ;
+: set-lower-colon-word ( word name -- ) lower-colon-definitions get set-at ;
+: set-upper-colon-word ( word name -- ) upper-colon-definitions get set-at ;
+: set-section-word ( word name -- ) section-definitions get set-at ;
+: set-named-section-word ( word name -- ) named-section-definitions get set-at ;
+: set-single-quote-word ( word name -- ) single-quote-definitions get set-at ;
+: set-double-quote-word ( word name -- ) double-quote-definitions get set-at ;
+: set-bracket-container-word ( word name -- ) bracket-container-definitions get set-at ;
+: set-brace-container-word ( word name -- ) brace-container-definitions get set-at ;
+: set-paren-container-word ( word name -- ) paren-container-definitions get set-at ;
 
-GENERIC: lower-colon>object ( obj -- obj' )
-GENERIC: double-quote>object ( obj -- obj' )
-GENERIC: bracket-container>object ( obj -- obj' )
-GENERIC: brace-container>object ( obj -- obj' )
-GENERIC: paren-container>object ( obj -- obj' )
-
-![[
-    SYNTAX: LOWER-COLON:
-    scan-new-class
-    [ ]
-    [ tuple { "object" } define-tuple-class ]
-    [ '[ _ boa suffix! ] define-lower-colon-word ] tri ;
-]]
-
-
-ERROR: no-single-quote-word payload word ;
-: handle-single-quote ( pair -- obj )
-    first2 swap single-quote-definitions get ?at
-    [ execute( obj -- obj' ) ]
-    [ no-single-quote-word ] if ;
-
-: ch>object ( ch -- obj )
+: set-container-word ( word def -- )
     {
-        { [ dup length 1 = ] [ first ] }
-        { [ "\\" ?head ] [ next-escape >string "" assert= ] }
-        [ name>char-hook get ( name -- char ) call-effect ]
-    } cond ;
-
-\ ch>object "ch" single-quote-definitions get set-at
-
-
+        [ set-single-quote-word ]
+        [ set-double-quote-word ]
+        [ set-bracket-container-word ]
+        [ set-brace-container-word ]
+        [ set-paren-container-word ]
+    } 2cleave ;
 
 ERROR: no-lower-colon-word payload word ;
 : handle-lower-colon ( pair -- obj )
@@ -58,5 +51,132 @@ ERROR: no-lower-colon-word payload word ;
     [ execute( obj -- obj' ) ]
     [ no-lower-colon-word ] if ;
 
-: no-op ( obj -- obj' ) ;
-\ no-op "data-stack" lower-colon-definitions get set-at
+ERROR: no-single-quote-word payload word ;
+: handle-single-quote ( pair -- obj )
+    first2 swap single-quote-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-single-quote-word ] if ;
+
+ERROR: no-section-word payload word ;
+: handle-section ( pair -- obj )
+    first2 swap section-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-section-word ] if ;
+
+ERROR: no-named-section-word payload word ;
+: handle-named-section ( pair -- obj )
+    first2 swap named-section-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-named-section-word ] if ;
+
+ERROR: no-double-quote-word payload word ;
+: handle-double-quote ( pair -- obj )
+    first2 swap double-quote-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-double-quote-word ] if ;
+
+ERROR: no-bracket-container-word payload word ;
+: handle-bracket-container ( pair -- obj )
+    first2 swap bracket-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-bracket-container-word ] if ;
+
+ERROR: no-brace-container-word payload word ;
+: handle-brace-container ( pair -- obj )
+    first2 swap brace-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-brace-container-word ] if ;
+
+ERROR: no-paren-container-word payload word ;
+: handle-paren-container ( pair -- obj )
+    first2 swap paren-container-definitions get ?at
+    [ execute( obj -- obj' ) ]
+    [ no-paren-container-word ] if ;
+
+
+: lower-char? ( str -- ? ) [ ch'a ch'z between? ] [ ch'- = ] bi or ;
+: upper-char? ( str -- ? ) [ ch'A ch'Z between? ] [ ch'- = ] bi or ;
+
+
+: strict-lower-colon? ( string -- ? )
+    [ ch'\: = ] cut-tail
+    [
+        [ length 0 > ] [ [ lower-char? ] all? ] bi and
+    ] [ length 0 > ] bi* and ;
+
+
+
+
+: (strict-upper-colon?) ( string -- ? )
+    ! All chars must...
+    [
+        [
+            [ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
+        ] all?
+    ]
+    ! At least one char must...
+    [ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
+
+: strict-upper-colon? ( string -- ? )
+    [ [ ch'\: = ] all? ]
+    [ (strict-upper-colon?) ] bi or ;
+
+
+: strict-section-word? ( string -- ? )
+    [ "<" head? ]
+    [ rest [ upper-char? ] all? ] bi and ;
+
+: strict-named-section-word? ( string -- ? )
+    [ "<" head? ]
+    [ ":" tail? ]
+    [ rest but-last [ upper-char? ] all? ] tri and and ;
+
+: (strict-single-quote?) ( string -- ? )
+    "'" split1
+    [ "'" head? not ]
+    [
+        [ length 0 > ]
+        [
+            ! ch'\'
+            [ "\\'" tail? ] [ "'" tail? not ] bi or
+        ] bi and
+    ] bi* and ;
+
+: strict-single-quote? ( string -- ? )
+    dup (strict-single-quote?)
+    [ "'[" sequence= not ] [ drop f ] if ;
+
+: strict-double-quote? ( string -- ? ) ?last ch'\" = ;
+
+: strict-container? ( string open-str -- ? )
+    [ split1 ] [ split1 ] bi
+    [ ]
+    [ [ ch'= = ] all? ]
+    [ "" = ] tri* and and ;
+
+: strict-bracket-container? ( string -- ? ) "[" strict-container? ;
+: strict-brace-container? ( string -- ? ) "{" strict-container? ;
+: strict-paren-container? ( string -- ? ) "(" strict-container? ;
+
+: container-tag ( string open-str -- string' ) split1 drop ;
+: double-quote-tag ( string -- string' ) "\"" split1 drop ;
+: bracket-container-tag ( string -- string' ) "[" container-tag ;
+: brace-container-tag ( string -- string' ) "{" container-tag ;
+: paren-container-tag ( string -- string' ) "(" container-tag ;
+
+: parse-bracket-container ( string -- string' )
+    "[" split1 "[" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-bracket-container ;
+
+: parse-brace-container ( string -- string' )
+    "{" split1 "{" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-brace-container ;
+
+: parse-paren-container ( string -- string' )
+    "(" split1 "(" prepend matching-delimiter-string
+    parse-multiline-string0 2array handle-paren-container ;
+
+
+\ lookup-char "ch" set-container-word
+\ no-op "data-stack" set-lower-colon-word
+! USE: urls \ >url "url" set-container-word
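! Illustration (not part of the patch): rough expectations for the strict-*
! classifiers added above, written as tools.test checks. The syntax.modern
! vocabulary name comes from the IN: line of this file; the expected values
! are the editor's reading of the predicates, not output from a real run.
USING: syntax.modern tools.test ;

{ t } [ "char:"   strict-lower-colon?  ] unit-test   ! lower-case name plus colon
{ f } [ "CHAR:"   strict-lower-colon?  ] unit-test   ! upper-case is not lower-colon
{ t } [ "SYMBOL:" strict-upper-colon?  ] unit-test
{ t } [ "ch'a"    strict-single-quote? ] unit-test   ! tick form, but not "'["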
diff --git a/core/syntax/syntax.factor b/core/syntax/syntax.factor
index 72007f9c10..43f1264920 100644
--- a/core/syntax/syntax.factor
+++ b/core/syntax/syntax.factor
@@ -128,11 +128,8 @@ IN: bootstrap.syntax
     "f" [ f suffix! ] define-core-syntax
 
     "char:" [
-        lexer get parse-raw [ "token" throw-unexpected-eof ] unless* {
-            { [ dup length 1 = ] [ first ] }
-            { [ "\\" ?head ] [ next-escape >string "" assert= ] }
-            [ name>char-hook get call( name -- char ) ]
-        } cond suffix!
+        lexer get parse-raw [ "token" throw-unexpected-eof ] unless*
+        lookup-char suffix!
     ] define-core-syntax
 
     "\"" [ parse-string suffix! ] define-core-syntax
@@ -183,16 +180,6 @@ IN: bootstrap.syntax
         scan-new-word [ define-symbol ] keep
         scan-object '[ _ _ initialize ] append!
     ] define-core-syntax
-![[
-    "INITIALIZED-SYMBOL:" [
-        scan-new-word [ define-symbol ]
-        [
-            name>> "initialize-" prepend create-word-in dup reset-generic
-            scan-object dupd [ initialize ] curry curry ( -- ) define-declared
-        ] bi
-    ] define-core-syntax
-]]
-
     "SYMBOL:" [
         scan-new-word define-symbol
     ] define-core-syntax
diff --git a/extra/modern/slices/slices.factor b/extra/modern/slices/slices.factor
index 9406f49403..41e86fcab1 100644
--- a/extra/modern/slices/slices.factor
+++ b/extra/modern/slices/slices.factor
@@ -7,18 +7,6 @@ IN: modern.slices
 : >strings ( seq -- str )
     [ dup slice? [ >string ] when ] deep-map ;
 
-: matching-delimiter ( ch -- ch' )
-    H{
-        { ch'\( ch'\) }
-        { ch'\[ ch'\] }
-        { ch'\{ ch'\} }
-        { ch'< ch'> }
-        { ch'\: ch'\; }
-    } ?at drop ;
-
-: matching-delimiter-string ( string -- string' )
-    [ matching-delimiter ] map ;
-
 : matching-section-delimiter ( string -- string' )
     dup ":" tail? [ rest but-last ";" ">" surround
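! Illustration (not part of the patch): registering another container word the
! same way the patch registers lookup-char under the "ch" tag. This merely
! uncomments the USE: urls line left at the end of modern.factor above; >url
! and the urls vocabulary already exist, but wiring them in here is only a
! sketch of the intended usage.
USING: syntax.modern urls ;

\ >url "url" set-container-word
! A pair tagged "url" is then looked up in each *-definitions table by the
! corresponding handle-* word and executed with the payload, e.g.
! { "url" "http://factorcode.org" } handle-single-quote => URL" http://factorcode.org"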