factor: Add more parser guts.
parent
4bcae2590c
commit
cbfdf2cfa8
|
@ -531,3 +531,5 @@ PRIVATE>
|
|||
|
||||
: get-retainstack ( -- array )
|
||||
context retainstack-for ; inline
|
||||
|
||||
! Identity word: the body is empty, so the single input stays on the stack unchanged.
: no-op ( obj -- obj ) ;
|
|
@ -84,8 +84,8 @@ M: lexer skip-blank
|
|||
|
||||
GENERIC: skip-word ( lexer -- )
|
||||
|
||||
: find-container-delimiter ( i str -- n/f )
|
||||
2dup [ "[" member? ] find-from [
|
||||
: find-container-delimiter ( i str delim-str -- n/f )
|
||||
[ 2dup ] dip '[ _ member? ] find-from [
|
||||
[ swap subseq [ ch'= = ] all? ] keep and
|
||||
] [
|
||||
3drop f
|
||||
|
@ -93,11 +93,19 @@ GENERIC: skip-word ( lexer -- )
|
|||
|
||||
M: lexer skip-word
|
||||
[
|
||||
2dup [ " \"[" member? ] find-from
|
||||
2dup [ " \"[{(" member? ] find-from
|
||||
{
|
||||
{ ch'\" [ 2nip 1 + ] }
|
||||
{ ch'\[ [
|
||||
1 + over find-container-delimiter
|
||||
1 + over "[" find-container-delimiter
|
||||
dup [ 2nip 1 + ] [ drop f skip ] if
|
||||
] }
|
||||
{ ch'\{ [
|
||||
1 + over "{" find-container-delimiter
|
||||
dup [ 2nip 1 + ] [ drop f skip ] if
|
||||
] }
|
||||
{ ch'\( [
|
||||
1 + over "(" find-container-delimiter
|
||||
dup [ 2nip 1 + ] [ drop f skip ] if
|
||||
] }
|
||||
[ 2drop f skip ]
|
||||
|
@ -140,8 +148,8 @@ DEFER: parse-token
|
|||
: unescape-token ( string -- string' )
|
||||
dup length 1 = [ "\\" ?head drop ] unless ;
|
||||
|
||||
: unhashtag-token ( string -- string' )
|
||||
dup length 1 = [ "#" ?head [ drop f ] when ] unless ;
|
||||
! Strip one leading "#" from a token and report whether it was present.
! A token of length 1 is returned untouched together with f, so a bare
! "#" is never stripped. Outputs the (possibly shortened) string and a flag.
: unhashtag-token ( string -- string' ? )
    1 over length =
    [ f ]
    [ "#" ?head >boolean ] if ;
|
||||
|
||||
: unescape-tokens ( seq -- seq' )
|
||||
[ unescape-token ] map ;
|
||||
|
@ -149,49 +157,12 @@ DEFER: parse-token
|
|||
: parse-token ( lexer -- str/f )
|
||||
dup parse-raw [ skip-comments ] [ drop f ] if* ;
|
||||
|
||||
: ?scan-token ( -- str/f ) lexer get parse-token unescape-token unhashtag-token ;
|
||||
: ?scan-token ( -- str/f ) lexer get parse-token unescape-token ;
|
||||
|
||||
PREDICATE: unexpected-eof < unexpected got>> not ;
|
||||
|
||||
: throw-unexpected-eof ( word -- * ) f unexpected ;
|
||||
|
||||
: (strict-single-quote?) ( string -- ? )
|
||||
"'" split1
|
||||
[ "'" head? not ]
|
||||
[
|
||||
[ length 0 > ]
|
||||
[
|
||||
! ch'\'
|
||||
[ "\\'" tail? ] [ "'" tail? not ] bi or
|
||||
] bi and
|
||||
] bi* and ;
|
||||
|
||||
: strict-single-quote? ( string -- ? )
|
||||
dup (strict-single-quote?)
|
||||
[ "'[" sequence= not ] [ drop f ] if ;
|
||||
|
||||
: strict-lower-colon? ( string -- ? )
|
||||
[ ch'\: = ] cut-tail
|
||||
[
|
||||
[ length 0 > ] [
|
||||
[ [ ch'a ch'z between? ] [ "-" member? ] bi or ] all?
|
||||
] bi and ]
|
||||
[ length 0 > ] bi* and ;
|
||||
|
||||
: (strict-upper-colon?) ( string -- ? )
|
||||
! All chars must...
|
||||
[
|
||||
[
|
||||
[ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
|
||||
] all?
|
||||
]
|
||||
! At least one char must...
|
||||
[ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
|
||||
|
||||
: strict-upper-colon? ( string -- ? )
|
||||
[ [ ch'\: = ] all? ]
|
||||
[ (strict-upper-colon?) ] bi or ;
|
||||
|
||||
: scan-token ( -- str )
|
||||
?scan-token [ "token" throw-unexpected-eof ] unless* ;
|
||||
|
||||
|
|
|
@ -31,19 +31,3 @@ PRIVATE>
|
|||
|
||||
: parse-multiline-string0 ( end-text -- str )
|
||||
lexer get 0 (parse-multiline-string) ;
|
||||
|
||||
! SYNTAX: \[[ "]]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[=[ "]=]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[==[ "]==]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[===[ "]===]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[====[ "]====]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[=====[ "]=====]" parse-multiline-string0 suffix! ;
|
||||
! SYNTAX: \[======[ "]======]" parse-multiline-string0 suffix! ;
|
||||
|
||||
! SYNTAX: \![[ "]]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![=[ "]=]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![==[ "]==]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![===[ "]===]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![====[ "]====]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![=====[ "]=====]" parse-multiline-string0 drop ;
|
||||
! SYNTAX: \![======[ "]======]" parse-multiline-string0 drop ;
|
||||
|
|
|
@ -71,7 +71,18 @@ DEFER: scan-object
|
|||
: string>new-parser ( string -- string/obj ? )
|
||||
{
|
||||
! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] }
|
||||
! { [ dup strict-upper-colon? ] [ parse-upper-colon t ] }
|
||||
! { [ dup strict-section? ] [ parse-section t ] }
|
||||
! { [ dup strict-named-section? ] [ parse-named-section t ] }
|
||||
{ [ dup strict-single-quote? ] [ parse-single-quote t ] }
|
||||
! { [ dup strict-double-quote? ] [ parse-double-quote t ] }
|
||||
! { [ dup strict-bracket-container? ] [ parse-bracket-container t ] }
|
||||
! { [ dup strict-brace-container? ] [ parse-brace-container t ] }
|
||||
! { [ dup strict-paren-container? ] [ parse-paren-container t ] }
|
||||
|
||||
! { [ dup strict-bracket? ] [ parse-bracket t ] }
|
||||
! { [ dup strict-brace? ] [ parse-brace t ] }
|
||||
! { [ dup strict-paren? ] [ parse-paren t ] }
|
||||
[ f ]
|
||||
} cond ;
|
||||
|
||||
|
@ -144,8 +155,12 @@ ERROR: classoid-expected object ;
|
|||
} cond ;
|
||||
|
||||
! Scan one token and run it through the parser, appending to accum.
! The token first goes through unhashtag-token; when that reports a
! stripped leading "#", the token is still parsed normally but the
! element left on top of accum is popped again afterwards.
! Outputs accum plus the flag produced by the selected parser branch
! ((parse-until) keeps looping while that flag is true).
: parse-until-step ( accum end -- accum ? )
    ?scan-token unhashtag-token
    [
        string>new-parser
        [ nip suffix! t ] [ (parse-until-step) ] if
    ] dip
    ! stack here: accum ? hashtag?
    [ over pop* ] when ;
|
||||
|
||||
: (parse-until) ( accum end -- accum )
|
||||
[ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ;
|
||||
|
|
|
@ -159,3 +159,10 @@ PRIVATE>
|
|||
lexer get (parse-string)
|
||||
] keep unescape-string
|
||||
] rewind-lexer-on-error ;
|
||||
|
||||
! Resolve the text of a character literal to a value.
! NOTE: despite the name in the stack effect, the input is a string
! (its length is taken and a "\\" prefix is stripped from it).
!   * a one-element string yields its only element;
!   * a string starting with a backslash is decoded with next-escape,
!     and assert= requires that nothing remains after the escape;
!   * anything else is handed to the quotation stored in name>char-hook.
: lookup-char ( char -- obj )
    dup length 1 = [
        first
    ] [
        "\\" ?head
        [ next-escape >string "" assert= ]
        [ name>char-hook get ( name -- char ) call-effect ] if
    ] if ;
|
|
@ -1,56 +1,49 @@
|
|||
! Copyright (C) 2018 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs combinators kernel namespaces sequences splitting
|
||||
strings strings.parser ;
|
||||
USING: arrays assocs combinators kernel math math.order
|
||||
multiline namespaces sequences splitting strings strings.parser ;
|
||||
IN: syntax.modern
|
||||
|
||||
INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
|
||||
! Map an opening delimiter character to its closing counterpart.
! A character with no entry in the table is returned unchanged.
: matching-delimiter ( ch -- ch' )
    dup H{
        { ch'\( ch'\) }
        { ch'\[ ch'\] }
        { ch'\{ ch'\} }
        { ch'< ch'> }
        { ch'\: ch'\; }
    } at [ nip ] when* ;
|
||||
|
||||
! Apply matching-delimiter to every element, producing a new sequence
! of the same kind as the input.
: matching-delimiter-string ( string -- string' )
    [ matching-delimiter ] over map-as ;
|
||||
|
||||
INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: section-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: named-section-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ]
|
||||
INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ]
|
||||
|
||||
! Store def under the key word in the single-quote table.
! The table must be single-quote-definitions; writing into
! lower-colon-definitions (as the sibling define-lower-colon-word does)
! would register the entry in the wrong namespace.
: define-single-quote-word ( word def -- ) swap single-quote-definitions get set-at ;
|
||||
: define-lower-colon-word ( word def -- ) swap lower-colon-definitions get set-at ;
|
||||
: define-upper-colon-word ( word def -- ) swap upper-colon-definitions get set-at ;
|
||||
: define-double-quote-word ( word def -- ) swap double-quote-definitions get set-at ;
|
||||
: define-bracket-container-word ( word def -- ) swap bracket-container-definitions get set-at ;
|
||||
: define-brace-container-word ( word def -- ) swap brace-container-definitions get set-at ;
|
||||
: define-paren-container-word ( word def -- ) swap paren-container-definitions get set-at ;
|
||||
! Registration words, one per syntax table. Each stores `word` as the
! value under the key `name` in the table held by the corresponding
! *-definitions symbol (set-at takes value, key, assoc in that order).
: set-lower-colon-word ( word name -- ) lower-colon-definitions get set-at ;
|
||||
: set-upper-colon-word ( word name -- ) upper-colon-definitions get set-at ;
|
||||
: set-section-word ( word name -- ) section-definitions get set-at ;
|
||||
: set-named-section-word ( word name -- ) named-section-definitions get set-at ;
|
||||
: set-single-quote-word ( word name -- ) single-quote-definitions get set-at ;
|
||||
: set-double-quote-word ( word name -- ) double-quote-definitions get set-at ;
|
||||
: set-bracket-container-word ( word name -- ) bracket-container-definitions get set-at ;
|
||||
: set-brace-container-word ( word name -- ) brace-container-definitions get set-at ;
|
||||
: set-paren-container-word ( word name -- ) paren-container-definitions get set-at ;
|
||||
|
||||
GENERIC: lower-colon>object ( obj -- obj' )
|
||||
GENERIC: double-quote>object ( obj -- obj' )
|
||||
GENERIC: bracket-container>object ( obj -- obj' )
|
||||
GENERIC: brace-container>object ( obj -- obj' )
|
||||
GENERIC: paren-container>object ( obj -- obj' )
|
||||
|
||||
![[
|
||||
SYNTAX: LOWER-COLON:
|
||||
scan-new-class
|
||||
[ ]
|
||||
[ tuple { "object" } define-tuple-class ]
|
||||
[ '[ _ boa suffix! ] define-lower-colon-word ] tri ;
|
||||
]]
|
||||
|
||||
|
||||
ERROR: no-single-quote-word payload word ;
|
||||
: handle-single-quote ( pair -- obj )
|
||||
first2 swap single-quote-definitions get ?at
|
||||
[ execute( obj -- obj' ) ]
|
||||
[ no-single-quote-word ] if ;
|
||||
|
||||
: ch>object ( ch -- obj )
|
||||
: set-container-word ( word def -- )
|
||||
{
|
||||
{ [ dup length 1 = ] [ first ] }
|
||||
{ [ "\\" ?head ] [ next-escape >string "" assert= ] }
|
||||
[ name>char-hook get ( name -- char ) call-effect ]
|
||||
} cond ;
|
||||
|
||||
\ ch>object "ch" single-quote-definitions get set-at
|
||||
|
||||
|
||||
[ set-single-quote-word ]
|
||||
[ set-double-quote-word ]
|
||||
[ set-bracket-container-word ]
|
||||
[ set-brace-container-word ]
|
||||
[ set-paren-container-word ]
|
||||
} 2cleave ;
|
||||
|
||||
ERROR: no-lower-colon-word payload word ;
|
||||
: handle-lower-colon ( pair -- obj )
|
||||
|
@ -58,5 +51,132 @@ ERROR: no-lower-colon-word payload word ;
|
|||
[ execute( obj -- obj' ) ]
|
||||
[ no-lower-colon-word ] if ;
|
||||
|
||||
: no-op ( obj -- obj' ) ;
|
||||
\ no-op "data-stack" lower-colon-definitions get set-at
|
||||
! Thrown by handle-single-quote when the key has no entry in
! single-quote-definitions.
ERROR: no-single-quote-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! single-quote-definitions and the word found there is executed on the
! second element. A missing key throws no-single-quote-word carrying
! the second element and the key.
: handle-single-quote ( pair -- obj )
    [ second ] [ first ] bi single-quote-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-single-quote-word ] if ;
|
||||
|
||||
! Thrown by handle-section when the key has no entry in
! section-definitions.
ERROR: no-section-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! section-definitions and the word found there is executed on the
! second element. A missing key throws no-section-word carrying the
! second element and the key.
: handle-section ( pair -- obj )
    [ second ] [ first ] bi section-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-section-word ] if ;
|
||||
|
||||
! Thrown by handle-named-section when the key has no entry in
! named-section-definitions.
ERROR: no-named-section-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! named-section-definitions and the word found there is executed on
! the second element. A missing key throws no-named-section-word
! carrying the second element and the key.
: handle-named-section ( pair -- obj )
    [ second ] [ first ] bi named-section-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-named-section-word ] if ;
|
||||
|
||||
! Thrown by handle-double-quote when the key has no entry in
! double-quote-definitions.
ERROR: no-double-quote-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! double-quote-definitions and the word found there is executed on the
! second element. A missing key throws no-double-quote-word carrying
! the second element and the key.
: handle-double-quote ( pair -- obj )
    [ second ] [ first ] bi double-quote-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-double-quote-word ] if ;
|
||||
|
||||
! Thrown by handle-bracket-container when the key has no entry in
! bracket-container-definitions.
ERROR: no-bracket-container-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! bracket-container-definitions and the word found there is executed
! on the second element. A missing key throws
! no-bracket-container-word carrying the second element and the key.
: handle-bracket-container ( pair -- obj )
    [ second ] [ first ] bi bracket-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-bracket-container-word ] if ;
|
||||
|
||||
! Thrown by handle-brace-container when the key has no entry in
! brace-container-definitions.
ERROR: no-brace-container-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! brace-container-definitions and the word found there is executed on
! the second element. A missing key throws no-brace-container-word
! carrying the second element and the key.
: handle-brace-container ( pair -- obj )
    [ second ] [ first ] bi brace-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-brace-container-word ] if ;
|
||||
|
||||
! Thrown by handle-paren-container when the key has no entry in
! paren-container-definitions.
ERROR: no-paren-container-word payload word ;

! Dispatch a two-element pair: the first element is looked up in
! paren-container-definitions and the word found there is executed on
! the second element. A missing key throws no-paren-container-word
! carrying the second element and the key.
: handle-paren-container ( pair -- obj )
    [ second ] [ first ] bi paren-container-definitions get ?at
    [ execute( obj -- obj' ) ]
    [ no-paren-container-word ] if ;
|
||||
|
||||
|
||||
! True when ch is a lowercase ASCII letter (a-z) or a hyphen.
! The input is a single character (it is compared against character
! bounds), so the stack effect names it ch rather than str.
: lower-char? ( ch -- ? )
    [ ch'a ch'z between? ] [ ch'- = ] bi or ;
|
||||
! True when ch is an uppercase ASCII letter (A-Z) or a hyphen.
! The input is a single character (it is compared against character
! bounds), so the stack effect names it ch rather than str.
: upper-char? ( ch -- ? )
    [ ch'A ch'Z between? ] [ ch'- = ] bi or ;
|
||||
|
||||
|
||||
! Test for a lower-colon token such as  name:  .
! cut-tail is given a "is a colon" predicate and yields two pieces
! (presumably the head and the trailing run of colons -- cut-tail is
! not defined in this view, confirm against its definition).
! The first piece must be non-empty and made only of lower-char?
! characters; the second piece must be non-empty.
: strict-lower-colon? ( string -- ? )
    [ ch'\: = ] cut-tail
    [ [ empty? not ] [ [ lower-char? ] all? ] bi and ]
    [ empty? not ]
    bi* and ;
|
||||
|
||||
|
||||
|
||||
|
||||
! Core test for upper-colon tokens. Both conditions must hold:
!   * every character is A-Z or one of  ' : - \ #
!   * at least one character is A-Z or  '
: (strict-upper-colon?) ( string -- ? )
    [ [ dup ch'A ch'Z between? [ drop t ] [ "':-\\#" member? ] if ] all? ]
    [ [ dup ch'A ch'Z between? [ drop t ] [ ch'\' = ] if ] any? ]
    bi and ;
|
||||
|
||||
! True for a token made only of colons, or for one accepted by
! (strict-upper-colon?).
: strict-upper-colon? ( string -- ? )
    dup [ ch'\: = ] all?
    [ drop t ]
    [ (strict-upper-colon?) ] if ;
|
||||
|
||||
|
||||
! True when the token is "<" followed only by upper-char? characters
! (a bare "<" also qualifies, as before).
! ?head strips the "<" and reports whether it was there, so the rest
! of the token is only examined once the prefix test has passed. The
! previous `bi` form always ran `rest`, which throws on an empty token
! or on f (the end-of-input marker) instead of answering f.
: strict-section-word? ( string -- ? )
    "<" ?head
    [ [ upper-char? ] all? ]
    [ drop f ] if ;
|
||||
|
||||
! True when the token is "<", then only upper-char? characters, then a
! closing ":" (so "<:" qualifies, as before).
! The prefix and suffix are stripped with ?head / ?tail and the middle
! is inspected only when both were present. The previous `tri` form
! always ran `rest but-last`, which throws on one-character tokens
! such as "<" or ":" instead of answering f.
: strict-named-section-word? ( string -- ? )
    "<" ?head [
        ":" ?tail
        [ [ upper-char? ] all? ]
        [ drop f ] if
    ] [
        drop f
    ] if ;
|
||||
|
||||
! Core test for single-quote tokens such as  ch'a  .
! The string is split at its first ' into a leading part and a payload:
!   * the leading part must not start with ';
!   * the payload must be non-empty;
!   * the payload must not end with ', unless it ends with \'
!     (an escaped quote, as in ch'\').
: (strict-single-quote?) ( string -- ? )
    "'" split1
    [ "'" head? not ]
    [
        [ empty? not ]
        [ dup "\\'" tail? [ drop t ] [ "'" tail? not ] if ]
        bi and
    ] bi* and ;
|
||||
|
||||
! True when the token passes (strict-single-quote?) and is not exactly
! the two-character token  '[  .
: strict-single-quote? ( string -- ? )
    [ (strict-single-quote?) ]
    [ "'[" sequence= not ]
    bi and ;
|
||||
|
||||
! True when the last element of the string is a double-quote character.
: strict-double-quote? ( string -- ? ) ?last ch'\" = ;
|
||||
|
||||
! Recognise an opening container token of the shape
!     tag  open-str  ={0 or more}  open-str
! e.g. with open-str "[" :  foo[[   foo[=[   foo[==[
! The string is split at the first open-str, and the remainder is split
! at open-str again. The text between the two openers must consist only
! of '=' characters and nothing may follow the second opener.
: strict-container? ( string open-str -- ? )
    [ split1 ] keep split1
    ! stack: tag  between-openers  after-second-opener
    [ [ ch'= = ] all? ] [ "" = ] bi*
    and and ;
|
||||
|
||||
! Shorthands that fix the opener passed to strict-container?:
! "[" for bracket, "{" for brace and "(" for paren containers.
: strict-bracket-container? ( string -- ? ) "[" strict-container? ;
|
||||
: strict-brace-container? ( string -- ? ) "{" strict-container? ;
|
||||
: strict-paren-container? ( string -- ? ) "(" strict-container? ;
|
||||
|
||||
! Tag extraction. container-tag keeps only the first output of split1
! (the text before the first occurrence of open-str) and drops the
! remainder; the other words fix the separator: a double quote, "[",
! "{" or "(".
: container-tag ( string open-str -- string' ) split1 drop ;
|
||||
: double-quote-tag ( string -- string' ) "\"" split1 drop ;
|
||||
: bracket-container-tag ( string -- string' ) "[" container-tag ;
|
||||
: brace-container-tag ( string -- string' ) "{" container-tag ;
|
||||
: paren-container-tag ( string -- string' ) "(" container-tag ;
|
||||
|
||||
! Parse a bracket container given its opening token (e.g. tag[==[ ).
! The token is split at the first "[" into the tag and the rest; the
! rest, with "[" put back in front, is mirrored by
! matching-delimiter-string and passed to parse-multiline-string0 as
! its end-text. The tag and the parsed text are paired up and handed
! to handle-bracket-container, whose result is the output.
: parse-bracket-container ( string -- string' )
    "[" split1
    "[" swap append matching-delimiter-string
    parse-multiline-string0
    2array handle-bracket-container ;
|
||||
|
||||
! Parse a brace container given its opening token (e.g. tag{=={ ).
! The token is split at the first "{" into the tag and the rest; the
! rest, with "{" put back in front, is mirrored by
! matching-delimiter-string and passed to parse-multiline-string0 as
! its end-text. The tag and the parsed text are paired up and handed
! to handle-brace-container, whose result is the output.
: parse-brace-container ( string -- string' )
    "{" split1
    "{" swap append matching-delimiter-string
    parse-multiline-string0
    2array handle-brace-container ;
|
||||
|
||||
! Parse a paren container given its opening token (e.g. tag(==( ).
! The token is split at the first "(" into the tag and the rest; the
! rest, with "(" put back in front, is mirrored by
! matching-delimiter-string and passed to parse-multiline-string0 as
! its end-text. The tag and the parsed text are paired up and handed
! to handle-paren-container, whose result is the output.
: parse-paren-container ( string -- string' )
    "(" split1
    "(" swap append matching-delimiter-string
    parse-multiline-string0
    2array handle-paren-container ;
|
||||
|
||||
|
||||
\ lookup-char "ch" set-container-word
|
||||
\ no-op "data-stack" set-lower-colon-word
|
||||
! USE: urls \ >url "url" set-container-word
|
||||
|
|
|
@ -128,11 +128,8 @@ IN: bootstrap.syntax
|
|||
"f" [ f suffix! ] define-core-syntax
|
||||
|
||||
"char:" [
|
||||
lexer get parse-raw [ "token" throw-unexpected-eof ] unless* {
|
||||
{ [ dup length 1 = ] [ first ] }
|
||||
{ [ "\\" ?head ] [ next-escape >string "" assert= ] }
|
||||
[ name>char-hook get call( name -- char ) ]
|
||||
} cond suffix!
|
||||
lexer get parse-raw [ "token" throw-unexpected-eof ] unless*
|
||||
lookup-char suffix!
|
||||
] define-core-syntax
|
||||
|
||||
"\"" [ parse-string suffix! ] define-core-syntax
|
||||
|
@ -183,16 +180,6 @@ IN: bootstrap.syntax
|
|||
scan-new-word [ define-symbol ] keep scan-object '[ _ _ initialize ] append!
|
||||
] define-core-syntax
|
||||
|
||||
![[
|
||||
"INITIALIZED-SYMBOL:" [
|
||||
scan-new-word [ define-symbol ]
|
||||
[
|
||||
name>> "initialize-" prepend create-word-in dup reset-generic
|
||||
scan-object dupd [ initialize ] curry curry ( -- ) define-declared
|
||||
] bi
|
||||
] define-core-syntax
|
||||
]]
|
||||
|
||||
"SYMBOL:" [
|
||||
scan-new-word define-symbol
|
||||
] define-core-syntax
|
||||
|
|
|
@ -7,18 +7,6 @@ IN: modern.slices
|
|||
: >strings ( seq -- str )
|
||||
[ dup slice? [ >string ] when ] deep-map ;
|
||||
|
||||
: matching-delimiter ( ch -- ch' )
|
||||
H{
|
||||
{ ch'\( ch'\) }
|
||||
{ ch'\[ ch'\] }
|
||||
{ ch'\{ ch'\} }
|
||||
{ ch'< ch'> }
|
||||
{ ch'\: ch'\; }
|
||||
} ?at drop ;
|
||||
|
||||
: matching-delimiter-string ( string -- string' )
|
||||
[ matching-delimiter ] map ;
|
||||
|
||||
: matching-section-delimiter ( string -- string' )
|
||||
dup ":" tail? [
|
||||
rest but-last ";" ">" surround
|
||||
|
|
Loading…
Reference in New Issue