factor: Add more parser guts.

modern-harvey3-triple
Doug Coleman 2018-08-10 18:01:19 -05:00
parent 4bcae2590c
commit cbfdf2cfa8
8 changed files with 206 additions and 132 deletions

View File

@ -531,3 +531,5 @@ PRIVATE>
: get-retainstack ( -- array ) : get-retainstack ( -- array )
context retainstack-for ; inline context retainstack-for ; inline
: no-op ( obj -- obj ) ;

View File

@ -84,8 +84,8 @@ M: lexer skip-blank
GENERIC: skip-word ( lexer -- ) GENERIC: skip-word ( lexer -- )
: find-container-delimiter ( i str -- n/f ) : find-container-delimiter ( i str delim-str -- n/f )
2dup [ "[" member? ] find-from [ [ 2dup ] dip '[ _ member? ] find-from [
[ swap subseq [ ch'= = ] all? ] keep and [ swap subseq [ ch'= = ] all? ] keep and
] [ ] [
3drop f 3drop f
@ -93,11 +93,19 @@ GENERIC: skip-word ( lexer -- )
M: lexer skip-word M: lexer skip-word
[ [
2dup [ " \"[" member? ] find-from 2dup [ " \"[{(" member? ] find-from
{ {
{ ch'\" [ 2nip 1 + ] } { ch'\" [ 2nip 1 + ] }
{ ch'\[ [ { ch'\[ [
1 + over find-container-delimiter 1 + over "[" find-container-delimiter
dup [ 2nip 1 + ] [ drop f skip ] if
] }
{ ch'\{ [
1 + over "{" find-container-delimiter
dup [ 2nip 1 + ] [ drop f skip ] if
] }
{ ch'\( [
1 + over "(" find-container-delimiter
dup [ 2nip 1 + ] [ drop f skip ] if dup [ 2nip 1 + ] [ drop f skip ] if
] } ] }
[ 2drop f skip ] [ 2drop f skip ]
@ -140,8 +148,8 @@ DEFER: parse-token
: unescape-token ( string -- string' ) : unescape-token ( string -- string' )
dup length 1 = [ "\\" ?head drop ] unless ; dup length 1 = [ "\\" ?head drop ] unless ;
: unhashtag-token ( string -- string' ) : unhashtag-token ( string -- string' ? )
dup length 1 = [ "#" ?head [ drop f ] when ] unless ; dup length 1 = [ f ] [ "#" ?head >boolean ] if ;
: unescape-tokens ( seq -- seq' ) : unescape-tokens ( seq -- seq' )
[ unescape-token ] map ; [ unescape-token ] map ;
@ -149,49 +157,12 @@ DEFER: parse-token
: parse-token ( lexer -- str/f ) : parse-token ( lexer -- str/f )
dup parse-raw [ skip-comments ] [ drop f ] if* ; dup parse-raw [ skip-comments ] [ drop f ] if* ;
: ?scan-token ( -- str/f ) lexer get parse-token unescape-token unhashtag-token ; : ?scan-token ( -- str/f ) lexer get parse-token unescape-token ;
PREDICATE: unexpected-eof < unexpected got>> not ; PREDICATE: unexpected-eof < unexpected got>> not ;
: throw-unexpected-eof ( word -- * ) f unexpected ; : throw-unexpected-eof ( word -- * ) f unexpected ;
: (strict-single-quote?) ( string -- ? )
"'" split1
[ "'" head? not ]
[
[ length 0 > ]
[
! ch'\'
[ "\\'" tail? ] [ "'" tail? not ] bi or
] bi and
] bi* and ;
: strict-single-quote? ( string -- ? )
dup (strict-single-quote?)
[ "'[" sequence= not ] [ drop f ] if ;
: strict-lower-colon? ( string -- ? )
[ ch'\: = ] cut-tail
[
[ length 0 > ] [
[ [ ch'a ch'z between? ] [ "-" member? ] bi or ] all?
] bi and ]
[ length 0 > ] bi* and ;
: (strict-upper-colon?) ( string -- ? )
! All chars must...
[
[
[ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
] all?
]
! At least one char must...
[ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
: strict-upper-colon? ( string -- ? )
[ [ ch'\: = ] all? ]
[ (strict-upper-colon?) ] bi or ;
: scan-token ( -- str ) : scan-token ( -- str )
?scan-token [ "token" throw-unexpected-eof ] unless* ; ?scan-token [ "token" throw-unexpected-eof ] unless* ;

View File

@ -31,19 +31,3 @@ PRIVATE>
: parse-multiline-string0 ( end-text -- str ) : parse-multiline-string0 ( end-text -- str )
lexer get 0 (parse-multiline-string) ; lexer get 0 (parse-multiline-string) ;
! SYNTAX: \[[ "]]" parse-multiline-string0 suffix! ;
! SYNTAX: \[=[ "]=]" parse-multiline-string0 suffix! ;
! SYNTAX: \[==[ "]==]" parse-multiline-string0 suffix! ;
! SYNTAX: \[===[ "]===]" parse-multiline-string0 suffix! ;
! SYNTAX: \[====[ "]====]" parse-multiline-string0 suffix! ;
! SYNTAX: \[=====[ "]=====]" parse-multiline-string0 suffix! ;
! SYNTAX: \[======[ "]======]" parse-multiline-string0 suffix! ;
! SYNTAX: \![[ "]]" parse-multiline-string0 drop ;
! SYNTAX: \![=[ "]=]" parse-multiline-string0 drop ;
! SYNTAX: \![==[ "]==]" parse-multiline-string0 drop ;
! SYNTAX: \![===[ "]===]" parse-multiline-string0 drop ;
! SYNTAX: \![====[ "]====]" parse-multiline-string0 drop ;
! SYNTAX: \![=====[ "]=====]" parse-multiline-string0 drop ;
! SYNTAX: \![======[ "]======]" parse-multiline-string0 drop ;

View File

@ -71,7 +71,18 @@ DEFER: scan-object
: string>new-parser ( string -- string/obj ? ) : string>new-parser ( string -- string/obj ? )
{ {
! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] } ! { [ dup strict-lower-colon? ] [ parse-lower-colon2 t ] }
! { [ dup strict-upper-colon? ] [ parse-upper-colon t ] }
! { [ dup strict-section? ] [ parse-section t ] }
! { [ dup strict-named-section? ] [ parse-named-section t ] }
{ [ dup strict-single-quote? ] [ parse-single-quote t ] } { [ dup strict-single-quote? ] [ parse-single-quote t ] }
! { [ dup strict-double-quote? ] [ parse-double-quote t ] }
! { [ dup strict-bracket-container? ] [ parse-bracket-container t ] }
! { [ dup strict-brace-container? ] [ parse-brace-container t ] }
! { [ dup strict-paren-container? ] [ parse-paren-container t ] }
! { [ dup strict-bracket? ] [ parse-bracket t ] }
! { [ dup strict-brace? ] [ parse-brace t ] }
! { [ dup strict-paren? ] [ parse-paren t ] }
[ f ] [ f ]
} cond ; } cond ;
@ -144,8 +155,12 @@ ERROR: classoid-expected object ;
} cond ; } cond ;
: parse-until-step ( accum end -- accum ? ) : parse-until-step ( accum end -- accum ? )
?scan-token string>new-parser ?scan-token
[ nip suffix! t ] [ (parse-until-step) ] if ; unhashtag-token
[
string>new-parser
[ nip suffix! t ] [ (parse-until-step) ] if
] dip [ over pop* ] [ ] if ;
: (parse-until) ( accum end -- accum ) : (parse-until) ( accum end -- accum )
[ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ; [ parse-until-step ] keep swap [ (parse-until) ] [ drop ] if ;

View File

@ -159,3 +159,10 @@ PRIVATE>
lexer get (parse-string) lexer get (parse-string)
] keep unescape-string ] keep unescape-string
] rewind-lexer-on-error ; ] rewind-lexer-on-error ;
: lookup-char ( char -- obj )
{
{ [ dup length 1 = ] [ first ] }
{ [ "\\" ?head ] [ next-escape >string "" assert= ] }
[ name>char-hook get ( name -- char ) call-effect ]
} cond ;

View File

@ -1,56 +1,49 @@
! Copyright (C) 2018 Doug Coleman. ! Copyright (C) 2018 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs combinators kernel namespaces sequences splitting USING: arrays assocs combinators kernel math math.order
strings strings.parser ; multiline namespaces sequences splitting strings strings.parser ;
IN: syntax.modern IN: syntax.modern
INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ] : matching-delimiter ( ch -- ch' )
H{
{ ch'\( ch'\) }
{ ch'\[ ch'\] }
{ ch'\{ ch'\} }
{ ch'< ch'> }
{ ch'\: ch'\; }
} ?at drop ;
: matching-delimiter-string ( string -- string' )
[ matching-delimiter ] map ;
INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ] INITIALIZED-SYMBOL: lower-colon-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ] INITIALIZED-SYMBOL: upper-colon-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: section-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: named-section-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: single-quote-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ] INITIALIZED-SYMBOL: double-quote-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ] INITIALIZED-SYMBOL: bracket-container-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ] INITIALIZED-SYMBOL: brace-container-definitions [ H{ } clone ]
INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ] INITIALIZED-SYMBOL: paren-container-definitions [ H{ } clone ]
: define-single-quote-word ( word def -- ) swap lower-colon-definitions get set-at ; : set-lower-colon-word ( word name -- ) lower-colon-definitions get set-at ;
: define-lower-colon-word ( word def -- ) swap lower-colon-definitions get set-at ; : set-upper-colon-word ( word name -- ) upper-colon-definitions get set-at ;
: define-upper-colon-word ( word def -- ) swap upper-colon-definitions get set-at ; : set-section-word ( word name -- ) section-definitions get set-at ;
: define-double-quote-word ( word def -- ) swap double-quote-definitions get set-at ; : set-named-section-word ( word name -- ) named-section-definitions get set-at ;
: define-bracket-container-word ( word def -- ) swap bracket-container-definitions get set-at ; : set-single-quote-word ( word name -- ) single-quote-definitions get set-at ;
: define-brace-container-word ( word def -- ) swap brace-container-definitions get set-at ; : set-double-quote-word ( word name -- ) double-quote-definitions get set-at ;
: define-paren-container-word ( word def -- ) swap paren-container-definitions get set-at ; : set-bracket-container-word ( word name -- ) bracket-container-definitions get set-at ;
: set-brace-container-word ( word name -- ) brace-container-definitions get set-at ;
: set-paren-container-word ( word name -- ) paren-container-definitions get set-at ;
GENERIC: lower-colon>object ( obj -- obj' ) : set-container-word ( word def -- )
GENERIC: double-quote>object ( obj -- obj' )
GENERIC: bracket-container>object ( obj -- obj' )
GENERIC: brace-container>object ( obj -- obj' )
GENERIC: paren-container>object ( obj -- obj' )
![[
SYNTAX: LOWER-COLON:
scan-new-class
[ ]
[ tuple { "object" } define-tuple-class ]
[ '[ _ boa suffix! ] define-lower-colon-word ] tri ;
]]
ERROR: no-single-quote-word payload word ;
: handle-single-quote ( pair -- obj )
first2 swap single-quote-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-single-quote-word ] if ;
: ch>object ( ch -- obj )
{ {
{ [ dup length 1 = ] [ first ] } [ set-single-quote-word ]
{ [ "\\" ?head ] [ next-escape >string "" assert= ] } [ set-double-quote-word ]
[ name>char-hook get ( name -- char ) call-effect ] [ set-bracket-container-word ]
} cond ; [ set-brace-container-word ]
[ set-paren-container-word ]
\ ch>object "ch" single-quote-definitions get set-at } 2cleave ;
ERROR: no-lower-colon-word payload word ; ERROR: no-lower-colon-word payload word ;
: handle-lower-colon ( pair -- obj ) : handle-lower-colon ( pair -- obj )
@ -58,5 +51,132 @@ ERROR: no-lower-colon-word payload word ;
[ execute( obj -- obj' ) ] [ execute( obj -- obj' ) ]
[ no-lower-colon-word ] if ; [ no-lower-colon-word ] if ;
: no-op ( obj -- obj' ) ; ERROR: no-single-quote-word payload word ;
\ no-op "data-stack" lower-colon-definitions get set-at : handle-single-quote ( pair -- obj )
first2 swap single-quote-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-single-quote-word ] if ;
ERROR: no-section-word payload word ;
: handle-section ( pair -- obj )
first2 swap section-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-section-word ] if ;
ERROR: no-named-section-word payload word ;
: handle-named-section ( pair -- obj )
first2 swap named-section-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-named-section-word ] if ;
ERROR: no-double-quote-word payload word ;
: handle-double-quote ( pair -- obj )
first2 swap double-quote-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-double-quote-word ] if ;
ERROR: no-bracket-container-word payload word ;
: handle-bracket-container ( pair -- obj )
first2 swap bracket-container-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-bracket-container-word ] if ;
ERROR: no-brace-container-word payload word ;
: handle-brace-container ( pair -- obj )
first2 swap brace-container-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-brace-container-word ] if ;
ERROR: no-paren-container-word payload word ;
: handle-paren-container ( pair -- obj )
first2 swap paren-container-definitions get ?at
[ execute( obj -- obj' ) ]
[ no-paren-container-word ] if ;
: lower-char? ( str -- ? ) [ ch'a ch'z between? ] [ ch'- = ] bi or ;
: upper-char? ( str -- ? ) [ ch'A ch'Z between? ] [ ch'- = ] bi or ;
: strict-lower-colon? ( string -- ? )
[ ch'\: = ] cut-tail
[
[ length 0 > ] [ [ lower-char? ] all? ] bi and
] [ length 0 > ] bi* and ;
: (strict-upper-colon?) ( string -- ? )
! All chars must...
[
[
[ ch'A ch'Z between? ] [ "':-\\#" member? ] bi or
] all?
]
! At least one char must...
[ [ [ ch'A ch'Z between? ] [ ch'\' = ] bi or ] any? ] bi and ;
: strict-upper-colon? ( string -- ? )
[ [ ch'\: = ] all? ]
[ (strict-upper-colon?) ] bi or ;
: strict-section-word? ( string -- ? )
[ "<" head? ]
[ rest [ upper-char? ] all? ] bi and ;
: strict-named-section-word? ( string -- ? )
[ "<" head? ]
[ ":" tail? ]
[ rest but-last [ upper-char? ] all? ] tri and and ;
: (strict-single-quote?) ( string -- ? )
"'" split1
[ "'" head? not ]
[
[ length 0 > ]
[
! ch'\'
[ "\\'" tail? ] [ "'" tail? not ] bi or
] bi and
] bi* and ;
: strict-single-quote? ( string -- ? )
dup (strict-single-quote?)
[ "'[" sequence= not ] [ drop f ] if ;
: strict-double-quote? ( string -- ? ) ?last ch'\" = ;
: strict-container? ( string open-str -- ? )
[ split1 ] [ split1 ] bi
[ ]
[ [ ch'= = ] all? ]
[ "" = ] tri* and and ;
: strict-bracket-container? ( string -- ? ) "[" strict-container? ;
: strict-brace-container? ( string -- ? ) "{" strict-container? ;
: strict-paren-container? ( string -- ? ) "(" strict-container? ;
: container-tag ( string open-str -- string' ) split1 drop ;
: double-quote-tag ( string -- string' ) "\"" split1 drop ;
: bracket-container-tag ( string -- string' ) "[" container-tag ;
: brace-container-tag ( string -- string' ) "{" container-tag ;
: paren-container-tag ( string -- string' ) "(" container-tag ;
: parse-bracket-container ( string -- string' )
"[" split1 "[" prepend matching-delimiter-string
parse-multiline-string0 2array handle-bracket-container ;
: parse-brace-container ( string -- string' )
"{" split1 "{" prepend matching-delimiter-string
parse-multiline-string0 2array handle-brace-container ;
: parse-paren-container ( string -- string' )
"(" split1 "(" prepend matching-delimiter-string
parse-multiline-string0 2array handle-paren-container ;
\ lookup-char "ch" set-container-word
\ no-op "data-stack" set-lower-colon-word
! USE: urls \ >url "url" set-container-word

View File

@ -128,11 +128,8 @@ IN: bootstrap.syntax
"f" [ f suffix! ] define-core-syntax "f" [ f suffix! ] define-core-syntax
"char:" [ "char:" [
lexer get parse-raw [ "token" throw-unexpected-eof ] unless* { lexer get parse-raw [ "token" throw-unexpected-eof ] unless*
{ [ dup length 1 = ] [ first ] } lookup-char suffix!
{ [ "\\" ?head ] [ next-escape >string "" assert= ] }
[ name>char-hook get call( name -- char ) ]
} cond suffix!
] define-core-syntax ] define-core-syntax
"\"" [ parse-string suffix! ] define-core-syntax "\"" [ parse-string suffix! ] define-core-syntax
@ -183,16 +180,6 @@ IN: bootstrap.syntax
scan-new-word [ define-symbol ] keep scan-object '[ _ _ initialize ] append! scan-new-word [ define-symbol ] keep scan-object '[ _ _ initialize ] append!
] define-core-syntax ] define-core-syntax
![[
"INITIALIZED-SYMBOL:" [
scan-new-word [ define-symbol ]
[
name>> "initialize-" prepend create-word-in dup reset-generic
scan-object dupd [ initialize ] curry curry ( -- ) define-declared
] bi
] define-core-syntax
]]
"SYMBOL:" [ "SYMBOL:" [
scan-new-word define-symbol scan-new-word define-symbol
] define-core-syntax ] define-core-syntax

View File

@ -7,18 +7,6 @@ IN: modern.slices
: >strings ( seq -- str ) : >strings ( seq -- str )
[ dup slice? [ >string ] when ] deep-map ; [ dup slice? [ >string ] when ] deep-map ;
: matching-delimiter ( ch -- ch' )
H{
{ ch'\( ch'\) }
{ ch'\[ ch'\] }
{ ch'\{ ch'\} }
{ ch'< ch'> }
{ ch'\: ch'\; }
} ?at drop ;
: matching-delimiter-string ( string -- string' )
[ matching-delimiter ] map ;
: matching-section-delimiter ( string -- string' ) : matching-section-delimiter ( string -- string' )
dup ":" tail? [ dup ":" tail? [
rest but-last ";" ">" surround rest but-last ";" ">" surround