factor/core/parser/parser.factor

547 lines
13 KiB
Factor
Raw Normal View History

2008-02-01 18:48:29 -05:00
! Copyright (C) 2005, 2008 Slava Pestov.
2007-09-20 18:09:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
USING: arrays definitions generic assocs kernel math
namespaces prettyprint sequences strings vectors words
quotations inspector io.styles io combinators sorting
splitting math.parser effects continuations debugger
2008-02-21 19:05:04 -05:00
io.files io.streams.string vocabs io.encodings.utf8
source-files classes hashtables compiler.errors compiler.units
accessors ;
2007-09-20 18:09:08 -04:00
IN: parser
2008-02-05 23:36:10 -05:00
TUPLE: lexer text line line-text line-length column ;
2007-09-20 18:09:08 -04:00
2008-02-05 23:36:10 -05:00
: next-line ( lexer -- )
dup [ line>> ] [ text>> ] bi ?nth >>line-text
dup line-text>> length >>line-length
[ 1+ ] change-line
0 >>column
drop ;
2007-09-20 18:09:08 -04:00
2008-02-05 23:36:10 -05:00
: <lexer> ( text -- lexer )
0 { set-lexer-text set-lexer-line } lexer construct
2008-02-08 02:48:51 -05:00
dup next-line ;
2007-09-20 18:09:08 -04:00
: location ( -- loc )
file get lexer get lexer-line 2dup and
[ >r source-file-path r> 2array ] [ 2drop f ] if ;
: save-location ( definition -- )
2007-12-24 17:18:26 -05:00
location remember-definition ;
: save-class-location ( class -- )
location remember-class ;
2007-09-20 18:09:08 -04:00
SYMBOL: parser-notes
t parser-notes set-global
: parser-notes? ( -- ? )
parser-notes get "quiet" get not and ;
: file. ( file -- )
[
source-file-path <pathname> pprint
] [
"<interactive>" write
] if* ":" write ;
: note. ( str -- )
parser-notes? [
file get file.
lexer get [
lexer-line number>string print
] [
nl
] if*
"Note: " write dup print
] when drop ;
2008-02-01 18:48:29 -05:00
: skip ( i seq ? -- n )
over >r
[ swap CHAR: \s eq? xor ] curry find* drop
2008-02-05 23:36:10 -05:00
[ r> drop ] [ r> length ] if* ;
2007-09-20 18:09:08 -04:00
2008-03-20 20:54:25 -04:00
: change-lexer-column ( lexer quot -- )
2007-09-20 18:09:08 -04:00
swap
2008-02-05 23:36:10 -05:00
[ dup lexer-column swap lexer-line-text rot call ] keep
2007-09-20 18:09:08 -04:00
set-lexer-column ; inline
GENERIC: skip-blank ( lexer -- )
M: lexer skip-blank ( lexer -- )
2008-03-20 20:54:25 -04:00
[ t skip ] change-lexer-column ;
2007-09-20 18:09:08 -04:00
GENERIC: skip-word ( lexer -- )
M: lexer skip-word ( lexer -- )
[
2008-02-05 23:36:10 -05:00
2dup nth CHAR: " eq? [ drop 1+ ] [ f skip ] if
2008-03-20 20:54:25 -04:00
] change-lexer-column ;
2007-09-20 18:09:08 -04:00
: still-parsing? ( lexer -- ? )
dup lexer-line swap lexer-text length <= ;
: still-parsing-line? ( lexer -- ? )
2008-02-05 23:36:10 -05:00
dup lexer-column swap lexer-line-length < ;
2007-09-20 18:09:08 -04:00
: (parse-token) ( lexer -- str )
[ lexer-column ] keep
[ skip-word ] keep
[ lexer-column ] keep
2008-02-05 23:36:10 -05:00
lexer-line-text subseq ;
2007-09-20 18:09:08 -04:00
: parse-token ( lexer -- str/f )
dup still-parsing? [
dup skip-blank
dup still-parsing-line?
[ (parse-token) ] [ dup next-line parse-token ] if
] [ drop f ] if ;
: scan ( -- str/f ) lexer get parse-token ;
2008-03-20 16:00:49 -04:00
ERROR: bad-escape ;
2007-09-20 18:09:08 -04:00
M: bad-escape summary drop "Bad escape code" ;
: escape ( escape -- ch )
H{
2008-02-11 15:19:47 -05:00
{ CHAR: a CHAR: \a }
2007-09-20 18:09:08 -04:00
{ CHAR: e CHAR: \e }
{ CHAR: n CHAR: \n }
{ CHAR: r CHAR: \r }
{ CHAR: t CHAR: \t }
{ CHAR: s CHAR: \s }
{ CHAR: \s CHAR: \s }
{ CHAR: 0 CHAR: \0 }
{ CHAR: \\ CHAR: \\ }
{ CHAR: \" CHAR: \" }
} at [ bad-escape ] unless* ;
2008-02-15 20:32:14 -05:00
SYMBOL: name>char-hook
2007-09-20 18:09:08 -04:00
2008-02-15 20:32:14 -05:00
name>char-hook global [
[ "Unicode support not available" throw ] or
] change-at
2007-09-20 18:09:08 -04:00
2008-02-15 20:32:14 -05:00
: unicode-escape ( str -- ch str' )
"{" ?head-slice [
CHAR: } over index cut-slice
>r >string name>char-hook get call r>
1 tail-slice
] [
6 cut-slice >r hex> r>
] if ;
: next-escape ( str -- ch str' )
"u" ?head-slice [
unicode-escape
] [
unclip-slice escape swap
] if ;
: (parse-string) ( str -- m )
dup [ "\"\\" member? ] find dup [
>r cut-slice >r % r> 1 tail-slice r>
dup CHAR: " = [
drop slice-from
] [
drop next-escape >r , r> (parse-string)
] if
] [
"Unterminated string" throw
] if ;
2007-09-20 18:09:08 -04:00
: parse-string ( -- str )
lexer get [
2008-02-15 20:32:14 -05:00
[ swap tail-slice (parse-string) ] "" make swap
2008-03-20 20:54:25 -04:00
] change-lexer-column ;
2007-09-20 18:09:08 -04:00
TUPLE: parse-error file line col text ;
: <parse-error> ( msg -- error )
file get
lexer get [ line>> ] [ column>> ] [ line-text>> ] tri
2007-09-20 18:09:08 -04:00
parse-error construct-boa
[ set-delegate ] keep ;
: parse-dump ( error -- )
dup parse-error-file file.
dup parse-error-line number>string print
dup parse-error-text dup string? [ print ] [ drop ] if
parse-error-col 0 or CHAR: \s <string> write
"^" print ;
M: parse-error error.
dup parse-dump delegate error. ;
SYMBOL: use
SYMBOL: in
: word/vocab% ( word -- )
"(" % dup word-vocabulary % " " % word-name % ")" % ;
: shadow-warning ( new old -- )
2dup eq? [
2drop
] [
[ word/vocab% " shadowed by " % word/vocab% ] "" make
note.
] if ;
: shadow-warnings ( vocab vocabs -- )
[
swapd assoc-stack dup
[ shadow-warning ] [ 2drop ] if
] curry assoc-each ;
: (use+) ( vocab -- )
vocab-words use get 2dup shadow-warnings push ;
: use+ ( vocab -- )
load-vocab (use+) ;
: add-use ( seq -- ) [ use+ ] each ;
: set-use ( seq -- )
[ vocab-words ] map [ ] subset >vector use set ;
: check-vocab-string ( name -- name )
dup string?
[ "Vocabulary name must be a string" throw ] unless ;
: set-in ( name -- )
check-vocab-string dup in set create-vocab (use+) ;
2008-03-20 16:00:49 -04:00
ERROR: unexpected want got ;
2007-09-20 18:09:08 -04:00
2008-03-26 19:23:19 -04:00
PREDICATE: unexpected-eof < unexpected
2007-09-20 18:09:08 -04:00
unexpected-got not ;
: unexpected-eof ( word -- * ) f unexpected ;
: (parse-tokens) ( accum end -- accum )
scan 2dup = [
2drop
] [
[ pick push (parse-tokens) ] [ unexpected-eof ] if*
] if ;
: parse-tokens ( end -- seq )
100 <vector> swap (parse-tokens) >array ;
2008-03-16 03:43:00 -04:00
: create-in ( string -- word )
in get create dup set-word dup save-location ;
2007-09-20 18:09:08 -04:00
: CREATE ( -- word ) scan create-in ;
2008-03-16 03:43:00 -04:00
: CREATE-GENERIC ( -- word ) CREATE dup reset-word ;
: CREATE-WORD ( -- word ) CREATE dup reset-generic ;
2008-03-11 04:36:33 -04:00
: create-class-in ( word -- word )
2008-03-11 04:30:14 -04:00
in get create
2007-12-24 17:18:26 -05:00
dup save-class-location
2007-12-24 19:40:09 -05:00
dup predicate-word dup set-word save-location ;
2007-09-20 18:09:08 -04:00
: CREATE-CLASS ( -- word )
2008-03-11 04:30:14 -04:00
scan create-class-in ;
2007-09-20 18:09:08 -04:00
: word-restarts ( possibilities -- restarts )
natural-sort [
[ "Use the word " swap summary append ] keep
] { } map>assoc ;
2008-03-29 16:50:50 -04:00
TUPLE: no-word-error name ;
2007-09-20 18:09:08 -04:00
2008-03-29 06:03:17 -04:00
M: no-word-error summary
2007-09-20 18:09:08 -04:00
drop "Word not found in current vocabulary search path" ;
: no-word ( name -- newword )
2008-03-29 06:03:17 -04:00
dup no-word-error construct-boa
2008-02-05 00:30:59 -05:00
swap words-named [ forward-reference? not ] subset
word-restarts throw-restarts
2007-09-20 18:09:08 -04:00
dup word-vocabulary (use+) ;
2008-02-05 23:36:10 -05:00
: check-forward ( str word -- word/f )
2007-09-20 18:09:08 -04:00
dup forward-reference? [
drop
2008-02-05 23:36:10 -05:00
use get
2008-01-09 17:36:30 -05:00
[ at ] with map [ ] subset
2007-09-20 18:09:08 -04:00
[ forward-reference? not ] find nip
] [
nip
] if ;
2008-02-05 23:36:10 -05:00
: search ( str -- word/f )
dup use get assoc-stack check-forward ;
2007-09-20 18:09:08 -04:00
: scan-word ( -- word/number/f )
2008-02-05 23:36:10 -05:00
scan dup [
dup search [ ] [
dup string>number [ ] [ no-word ] ?if
] ?if
] when ;
2007-09-20 18:09:08 -04:00
2008-03-16 03:43:00 -04:00
: create-method-in ( class generic -- method )
create-method f set-word dup save-location ;
: CREATE-METHOD ( -- method )
2008-03-18 03:37:31 -04:00
scan-word bootstrap-word scan-word create-method-in ;
2008-03-16 03:43:00 -04:00
2008-03-26 18:07:50 -04:00
: parse-tuple-definition ( -- class superclass slots )
CREATE-CLASS
scan {
{ ";" [ tuple f ] }
{ "<" [ scan-word ";" parse-tokens ] }
[ >r tuple ";" parse-tokens r> prefix ]
2008-03-26 18:07:50 -04:00
} case ;
2008-03-20 16:00:49 -04:00
ERROR: staging-violation word ;
M: staging-violation summary
drop
"A parsing word cannot be used in the same file it is defined in." ;
: execute-parsing ( word -- )
2007-12-30 17:14:15 -05:00
new-definitions get [
dupd first key? [ staging-violation ] when
] when*
execute ;
2007-09-20 18:09:08 -04:00
: parse-step ( accum end -- accum ? )
scan-word {
{ [ 2dup eq? ] [ 2drop f ] }
{ [ dup not ] [ drop unexpected-eof t ] }
{ [ dup delimiter? ] [ unexpected t ] }
{ [ dup parsing? ] [ nip execute-parsing t ] }
2007-09-20 18:09:08 -04:00
{ [ t ] [ pick push drop t ] }
} cond ;
: (parse-until) ( accum end -- accum )
dup >r parse-step [ r> (parse-until) ] [ r> drop ] if ;
: parse-until ( end -- vec )
100 <vector> swap (parse-until) ;
: parsed ( accum obj -- accum ) over push ;
: with-parser ( lexer quot -- newquot )
swap lexer set
[ call >quotation ] [ <parse-error> rethrow ] recover ;
: (parse-lines) ( lexer -- quot )
[ f parse-until ] with-parser ;
SYMBOL: lexer-factory
[ <lexer> ] lexer-factory set-global
: parse-lines ( lines -- quot )
lexer-factory get call (parse-lines) ;
! Parsing word utilities
: parse-effect ( -- effect )
2008-02-06 20:23:39 -05:00
")" parse-tokens "(" over member? [
"Stack effect declaration must not contain (" throw
2007-09-20 18:09:08 -04:00
] [
2008-02-06 20:23:39 -05:00
{ "--" } split1 dup [
<effect>
] [
"Stack effect declaration must contain --" throw
] if
2007-09-20 18:09:08 -04:00
] if ;
2008-03-20 16:00:49 -04:00
ERROR: bad-number ;
2007-09-20 18:09:08 -04:00
: parse-base ( parsed base -- parsed )
scan swap base> [ bad-number ] unless* parsed ;
: parse-literal ( accum end quot -- accum )
>r parse-until r> call parsed ; inline
: parse-definition ( -- quot )
\ ; parse-until >quotation ;
2008-03-16 03:43:00 -04:00
: (:) CREATE-WORD parse-definition ;
SYMBOL: current-class
SYMBOL: current-generic
: (M:)
CREATE-METHOD
[
[ "method-class" word-prop current-class set ]
[ "method-generic" word-prop current-generic set ]
[ ] tri
parse-definition
] with-scope ;
2008-02-26 19:40:32 -05:00
2008-03-27 20:52:53 -04:00
: scan-object ( -- object )
2008-03-27 20:50:41 -04:00
scan-word dup parsing?
[ V{ } clone swap execute first ] when ;
2007-09-20 18:09:08 -04:00
GENERIC: expected>string ( obj -- str )
M: f expected>string drop "end of input" ;
M: word expected>string word-name ;
M: string expected>string ;
M: unexpected error.
"Expected " write
dup unexpected-want expected>string write
" but got " write
unexpected-got expected>string print ;
M: bad-number summary
drop "Bad number literal" ;
SYMBOL: bootstrap-syntax
: with-file-vocabs ( quot -- )
[
"scratchpad" in set
{ "syntax" "scratchpad" } set-use
bootstrap-syntax get [ use get push ] when*
call
] with-scope ; inline
SYMBOL: interactive-vocabs
{
"arrays"
"assocs"
"combinators"
"compiler.errors"
"continuations"
"debugger"
"definitions"
"editors"
"generic"
"help"
"inspector"
"io"
"io.files"
"kernel"
"listener"
"math"
"memory"
"namespaces"
"prettyprint"
"sequences"
"slicing"
"sorting"
"strings"
"syntax"
"tools.annotations"
"tools.crossref"
"tools.memory"
"tools.profiler"
"tools.test"
"tools.threads"
"tools.time"
2008-03-13 04:45:08 -04:00
"tools.vocabs"
"vocabs"
"vocabs.loader"
"words"
"scratchpad"
} interactive-vocabs set-global
: with-interactive-vocabs ( quot -- )
[
"scratchpad" in set
interactive-vocabs get set-use
call
] with-scope ; inline
2007-09-20 18:09:08 -04:00
: parse-fresh ( lines -- quot )
[ parse-lines ] with-file-vocabs ;
2007-09-20 18:09:08 -04:00
: parsing-file ( file -- )
"quiet" get [
drop
] [
"Loading " write <pathname> . flush
] if ;
2008-04-03 05:58:37 -04:00
: filter-moved ( assoc1 assoc2 -- seq )
diff [
2007-09-20 18:09:08 -04:00
drop where dup [ first ] when
file get source-file-path =
2008-04-03 05:58:37 -04:00
] assoc-subset keys ;
: removed-definitions ( -- assoc1 assoc2 )
new-definitions old-definitions
[ get first2 union ] bi@ ;
2007-09-20 18:09:08 -04:00
2008-04-03 05:58:37 -04:00
: removed-classes ( -- assoc1 assoc2 )
2007-12-30 17:14:15 -05:00
new-definitions old-definitions
2008-04-03 05:58:37 -04:00
[ get second ] bi@ ;
2007-12-24 17:18:26 -05:00
2008-04-03 22:43:41 -04:00
: forget-removed-definitions ( -- )
removed-definitions filter-moved forget-all ;
: reset-removed-classes ( -- )
removed-classes
filter-moved [ class? ] subset [ reset-class ] each ;
2007-09-20 18:09:08 -04:00
2008-02-24 01:26:54 -05:00
: fix-class-words ( -- )
#! If a class word had a compound definition which was
#! removed, it must go back to being a symbol.
2008-04-03 01:21:53 -04:00
new-definitions get first2
2008-04-03 22:43:41 -04:00
filter-moved [ [ reset-generic ] [ define-symbol ] bi ] each ;
2008-02-24 01:26:54 -05:00
2007-09-20 18:09:08 -04:00
: forget-smudged ( -- )
2008-04-03 22:43:41 -04:00
forget-removed-definitions
reset-removed-classes
2008-02-24 01:26:54 -05:00
fix-class-words ;
2007-09-20 18:09:08 -04:00
2008-01-12 17:25:29 -05:00
: finish-parsing ( lines quot -- )
file get
2008-04-03 01:21:53 -04:00
[ record-form ]
[ record-definitions ]
[ record-checksum ]
tri ;
2007-09-20 18:09:08 -04:00
: parse-stream ( stream name -- quot )
[
[
2008-01-12 17:25:29 -05:00
lines dup parse-fresh
tuck finish-parsing
forget-smudged
] with-source-file
] with-compilation-unit ;
2007-09-20 18:09:08 -04:00
: parse-file-restarts ( file -- restarts )
"Load " swap " again" 3append t 2array 1array ;
: parse-file ( file -- quot )
[
[
[ parsing-file ] keep
[ utf8 <file-reader> ] keep
parse-stream
] with-compiler-errors
2007-09-20 18:09:08 -04:00
] [
over parse-file-restarts rethrow-restarts
drop parse-file
] recover ;
: run-file ( file -- )
2008-02-26 22:03:14 -05:00
[ dup parse-file call ] assert-depth drop ;
2007-09-20 18:09:08 -04:00
: ?run-file ( path -- )
dup exists? [ run-file ] [ drop ] if ;
2007-09-20 18:09:08 -04:00
: bootstrap-file ( path -- )
[ parse-file % ] [ run-file ] if-bootstrapping ;
2007-09-20 18:09:08 -04:00
: eval ( str -- )
[ string-lines parse-fresh ] with-compilation-unit call ;
2007-09-20 18:09:08 -04:00
: eval>string ( str -- output )
[
parser-notes off
[ [ eval ] keep ] try drop
] with-string-writer ;