factor/library/syntax/parse-words.factor

183 lines
4.6 KiB
Factor
Raw Normal View History

2005-05-12 16:28:09 -04:00
! Copyright (C) 2005 Slava Pestov.
! See http://factor.sf.net/license.txt for BSD license.
IN: parser
USING: errors hashtables io kernel lists math namespaces
sequences strings vectors words ;
2005-05-12 16:28:09 -04:00
! The parser uses a number of variables:
! line - the line being parsed
! pos - position in the line
! use - list of vocabularies
! in - vocabulary for new words
!
! When a token is scanned, it is searched for in the 'use' list
! of vocabularies. If it is a parsing word, it is executed
! immediately. Otherwise it is appended to the parse tree.
SYMBOL: use
SYMBOL: in
: check-vocab ( name -- vocab )
dup vocab
[ ] [ " is not a vocabulary name" append throw ] ?if ;
: use+ ( string -- )
#! Add a vocabulary to the search path.
check-vocab use get push ;
2005-08-19 21:46:12 -04:00
: set-use ( seq -- )
2005-12-17 14:52:27 -05:00
#! Convert to a later so we can push later.
[ check-vocab ] map >vector use set ;
: set-in ( name -- )
dup ensure-vocab dup in set use+ ;
2005-05-12 16:28:09 -04:00
: parsing? ( word -- ? )
2005-09-24 15:21:17 -04:00
dup word? [ "parsing" word-prop ] [ drop f ] if ;
2005-05-12 16:28:09 -04:00
SYMBOL: file
SYMBOL: line-number
2005-05-12 16:28:09 -04:00
2005-12-17 14:52:27 -05:00
SYMBOL: line-text
SYMBOL: column
2005-05-12 16:28:09 -04:00
: skip ( i seq quot -- n | quot: elt -- ? )
over >r find* drop dup -1 =
2005-09-24 15:21:17 -04:00
[ drop r> length ] [ r> drop ] if ; inline
2005-05-12 16:28:09 -04:00
: skip-blank ( -- )
2005-12-17 14:52:27 -05:00
column [ line-text get [ blank? not ] skip ] change ;
2005-05-12 16:28:09 -04:00
: skip-word ( n line -- n )
2005-09-24 15:21:17 -04:00
2dup nth CHAR: " = [ drop 1+ ] [ [ blank? ] skip ] if ;
2005-05-12 16:28:09 -04:00
: (scan) ( n line -- start end )
2005-09-24 15:21:17 -04:00
dupd 2dup length < [ skip-word ] [ drop ] if ;
2005-05-12 16:28:09 -04:00
: scan ( -- token )
skip-blank
2005-12-17 14:52:27 -05:00
column [ line-text get (scan) dup ] change
2dup = [ 2drop f ] [ line-text get subseq ] if ;
2005-05-12 16:28:09 -04:00
: save-location ( word -- )
#! Remember where this word was defined.
dup set-word
dup line-number get "line" set-word-prop
file get "file" set-word-prop ;
: create-in in get create dup save-location ;
2005-05-12 16:28:09 -04:00
: CREATE ( -- word ) scan create-in ;
! If this variable is on, the parser does not internalize words;
! it just appends strings to the parse tree as they are read.
SYMBOL: string-mode
global [ string-mode off ] bind
: scan-word ( -- obj )
scan dup [
dup ";" = not string-mode get and [
dup use get hash-stack [ ] [ string>number ] ?if
2005-05-12 16:28:09 -04:00
] unless
] when ;
! Used by parsing words
: ch-search ( ch -- index )
2005-12-17 14:52:27 -05:00
column get line-text get index* ;
2005-05-12 16:28:09 -04:00
: (until) ( index -- str )
2005-12-17 14:52:27 -05:00
column [ swap dup 1+ ] change line-text get subseq ;
2005-05-12 16:28:09 -04:00
: until ( ch -- str )
ch-search (until) ;
: (until-eol) ( -- index )
2005-12-17 14:52:27 -05:00
CHAR: \n ch-search dup -1 =
[ drop line-text get length ] when ;
2005-05-12 16:28:09 -04:00
: until-eol ( -- str )
#! This is just a hack to get "eval" to work with multiline
#! strings from jEdit with EOL comments. Normally, input to
#! the parser is already line-tokenized.
(until-eol) (until) ;
: escape ( ch -- esc )
2005-11-27 17:45:48 -05:00
H{
{ CHAR: e CHAR: \e }
{ CHAR: n CHAR: \n }
{ CHAR: r CHAR: \r }
{ CHAR: t CHAR: \t }
{ CHAR: s CHAR: \s }
{ CHAR: \s CHAR: \s }
{ CHAR: 0 CHAR: \0 }
{ CHAR: \\ CHAR: \\ }
{ CHAR: \" CHAR: \" }
} hash dup [ "Bad escape" throw ] unless ;
2005-05-12 16:28:09 -04:00
: next-escape ( n str -- ch n )
2dup nth CHAR: u = [
2005-09-16 22:47:28 -04:00
swap 1+ dup 4 + [ rot subseq hex> ] keep
2005-05-12 16:28:09 -04:00
] [
2005-09-16 22:47:28 -04:00
over 1+ >r nth escape r>
2005-09-24 15:21:17 -04:00
] if ;
2005-05-12 16:28:09 -04:00
: next-char ( n str -- ch n )
2dup nth CHAR: \\ = [
2005-09-16 22:47:28 -04:00
>r 1+ r> next-escape
2005-05-12 16:28:09 -04:00
] [
2005-09-16 22:47:28 -04:00
over 1+ >r nth r>
2005-09-24 15:21:17 -04:00
] if ;
2005-05-12 16:28:09 -04:00
: doc-comment-here? ( parsed -- ? )
not "in-definition" get and ;
: parsed-stack-effect ( parsed str -- parsed )
over doc-comment-here? [
word "stack-effect" word-prop [
drop
] [
word swap "stack-effect" set-word-prop
2005-09-24 15:21:17 -04:00
] if
2005-05-12 16:28:09 -04:00
] [
drop
2005-09-24 15:21:17 -04:00
] if ;
2005-05-12 16:28:09 -04:00
: documentation+ ( word str -- )
over "documentation" word-prop [
2005-05-18 16:26:22 -04:00
swap "\n" swap append3
2005-05-12 16:28:09 -04:00
] when*
"documentation" set-word-prop ;
: parsed-documentation ( parsed str -- parsed )
over doc-comment-here? [
word swap documentation+
] [
drop
2005-09-24 15:21:17 -04:00
] if ;
2005-06-23 22:35:41 -04:00
: (parse-string) ( n str -- n )
2dup nth CHAR: " = [
2005-09-16 22:47:28 -04:00
drop 1+
2005-06-23 22:35:41 -04:00
] [
[ next-char swap , ] keep (parse-string)
2005-09-24 15:21:17 -04:00
] if ;
2005-06-23 22:35:41 -04:00
: parse-string ( -- str )
#! Read a string from the input stream, until it is
#! terminated by a ".
2005-12-17 14:52:27 -05:00
column [
[ line-text get (parse-string) ] "" make swap
2005-06-23 22:35:41 -04:00
] change ;
global [
{
2005-12-22 22:26:54 -05:00
"scratchpad" "syntax" "arrays" "compiler" "errors"
"generic" "hashtables" "inference" "inspector"
2005-12-24 18:29:31 -05:00
"io" "jedit" "kernel" "listener" "lists" "math" "memory"
"namespaces" "parser" "prettyprint" "queues" "sequences"
"shells" "strings" "styles" "test" "threads" "vectors"
"walker" "words"
} set-use
"scratchpad" set-in
] bind