factor/library/syntax/parse-words.factor

148 lines
3.9 KiB
Factor

! Copyright (C) 2005 Slava Pestov.
! See http://factor.sf.net/license.txt for BSD license.
IN: parser
USING: errors hashtables io kernel lists math namespaces
sequences strings vectors words ;
! The parser uses a number of variables:
! line - the line being parsed
! pos - position in the line
! use - list of vocabularies
! in - vocabulary for new words
!
! When a token is scanned, it is searched for in the 'use' list
! of vocabularies. If it is a parsing word, it is executed
! immediately. Otherwise it is appended to the parse tree.
SYMBOL: use
SYMBOL: in
: check-vocab ( name -- vocab )
dup vocab
[ ] [ " is not a vocabulary name" append throw ] ?if ;
: use+ ( string -- )
#! Add a vocabulary to the search path.
check-vocab use get push ;
: set-use ( seq -- )
#! Convert to a later so we can push later.
[ check-vocab ] map >vector use set ;
: set-in ( name -- )
dup ensure-vocab dup in set use+ ;
: parsing? ( word -- ? )
dup word? [ "parsing" word-prop ] [ drop f ] if ;
SYMBOL: file
SYMBOL: line-number
SYMBOL: line-text
SYMBOL: column
: skip ( i seq quot -- n | quot: elt -- ? )
over >r find* drop dup -1 =
[ drop r> length ] [ r> drop ] if ; inline
: skip-blank ( -- )
column [ line-text get [ blank? not ] skip ] change ;
: skip-word ( n line -- n )
2dup nth CHAR: " = [ drop 1+ ] [ [ blank? ] skip ] if ;
: (scan) ( n line -- start end )
dupd 2dup length < [ skip-word ] [ drop ] if ;
: scan ( -- token )
skip-blank
column [ line-text get (scan) dup ] change
2dup = [ 2drop f ] [ line-text get subseq ] if ;
: save-location ( word -- )
#! Remember where this word was defined.
dup set-word
dup line-number get "line" set-word-prop
file get "file" set-word-prop ;
: create-in in get create dup save-location ;
: CREATE ( -- word ) scan create-in ;
! If this variable is on, the parser does not internalize words;
! it just appends strings to the parse tree as they are read.
SYMBOL: string-mode
global [ string-mode off ] bind
: scan-word ( -- obj )
scan dup [
dup ";" = not string-mode get and [
dup use get hash-stack [ ] [ string>number ] ?if
] unless
] when ;
! Used by parsing words
: ch-search ( ch -- index ) column get line-text get index* ;
: until ( index -- str ) 1+ column set ;
: until-eol ( -- )
#! This is just a hack to get "eval" to work with multiline
#! strings from jEdit with EOL comments. Normally, input to
#! the parser is already line-tokenized.
CHAR: \n ch-search dup -1 =
[ drop line-text get length ] when until ;
: escape ( ch -- esc )
H{
{ CHAR: e CHAR: \e }
{ CHAR: n CHAR: \n }
{ CHAR: r CHAR: \r }
{ CHAR: t CHAR: \t }
{ CHAR: s CHAR: \s }
{ CHAR: \s CHAR: \s }
{ CHAR: 0 CHAR: \0 }
{ CHAR: \\ CHAR: \\ }
{ CHAR: \" CHAR: \" }
} hash dup [ "Bad escape" throw ] unless ;
: next-escape ( n str -- ch n )
2dup nth CHAR: u = [
swap 1+ dup 4 + [ rot subseq hex> ] keep
] [
over 1+ >r nth escape r>
] if ;
: next-char ( n str -- ch n )
2dup nth CHAR: \\ = [
>r 1+ r> next-escape
] [
over 1+ >r nth r>
] if ;
: (parse-string) ( n str -- n )
2dup nth CHAR: " = [
drop 1+
] [
[ next-char swap , ] keep (parse-string)
] if ;
: parse-string ( -- str )
#! Read a string from the input stream, until it is
#! terminated by a ".
column [
[ line-text get (parse-string) ] "" make swap
] change ;
global [
{
"scratchpad" "syntax" "arrays" "compiler" "errors"
"generic" "hashtables" "help" "inference" "inspector"
"io" "jedit" "kernel" "listener" "lists" "math" "memory"
"namespaces" "parser" "prettyprint" "queues" "sequences"
"shells" "strings" "styles" "test" "threads" "vectors"
"walker" "words"
} set-use
"scratchpad" set-in
] bind