2004-07-16 02:26:21 -04:00
|
|
|
!:folding=indent:collapseFolds=1:
|
|
|
|
|
|
|
|
! $Id$
|
|
|
|
!
|
|
|
|
! Copyright (C) 2004 Slava Pestov.
|
|
|
|
!
|
|
|
|
! Redistribution and use in source and binary forms, with or without
|
|
|
|
! modification, are permitted provided that the following conditions are met:
|
|
|
|
!
|
|
|
|
! 1. Redistributions of source code must retain the above copyright notice,
|
|
|
|
! this list of conditions and the following disclaimer.
|
|
|
|
!
|
|
|
|
! 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
! this list of conditions and the following disclaimer in the documentation
|
|
|
|
! and/or other materials provided with the distribution.
|
|
|
|
!
|
|
|
|
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
|
|
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
|
|
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
|
|
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
|
|
|
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
|
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
|
|
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
|
|
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
IN: parser
|
|
|
|
USE: arithmetic
|
|
|
|
USE: combinators
|
|
|
|
USE: errors
|
|
|
|
USE: kernel
|
|
|
|
USE: lists
|
|
|
|
USE: logic
|
|
|
|
USE: namespaces
|
|
|
|
USE: stack
|
|
|
|
USE: strings
|
|
|
|
USE: words
|
|
|
|
USE: vocabularies
|
2004-07-19 00:34:03 -04:00
|
|
|
USE: unparser
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
! Number parsing
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: letter?
    #! Takes a character and tests it against the range #\a to #\z
    #! using between? (lower-case letter test).
    #\a #\z between? ;
|
|
|
|
: LETTER?
    #! Takes a character and tests it against the range #\A to #\Z
    #! using between? (upper-case letter test).
    #\A #\Z between? ;
|
2004-07-16 02:26:21 -04:00
|
|
|
: digit?
    #! Takes a character and tests it against the range #\0 to #\9
    #! using between? (decimal digit test).
    #\0 #\9 between? ;
|
2004-07-21 19:26:41 -04:00
|
|
|
|
2004-07-19 17:36:20 -04:00
|
|
|
: not-a-number
    #! Throw the error "Not a number". Shared by all the number
    #! parsing words in this file.
    "Not a number" throw ;
|
2004-07-21 19:26:41 -04:00
|
|
|
|
|
|
|
: digit> ( ch -- n )
    #! Convert a digit character to its numeric value. #\0-#\9
    #! give 0-9; letters of either case give 10 and upwards
    #! (#\a and #\A are both 10). Any other character throws
    #! "Not a number".
    [
        [ digit? ] [ #\0 - ]
        [ letter? ] [ #\a - 10 + ]
        [ LETTER? ] [ #\A - 10 + ]
        [ drop t ] [ not-a-number ]
    ] cond ;
|
|
|
|
|
|
|
|
: >digit ( n -- ch )
    #! Convert a digit value to a character. Values below 10 are
    #! offset from #\0; all other values are offset from #\a, so
    #! 10 becomes #\a.
    dup 10 < [ #\0 + ] [ 10 - #\a + ] ifte ;
|
|
|
|
|
|
|
|
: digit ( num digit -- num )
    #! Accumulate one digit: the result is num * base + digit,
    #! where the radix is read from the "base" variable.
    #! A digit is only valid when it is strictly less than the
    #! radix, so the test is base > digit. A digit equal to the
    #! radix (eg, 2 in base 2 or 8 in base 8) throws
    #! "Not a number".
    "base" get swap 2dup > [
        >r * r> +
    ] [
        not-a-number
    ] ifte ;
|
2004-07-16 02:26:21 -04:00
|
|
|
|
2004-07-19 17:36:20 -04:00
|
|
|
: (str>fixnum) ( str -- num )
    #! Fold the characters of str into a number: start from 0 and
    #! feed each character through digit> and then digit.
    #! No sign handling and no empty-string check happen here;
    #! see str>fixnum for those.
    0 swap [ digit> digit ] str-each ;
|
|
|
|
|
2004-07-16 02:26:21 -04:00
|
|
|
: str>fixnum ( str -- num )
    #! Parse a string representation of an integer.
    #! An empty string throws "Not a number". If "-" str-head?
    #! yields a non-f value, that value is parsed recursively and
    #! the result is negated; otherwise the whole string is handed
    #! to (str>fixnum).
    dup str-length 0 = [
        drop not-a-number
    ] [
        dup "-" str-head? dup [
            nip str>fixnum neg
        ] [
            drop (str>fixnum)
        ] ifte
    ] ifte ;
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
! The parser uses a number of variables:
|
|
|
|
! line - the line being parsed
|
|
|
|
! pos - position in the line
|
|
|
|
! use - list of vocabularies
|
|
|
|
! in - vocabulary for new words
|
|
|
|
!
|
|
|
|
! When a token is scanned, it is searched for in the 'use' list
|
|
|
|
! of vocabularies. If it is a parsing word, it is executed
|
|
|
|
! immediately. Otherwise it is appended to the parse tree.
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: parsing? ( word -- ? )
    #! Fetch the "parsing" property of a word. Any object for
    #! which word? is false (eg, a number produced by parse-word)
    #! yields f.
    dup word? [
        "parsing" swap word-property
    ] [
        drop f
    ] ifte ;
|
|
|
|
|
2004-07-16 02:26:21 -04:00
|
|
|
: parsing ( -- )
    #! Set the "parsing" property to t on the word returned by
    #! 'word'. Throughout this file it is written directly after a
    #! definition, so 'word' is presumably the most recently
    #! defined word -- confirm against the words vocabulary.
    t "parsing" word set-word-property ;
|
|
|
|
|
2004-07-18 19:52:01 -04:00
|
|
|
: <parsing
    #! Begin parsing: store the string on the stack in the "line"
    #! variable and reset "pos" to 0.
    "line" set 0 "pos" set ;
|
2004-07-21 22:45:43 -04:00
|
|
|
: parsing>
    #! End parsing: switch off the "line" and "pos" variables.
    "line" off "pos" off ;
|
2004-07-16 02:26:21 -04:00
|
|
|
: end? ( -- ? )
    #! True once "pos" has reached or passed the length of "line".
    "pos" get "line" get str-length >= ;
|
|
|
|
: ch ( -- ch )
    #! The character of "line" at index "pos". No bounds check is
    #! made here; callers test end? first.
    "pos" get "line" get str-nth ;
|
|
|
|
: advance ( -- )
    #! Step "pos" forward to the next character by applying succ@
    #! to the variable.
    "pos" succ@ ;
|
|
|
|
|
|
|
|
: ch-blank? ( -- ? )
    #! Is there a character at the current position, and is it
    #! blank? Always f at the end of the line.
    end? not [ ch blank? ] [ f ] ifte ;
|
|
|
|
: skip-blank ( -- )
    #! Advance "pos" for as long as ch-blank? holds, ie, past any
    #! run of blank characters at the current position.
    [ ch-blank? ] [ advance ] while ;
|
|
|
|
: ch-word? ( -- ? )
    #! Is there a character at the current position, and is it
    #! something other than a blank? Always f at the end of the
    #! line.
    end? not [ ch blank? not ] [ f ] ifte ;
|
|
|
|
: skip-word ( -- )
    #! Advance "pos" for as long as ch-word? holds, ie, up to the
    #! next blank character or the end of the line.
    [ ch-word? ] [ advance ] while ;
|
|
|
|
|
|
|
|
: ch-dispatch? ( -- ? )
    #! Hard-coded for now. Make this customizable later.
    #! A 'dispatch' is a character that is treated as its
    #! own word, eg:
    #!
    #! "hello world"
    #!
    #! Will call the parsing word ".
    #! The dispatch characters are currently " and !.
    ch "\"!" str-contains? ;
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
: (scan) ( -- start end )
    #! Skip leading blanks, then find the extent of the next
    #! token. At the end of the line, start and end are equal.
    #! A dispatch character forms a one-character token; any
    #! other token runs up to the next blank or the end of the
    #! line. "pos" is left at end.
    skip-blank "pos" get
    end? [
        dup
    ] [
        ch-dispatch? [ advance ] [ skip-word ] ifte "pos" get
    ] ifte ;
|
|
|
|
|
|
|
|
: scan ( -- str )
    #! Read the next token from the line. Yields f when (scan)
    #! reports an empty extent, ie, no tokens are left.
    (scan) 2dup = [ 2drop f ] [ "line" get substring ] ifte ;
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: parse-word ( str -- obj )
    #! Look the token up in the vocabularies listed in "use".
    #! If the search yields f, the token is parsed as an integer
    #! instead, which throws "Not a number" for anything that is
    #! neither a known word nor a number.
    dup "use" get search dup [
        nip
    ] [
        drop str>fixnum
    ] ifte ;
|
|
|
|
|
2004-07-21 22:45:43 -04:00
|
|
|
: parsed| ( obj -- )
    #! Some ugly ugly code to handle [ a | b ] expressions.
    #! Called by 'parsed' once the "|" sentinel has been removed;
    #! obj is the element that followed the |. It is installed
    #! with rplacd as the cdr of the cell that last* finds in the
    #! reversed list under construction.
    #! NOTE(review): relies on the exact in-place behaviour of
    #! nreverse; do not reorder.
    >r dup nreverse last* r> swap rplacd swons ;
|
|
|
|
|
|
|
|
: expect-] ( -- )
    #! Scan the next token and throw "Expected ]" unless it is
    #! "]". The token is consumed either way.
    scan "]" = not [ "Expected ]" throw ] when ;
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: parsed ( obj -- )
    #! Add obj to the list under construction with swons.
    #! If the item beneath obj is the "|" sentinel pushed by the
    #! parsing word |, the sentinel is dropped, obj is handed to
    #! parsed| and a closing ] is required.
    over "|" = [ nip parsed| expect-] ] [ swons ] ifte ;
|
2004-07-21 19:26:41 -04:00
|
|
|
|
2004-07-16 02:26:21 -04:00
|
|
|
: number, ( str -- )
    #! Parse str as an integer (in the radix held in "base") and
    #! add the number to the parse tree. The input is a string,
    #! not a number: it goes straight to str>fixnum.
    str>fixnum parsed ;
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
: word, ( str -- )
    #! Process one scanned token. scan yields f when no token is
    #! left, and when* guards against that case. A parsing word
    #! is executed immediately; anything else is appended to the
    #! parse tree.
    [
        parse-word dup parsing? [ execute ] [ parsed ] ifte
    ] when* ;
|
|
|
|
|
2004-07-18 19:52:01 -04:00
|
|
|
: (parse)
    #! Takes the source string: installs it with <parsing, scans
    #! and processes tokens until end? holds, then clears the
    #! parser variables with parsing>.
    <parsing [ end? not ] [ scan word, ] while parsing> ;
|
|
|
|
|
|
|
|
: parse ( str -- code )
    #! Parse the string into a parse tree that can be executed.
    #! Tokens are consed onto an initially empty (f) list, so the
    #! list is built back to front; hence the final nreverse.
    f swap (parse) nreverse ;
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
: eval ( "X" -- X )
    #! Parse the string and call the resulting code.
    parse call ;
|
|
|
|
|
|
|
|
!!! Used by parsing words
|
|
|
|
: ch-search ( ch -- index )
    #! Locate ch in the line being parsed by calling index-of*
    #! with the current position, the line and ch.
    "pos" get "line" get rot index-of* ;
|
|
|
|
|
|
|
|
: (until) ( index -- str )
    #! Return the text of the line from the current position up
    #! to index, and move "pos" to one past index so that the
    #! character at index itself is skipped.
    "pos" get swap dup succ "pos" set "line" get substring ;
|
|
|
|
|
|
|
|
: until ( ch -- str )
    #! Return the text between the current position and the next
    #! occurrence of ch, leaving "pos" just past that occurrence.
    #! NOTE(review): the result of ch-search is passed straight to
    #! (until); confirm what index-of* yields when ch is absent.
    ch-search (until) ;
|
|
|
|
|
|
|
|
: until-eol ( -- str )
    #! Return the rest of the line from the current position and
    #! leave "pos" past the end of the line. Takes nothing from
    #! the stack: the end index is the length of "line".
    "line" get str-length (until) ;
|
|
|
|
|
2004-07-19 00:34:03 -04:00
|
|
|
: next-ch ( -- ch )
    #! Return the character at the current position and step past
    #! it. Throws "Unexpected EOF" at the end of the line.
    end? [ "Unexpected EOF" throw ] [ ch advance ] ifte ;
|
|
|
|
|
2004-07-16 02:26:21 -04:00
|
|
|
!!! Parsing words. 'builtins' is a stupid vocabulary name now
|
|
|
|
!!! that it does not contain Java words anymore!
|
|
|
|
|
|
|
|
IN: builtins
|
|
|
|
|
|
|
|
! Constants
|
2004-07-21 19:26:41 -04:00
|
|
|
: t
    #! Parsing word: add the constant t to the parse tree.
    t parsed ; parsing
|
|
|
|
: f
    #! Parsing word: add the constant f to the parse tree.
    f parsed ; parsing
|
2004-07-16 02:26:21 -04:00
|
|
|
|
|
|
|
! Lists
|
|
|
|
: [
    #! Begin a list literal: push f, the empty list onto which the
    #! elements that follow are accumulated.
    f ; parsing
|
2004-07-21 19:26:41 -04:00
|
|
|
: ]
    #! End a list literal: reverse the accumulated elements and
    #! add the finished list to the enclosing parse tree.
    nreverse parsed ; parsing
|
|
|
|
|
|
|
|
: | ( syntax: | cdr ] )
    #! See the word 'parsed'. We push a special sentinel, and
    #! 'parsed' acts accordingly.
    "|" ; parsing
|
2004-07-21 19:26:41 -04:00
|
|
|
|
2004-07-16 02:26:21 -04:00
|
|
|
! Colon defs
|
|
|
|
: :
    #! Begin a word definition. Word name follows.
    #! The word is created in the vocabulary named by "in", and f
    #! is pushed as the empty list onto which the body is
    #! accumulated.
    scan "in" get create f ; parsing
|
|
|
|
|
|
|
|
: ;
    #! End a word definition.
    #! The accumulated body is reversed into source order and
    #! passed to define.
    nreverse define ; parsing
|
|
|
|
|
|
|
|
! Vocabularies
|
2004-07-19 00:34:03 -04:00
|
|
|
: DEFER:
    #! Create the word named by the next token in the "in"
    #! vocabulary and discard it, without giving it a definition.
    scan "in" get create drop ; parsing
|
2004-07-16 02:26:21 -04:00
|
|
|
: USE:
    #! Add the vocabulary named by the next token to the "use"
    #! search list, via cons@.
    scan "use" cons@ ; parsing
|
|
|
|
: IN:
    #! Make the vocabulary named by the next token the one that
    #! new words are created in ("in"), and also add it to the
    #! "use" search list.
    scan dup "use" cons@ "in" set ; parsing
|
2004-07-19 00:34:03 -04:00
|
|
|
|
|
|
|
! \x
|
|
|
|
: escape ( ch -- esc )
    #! Look up the character that followed a backslash in the
    #! table of escapes below. Each entry is a
    #! [ character | replacement ] pair. parse-escape treats an f
    #! result as an unknown escape.
    [
        [ #\e | #\\e ]
        [ #\n | #\\n ]
        [ #\r | #\\r ]
        [ #\t | #\\t ]
        [ #\s | #\\s ]
        [ #\\s | #\\s ]
        [ #\0 | #\\0 ]
        [ #\\\ | #\\\ ]
        [ #\\" | #\\" ]
    ] assoc ;
|
|
|
|
|
|
|
|
! String literal
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: parse-escape ( -- )
    #! Read the character after a backslash and append its
    #! replacement to the string being built with %. Throws
    #! "Bad escape" if the character is not in the escape table,
    #! or "Unexpected EOF" (from next-ch) if the line ends first.
    next-ch escape dup [ % ] [ drop "Bad escape" throw ] ifte ;
|
|
|
|
|
2004-07-21 19:26:41 -04:00
|
|
|
: parse-string ( -- )
    #! Consume characters up to and including the closing ",
    #! appending each one to the string being built with %.
    #! A backslash introduces an escape, handled by parse-escape.
    #! Recurses once per character consumed.
    next-ch dup #\" = [
        drop
    ] [
        dup #\\\ = [ drop parse-escape ] [ % ] ifte parse-string
    ] ifte ;
|
|
|
|
|
|
|
|
: "
    #! Parsing word for string literals: reads up to the closing
    #! quote and adds the resulting string to the parse tree.
    #! Note the ugly hack to carry the new value of 'pos' from
    #! the <% %> scope up to the original scope.
    <% parse-string "pos" get %> swap "pos" set parsed ; parsing
|
2004-07-19 00:34:03 -04:00
|
|
|
|
|
|
|
! Comments
|
|
|
|
: (
    #! Stack effect comment: discard everything up to and
    #! including the next ).
    ")" until drop ; parsing
|
|
|
|
: !
    #! Comment: discard the rest of the line.
    until-eol drop ; parsing
|
|
|
|
: #!
    #! Documentation comment. Parsed exactly like !: the rest of
    #! the line is discarded.
    until-eol drop ; parsing
|
2004-07-21 19:26:41 -04:00
|
|
|
|
|
|
|
! Reading numbers in other bases
|
|
|
|
|
|
|
|
: BASE: ( base -- )
    #! Read a number in a specific base.
    #! The previous value of "base" is kept on the return stack
    #! while the next token is parsed, and restored afterwards.
    #! NOTE(review): if the token is not a valid number the throw
    #! skips the restore; confirm whether callers rely on "base"
    #! surviving a parse error.
    "base" get >r "base" set scan number, r> "base" set ;
|
|
|
|
|
|
|
|
: HEX:
    #! Parsing word: read the next token as a base 16 number.
    16 BASE: ; parsing
|
|
|
|
: DEC:
    #! Parsing word: read the next token as a base 10 number.
    10 BASE: ; parsing
|
|
|
|
: OCT:
    #! Parsing word: read the next token as a base 8 number.
    8 BASE: ; parsing
|
|
|
|
: BIN:
    #! Parsing word: read the next token as a base 2 number.
    2 BASE: ; parsing
|