factor/library/syntax/parser.factor

194 lines
5.2 KiB
Factor
Raw Normal View History

! :folding=indent:collapseFolds=1:
2004-07-16 02:26:21 -04:00
! $Id$
!
! Copyright (C) 2004 Slava Pestov.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions are met:
!
! 1. Redistributions of source code must retain the above copyright notice,
! this list of conditions and the following disclaimer.
!
! 2. Redistributions in binary form must reproduce the above copyright notice,
! this list of conditions and the following disclaimer in the documentation
! and/or other materials provided with the distribution.
!
! THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
! INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
! FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
! DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
! OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
! OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
! ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IN: parser
USE: errors
USE: kernel
USE: lists
2004-08-26 22:21:17 -04:00
USE: math
2004-07-16 02:26:21 -04:00
USE: namespaces
USE: strings
USE: words
USE: unparser
2004-07-16 02:26:21 -04:00
! The parser uses a number of variables:
! line - the line being parsed
! col - position in the line
! use - list of vocabularies
! in - vocabulary for new words
!
! When a token is scanned, it is searched for in the 'use' list
! of vocabularies. If it is a parsing word, it is executed
! immediately. Otherwise it is appended to the parse tree.
: parsing? ( word -- ? )
#! Test whether an object is a parsing word. Anything that is
#! not a word yields f; for a word the result is the value of
#! its "parsing" word property.
dup word? [
2004-09-28 00:24:36 -04:00
"parsing" word-property
] [
drop f
] ifte ;
: end? ( -- ? )
    #! True once the "col" variable has reached or passed the
    #! length of the string stored in "line".
    "col" get "line" get str-length < not ;
: (with-parser) ( quot -- )
    #! Recursive worker for with-parser: call the quotation again
    #! and again until end? answers true, then discard it.
    end? not [
        dup >r call r> (with-parser)
    ] [
        drop
    ] ifte ;
: with-parser ( text quot -- )
    #! Install text as the current "line" with "col" reset to
    #! zero, keep calling the quotation until the end of the
    #! input is reached, then clear both parser variables.
    0 "col" set
    swap "line" set
    (with-parser)
    "col" off
    "line" off ;
2004-08-18 19:22:15 -04:00
: ch ( -- ch )
    #! The character of "line" at the index held in "col".
    "line" get "col" get swap str-nth ;
! Step the "col" variable forward. Presumably succ@ replaces the
! variable's value with its successor -- TODO confirm against
! the definition of succ@.
: advance ( -- ) "col" succ@ ;
2004-07-16 02:26:21 -04:00
2004-08-12 02:13:43 -04:00
: skip ( n line quot -- n )
    #! Starting at index n, answer the index of the first
    #! character of line for which the quotation, of stack
    #! effect ( ch -- ? ), answers true. When no such character
    #! exists the length of the line is answered instead.
    >r 2dup str-length < not [
        r> drop nip str-length
    ] [
        2dup str-nth r> dup >r call [
            r> 2drop
        ] [
            swap succ swap r> skip
        ] ifte
    ] ifte ;
: skip-blank ( n line -- n )
#! Index of the first character at or after n that is not
#! blank, or the length of the line if only blanks remain.
[ blank? not ] skip ;
2004-07-16 02:26:21 -04:00
2004-08-12 02:13:43 -04:00
: skip-word ( n line -- n )
#! Index of the first blank character at or after n, or the
#! length of the line if no blank follows.
[ blank? ] skip ;
: denotation? ( ch -- ? )
2004-07-16 02:26:21 -04:00
#! Hard-coded for now. Make this customizable later.
2004-08-12 02:13:43 -04:00
#! A 'denotation' is a character that is treated as its
2004-07-16 02:26:21 -04:00
#! own word, e.g.:
#!
#! "hello world"
#!
#! Will call the parsing word ".
#! The test is made against a string holding only the double
#! quote character, so that is the sole denotation for now.
2004-08-12 02:13:43 -04:00
"\"" str-contains? ;
: (scan) ( n line -- start end )
#! Locate the next token of line at or after index n. start is
#! the first non-blank index. end is start plus one when the
#! character at start is a denotation; otherwise it is the
#! index answered by skip-word (the next blank, or the length
#! of the line). When only blanks remain, start and end are
#! equal.
dup >r skip-blank dup r>
2dup str-length < [
2dup str-nth denotation? [
drop succ
] [
skip-word
] ifte
2004-07-16 02:26:21 -04:00
] [
2004-08-12 02:13:43 -04:00
drop
2004-07-16 02:26:21 -04:00
] ifte ;
2004-08-12 02:13:43 -04:00
: scan ( -- token )
#! Read the next token from "line" beginning at "col" and
#! store the token's end index back into "col". Leaves f when
#! (scan) finds an empty range, i.e. no token remains.
2004-08-18 19:22:15 -04:00
"col" get "line" get dup >r (scan) dup "col" set
2004-08-12 02:13:43 -04:00
2dup = [
r> 3drop f
] [
r> substring
] ifte ;
2004-07-16 02:26:21 -04:00
: scan-word ( -- obj )
    #! Scan a token and resolve it. If searching the "use"
    #! vocabularies finds a match, that result is left on the
    #! stack; otherwise the token is passed to str>number.
    #! Leaves f when scan finds no more tokens.
    scan dup [
        dup "use" get search dup not [
            drop str>number
        ] [
            nip
        ] ifte
    ] when ;
: parsed| ( parsed parsed obj -- parsed )
#! Some ugly ugly code to handle [ a | b ] expressions.
#! The top list holds the elements read before the "|"
#! (presumably newest first, since 'parsed' prepends with
#! swons) and obj is the element read after it. The head of
#! that list and obj are consed into a pair, the remaining
#! elements are prepended to the pair one at a time, and the
#! finished structure is prepended to the list underneath.
>r unswons r> cons swap [ swons ] each swons ;
: expect ( word -- )
    #! Consume the next token; unless it equals the given string,
    #! throw a message made of "Expected " followed by that
    #! string.
    dup scan = [
        drop
    ] [
        "Expected " swap cat2 throw
    ] ifte ;
: parsed ( parsed obj -- parsed )
    #! Prepend obj to the parse list sitting beneath it. If the
    #! item beneath obj is the string "|" instead, drop that
    #! marker, finish the [ a | b ] expression with parsed| and
    #! require "]" as the next token.
    over "|" = [
        nip parsed| "]" expect
    ] [
        swons
    ] ifte ;
: (parse) ( str -- )
2004-07-16 02:26:21 -04:00
#! Run scan-word over str until the input is exhausted. A f
#! result is skipped, a parsing word is executed on the spot,
#! and every other object is handed to 'parsed'. The quotation
#! works on whatever the caller left on the stack beneath str.
[
scan-word [
dup parsing? [ execute ] [ parsed ] ifte
] when*
] with-parser ;
2004-07-18 19:52:01 -04:00
: parse ( str -- code )
2004-07-16 02:26:21 -04:00
#! Parse the string into a parse tree that can be executed.
#! Parsing starts with f as the accumulator under the string;
#! the list left by (parse) is reversed before it is returned.
f swap (parse) reverse ;
2004-07-16 02:26:21 -04:00
: eval ( "X" -- X )
#! Parse the string and immediately call the resulting code.
parse call ;
! Used by parsing words
2004-07-16 02:26:21 -04:00
: ch-search ( ch -- index )
#! Look for ch in "line" starting from "col", by way of
#! index-of*. (until-eol) treats a result of -1 as meaning
#! that nothing was found.
2004-08-18 19:22:15 -04:00
"col" get "line" get rot index-of* ;
2004-07-16 02:26:21 -04:00
: (until) ( index -- str )
#! Answer the substring of "line" that runs from "col" to
#! index, and set "col" to the successor of index so that the
#! character at index itself is stepped over.
2004-08-18 19:22:15 -04:00
"col" get swap dup succ "col" set "line" get substring ;
2004-07-16 02:26:21 -04:00
: until ( ch -- str )
    #! Answer the text between the current column and the next
    #! occurrence of ch, and move the column past that
    #! occurrence.
    #! ch-search answers -1 when ch does not occur in the rest of
    #! the line. Report that as an unexpected end of input rather
    #! than handing a negative index to (until), which would
    #! reset "col" to 0 before substring is called with an
    #! invalid range.
    ch-search dup -1 = [
        "Unexpected EOF" throw
    ] when (until) ;
2004-10-27 23:13:00 -04:00
: (until-eol) ( -- index )
    #! Index of the next "\n" found by ch-search, or the length
    #! of "line" when ch-search answers -1.
    "\n" ch-search dup -1 = [
        drop "line" get str-length
    ] when ;
: until-eol ( -- str )
2004-10-27 23:13:00 -04:00
#! This is just a hack to get "eval" to work with multiline
#! strings from jEdit with EOL comments. Normally, input to
#! the parser is already line-tokenized.
#! Answers the text from the current column up to the next
#! newline, or up to the end of the input when there is none.
(until-eol) (until) ;
2004-07-16 02:26:21 -04:00
: next-ch ( -- ch )
    #! Answer the character at the current column and advance
    #! past it. Throws "Unexpected EOF" when end? is true.
    end? not [
        ch advance
    ] [
        "Unexpected EOF" throw
    ] ifte ;
2004-08-12 02:13:43 -04:00
: next-word-ch ( -- ch )
#! Move "col" to the index answered by skip-blank (the next
#! non-blank character), then read one character with next-ch.
2004-08-18 19:22:15 -04:00
"col" get "line" get skip-blank "col" set next-ch ;
2004-08-20 18:48:08 -04:00
IN: syntax
: parsing ( -- )
#! Mark the most recently defined word to execute at parse
#! time, rather than run time. The word can use 'scan' to
#! read ahead in the input stream.
#! The mark is the "parsing" word property, set to t here;
#! it is the same property that parsing? reads.
word t "parsing" set-word-property ;
2004-08-20 18:48:08 -04:00
! Once this file has loaded, we can use 'parsing' normally.
! This hack is needed because in Java Factor, 'parsing' is
! not parsing, but in CFactor, it is.
2004-10-16 21:55:13 -04:00
! Set the "parsing" property on the word 'parsing' itself.
\ parsing t "parsing" set-word-property