factor/core/lexer/lexer.factor

183 lines
4.6 KiB
Factor
Raw Normal View History

! Copyright (C) 2008, 2010 Slava Pestov, Joe Groff.
2008-06-25 04:25:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
2013-03-05 13:34:47 -05:00
USING: accessors arrays combinators continuations io kernel
2014-05-20 11:53:41 -04:00
kernel.private math math.parser namespaces sequences
sequences.private source-files.errors strings vectors ;
2008-06-25 04:25:08 -04:00
IN: lexer
2012-07-27 19:05:28 -04:00
TUPLE: lexer
{ text array }
{ line fixnum }
{ line-text string }
2012-07-27 19:05:28 -04:00
{ line-length fixnum }
{ column fixnum }
{ parsing-words vector } ;
TUPLE: lexer-parsing-word word line line-text column ;
2008-06-25 04:25:08 -04:00
ERROR: not-a-lexer object ;
2014-05-20 11:53:41 -04:00
: check-lexer ( lexer -- lexer )
dup lexer? [ not-a-lexer ] unless ; inline
2014-05-20 11:53:41 -04:00
2008-06-25 04:25:08 -04:00
: next-line ( lexer -- )
2014-05-20 11:53:41 -04:00
check-lexer
dup [ line>> ] [ text>> ] bi ?nth "" or
2011-09-17 00:54:17 -04:00
[ >>line-text ] [ length >>line-length ] bi
[ 1 + ] change-line
2008-06-25 04:25:08 -04:00
0 >>column
drop ;
: push-parsing-word ( word -- )
2014-05-20 11:53:41 -04:00
lexer get check-lexer [
2014-02-21 04:10:21 -05:00
[ line>> ] [ line-text>> ] [ column>> ] tri
lexer-parsing-word boa
] [ parsing-words>> push ] bi ;
: pop-parsing-word ( -- )
2014-05-20 11:53:41 -04:00
lexer get check-lexer parsing-words>> pop* ;
2008-06-25 04:25:08 -04:00
: new-lexer ( text class -- lexer )
new
0 >>line
swap >>text
V{ } clone >>parsing-words
2008-06-25 04:25:08 -04:00
dup next-line ; inline
: <lexer> ( text -- lexer )
lexer new-lexer ;
2009-06-16 14:47:56 -04:00
ERROR: unexpected want got ;
: forbid-tab ( c -- c )
[ CHAR: \t eq? [ "[space]" "[tab]" unexpected ] when ] keep ; inline
2009-06-16 14:47:56 -04:00
2008-06-25 04:25:08 -04:00
: skip ( i seq ? -- n )
2014-02-21 04:10:21 -05:00
over length [
[ swap forbid-tab CHAR: \s eq? xor ] curry find-from drop
] dip or ; inline
2008-06-25 04:25:08 -04:00
: change-lexer-column ( lexer quot -- )
[ check-lexer [ column>> ] [ line-text>> ] bi ] prepose
keep column<< ; inline
2008-06-25 04:25:08 -04:00
GENERIC: skip-blank ( lexer -- )
M: lexer skip-blank
2008-06-25 04:25:08 -04:00
[ t skip ] change-lexer-column ;
GENERIC: skip-word ( lexer -- )
<PRIVATE
: quote? ( column text -- ? )
2014-05-20 11:53:41 -04:00
{ fixnum string } declare nth CHAR: " eq? ;
: shebang? ( column text -- ? )
2014-05-20 11:53:41 -04:00
{ fixnum string } declare swap zero? [
dup length 1 > [
dup first-unsafe CHAR: # =
[ second-unsafe CHAR: ! = ] [ drop f ] if
] [ drop f ] if
2014-05-20 11:53:41 -04:00
] [ drop f ] if ;
PRIVATE>
M: lexer skip-word
2008-06-25 04:25:08 -04:00
[
{
{ [ 2dup quote? ] [ drop 1 + ] }
{ [ 2dup shebang? ] [ drop 2 + ] }
[ f skip ]
} cond
2008-06-25 04:25:08 -04:00
] change-lexer-column ;
: still-parsing? ( lexer -- ? )
2014-05-20 11:53:41 -04:00
check-lexer [ line>> ] [ text>> length ] bi <= ;
2008-06-25 04:25:08 -04:00
: still-parsing-line? ( lexer -- ? )
2014-05-20 11:53:41 -04:00
check-lexer [ column>> ] [ line-length>> ] bi < ;
2008-06-25 04:25:08 -04:00
: (parse-token) ( lexer -- str )
2014-05-20 11:53:41 -04:00
check-lexer {
2008-08-29 17:16:43 -04:00
[ column>> ]
[ skip-word ]
[ column>> ]
[ line-text>> ]
} cleave subseq ;
2008-06-25 04:25:08 -04:00
: parse-token ( lexer -- str/f )
2008-06-25 04:25:08 -04:00
dup still-parsing? [
dup skip-blank
dup still-parsing-line?
[ (parse-token) ] [ dup next-line parse-token ] if
] [ drop f ] if ;
: ?scan-token ( -- str/f ) lexer get parse-token ;
2008-06-25 04:25:08 -04:00
PREDICATE: unexpected-eof < unexpected got>> not ;
2008-06-25 04:25:08 -04:00
2013-03-24 00:35:50 -04:00
: throw-unexpected-eof ( word -- * ) f unexpected ;
2008-06-25 04:25:08 -04:00
2013-03-24 00:35:50 -04:00
: scan-token ( -- str )
?scan-token [ "token" throw-unexpected-eof ] unless* ;
: expect ( token -- )
scan-token 2dup = [ 2drop ] [ unexpected ] if ;
: each-token ( ... end quot: ( ... token -- ... ) -- ... )
[ scan-token ] 2dip 2over =
[ 3drop ] [ [ nip call ] [ each-token ] 2bi ] if ; inline recursive
: map-tokens ( ... end quot: ( ... token -- ... elt ) -- ... seq )
2010-03-26 16:31:48 -04:00
collector [ each-token ] dip { } like ; inline
2008-06-25 04:25:08 -04:00
: parse-tokens ( end -- seq )
[ ] map-tokens ;
2008-06-25 04:25:08 -04:00
TUPLE: lexer-error line column line-text parsing-words error ;
2008-06-25 04:25:08 -04:00
M: lexer-error error-file error>> error-file ;
M: lexer-error error-line [ error>> error-line ] [ line>> ] bi or ;
2008-06-25 04:25:08 -04:00
: <lexer-error> ( msg -- error )
2014-02-21 04:10:21 -05:00
[
lexer get {
[ line>> ]
[ column>> ]
[ line-text>> ]
[ parsing-words>> clone ]
} cleave
] dip lexer-error boa ;
2008-06-25 04:25:08 -04:00
: simple-lexer-dump ( error -- )
2008-06-25 04:25:08 -04:00
[ line>> number>string ": " append ]
[ line-text>> ]
[ column>> ] tri
2008-06-25 04:25:08 -04:00
pick length + CHAR: \s <string>
[ write ] [ print ] [ write "^" print ] tri* ;
: (parsing-word-lexer-dump) ( error parsing-word -- )
[
line>> number>string
over line>> number>string length
CHAR: \s pad-head
": " append write
] [ line-text>> print ] bi
simple-lexer-dump ;
: parsing-word-lexer-dump ( error parsing-word -- )
2012-07-21 13:22:44 -04:00
2dup [ line>> ] same?
[ drop simple-lexer-dump ]
[ (parsing-word-lexer-dump) ] if ;
: lexer-dump ( error -- )
dup parsing-words>>
[ simple-lexer-dump ]
[ last parsing-word-lexer-dump ] if-empty ;
2008-06-25 04:25:08 -04:00
: with-lexer ( lexer quot -- newquot )
[ [ <lexer-error> rethrow ] recover ] curry
[ lexer ] dip with-variable ; inline