2009-02-02 14:43:54 -05:00
|
|
|
! Copyright (C) 2008, 2009 Slava Pestov.
|
2008-06-25 04:25:08 -04:00
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
USING: kernel sequences accessors namespaces math words strings
|
2008-07-28 23:03:13 -04:00
|
|
|
io vectors arrays math.parser combinators continuations ;
|
2008-06-25 04:25:08 -04:00
|
|
|
IN: lexer
|
|
|
|
|
|
|
|
TUPLE: lexer text line line-text line-length column ;
|
|
|
|
|
|
|
|
: next-line ( lexer -- )
|
|
|
|
dup [ line>> ] [ text>> ] bi ?nth >>line-text
|
|
|
|
dup line-text>> length >>line-length
|
2009-05-01 20:58:24 -04:00
|
|
|
[ 1 + ] change-line
|
2008-06-25 04:25:08 -04:00
|
|
|
0 >>column
|
|
|
|
drop ;
|
|
|
|
|
|
|
|
: new-lexer ( text class -- lexer )
|
|
|
|
new
|
|
|
|
0 >>line
|
|
|
|
swap >>text
|
|
|
|
dup next-line ; inline
|
|
|
|
|
|
|
|
: <lexer> ( text -- lexer )
|
|
|
|
lexer new-lexer ;
|
|
|
|
|
|
|
|
: skip ( i seq ? -- n )
|
2009-02-02 14:43:54 -05:00
|
|
|
over length
|
|
|
|
[ [ swap CHAR: \s eq? xor ] curry find-from drop ] dip or ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: change-lexer-column ( lexer quot -- )
|
2009-02-02 14:43:54 -05:00
|
|
|
[ [ column>> ] [ line-text>> ] bi ] prepose keep
|
2008-08-29 17:52:10 -04:00
|
|
|
(>>column) ; inline
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
GENERIC: skip-blank ( lexer -- )
|
|
|
|
|
|
|
|
M: lexer skip-blank ( lexer -- )
|
|
|
|
[ t skip ] change-lexer-column ;
|
|
|
|
|
|
|
|
GENERIC: skip-word ( lexer -- )
|
|
|
|
|
|
|
|
M: lexer skip-word ( lexer -- )
|
|
|
|
[
|
2009-05-01 20:58:24 -04:00
|
|
|
2dup nth CHAR: " eq? [ drop 1 + ] [ f skip ] if
|
2008-06-25 04:25:08 -04:00
|
|
|
] change-lexer-column ;
|
|
|
|
|
|
|
|
: still-parsing? ( lexer -- ? )
|
2008-08-29 17:16:43 -04:00
|
|
|
[ line>> ] [ text>> ] bi length <= ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: still-parsing-line? ( lexer -- ? )
|
2008-08-29 17:16:43 -04:00
|
|
|
[ column>> ] [ line-length>> ] bi < ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: (parse-token) ( lexer -- str )
|
2008-08-29 17:16:43 -04:00
|
|
|
{
|
|
|
|
[ column>> ]
|
|
|
|
[ skip-word ]
|
|
|
|
[ column>> ]
|
|
|
|
[ line-text>> ]
|
|
|
|
} cleave subseq ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: parse-token ( lexer -- str/f )
|
|
|
|
dup still-parsing? [
|
|
|
|
dup skip-blank
|
|
|
|
dup still-parsing-line?
|
|
|
|
[ (parse-token) ] [ dup next-line parse-token ] if
|
|
|
|
] [ drop f ] if ;
|
|
|
|
|
|
|
|
: scan ( -- str/f ) lexer get parse-token ;
|
|
|
|
|
|
|
|
ERROR: unexpected want got ;
|
|
|
|
|
|
|
|
PREDICATE: unexpected-eof < unexpected
|
2008-08-29 17:16:43 -04:00
|
|
|
got>> not ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: unexpected-eof ( word -- * ) f unexpected ;
|
|
|
|
|
2008-11-22 18:27:40 -05:00
|
|
|
: expect ( token -- )
|
|
|
|
scan
|
|
|
|
[ 2dup = [ 2drop ] [ unexpected ] if ]
|
|
|
|
[ unexpected-eof ]
|
|
|
|
if* ;
|
|
|
|
|
2008-06-25 04:25:08 -04:00
|
|
|
: (parse-tokens) ( accum end -- accum )
|
|
|
|
scan 2dup = [
|
|
|
|
2drop
|
|
|
|
] [
|
|
|
|
[ pick push (parse-tokens) ] [ unexpected-eof ] if*
|
|
|
|
] if ;
|
|
|
|
|
|
|
|
: parse-tokens ( end -- seq )
|
|
|
|
100 <vector> swap (parse-tokens) >array ;
|
|
|
|
|
|
|
|
TUPLE: lexer-error line column line-text error ;
|
|
|
|
|
|
|
|
: <lexer-error> ( msg -- error )
|
|
|
|
\ lexer-error new
|
|
|
|
lexer get
|
|
|
|
[ line>> >>line ]
|
|
|
|
[ column>> >>column ]
|
|
|
|
[ line-text>> >>line-text ]
|
|
|
|
tri
|
|
|
|
swap >>error ;
|
|
|
|
|
|
|
|
: lexer-dump ( error -- )
|
|
|
|
[ line>> number>string ": " append ]
|
|
|
|
[ line-text>> dup string? [ drop "" ] unless ]
|
|
|
|
[ column>> 0 or ] tri
|
|
|
|
pick length + CHAR: \s <string>
|
|
|
|
[ write ] [ print ] [ write "^" print ] tri* ;
|
|
|
|
|
|
|
|
: with-lexer ( lexer quot -- newquot )
|
|
|
|
[ lexer set ] dip [ <lexer-error> rethrow ] recover ; inline
|
|
|
|
|
|
|
|
SYMBOL: lexer-factory
|
|
|
|
|
|
|
|
[ <lexer> ] lexer-factory set-global
|