| 
									
										
										
										
											2009-02-02 14:43:54 -05:00
										 |  |  | ! Copyright (C) 2008, 2009 Slava Pestov. | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							|  |  |  | USING: kernel sequences accessors namespaces math words strings | 
					
						
							| 
									
										
										
										
											2009-08-04 22:01:21 -04:00
										 |  |  | io vectors arrays math.parser combinators continuations | 
					
						
							|  |  |  | source-files.errors ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | IN: lexer | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  | TUPLE: lexer text line line-text line-length column parsing-words ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | TUPLE: lexer-parsing-word word line line-text column ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : next-line ( lexer -- )
 | 
					
						
							|  |  |  |     dup [ line>> ] [ text>> ] bi ?nth >>line-text | 
					
						
							|  |  |  |     dup line-text>> length >>line-length | 
					
						
							| 
									
										
										
										
											2009-05-01 20:58:24 -04:00
										 |  |  |     [ 1 + ] change-line | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     0 >>column | 
					
						
							|  |  |  |     drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  | : push-parsing-word ( word -- )
 | 
					
						
							|  |  |  |     lexer-parsing-word new
 | 
					
						
							|  |  |  |         swap >>word | 
					
						
							|  |  |  |         lexer get [ | 
					
						
							|  |  |  |             [ line>>      >>line      ] | 
					
						
							|  |  |  |             [ line-text>> >>line-text ] | 
					
						
							|  |  |  |             [ column>>    >>column    ] tri
 | 
					
						
							|  |  |  |         ] [ parsing-words>> push ] bi ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : pop-parsing-word ( -- )
 | 
					
						
							|  |  |  |     lexer get parsing-words>> pop drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | : new-lexer ( text class -- lexer )
 | 
					
						
							|  |  |  |     new
 | 
					
						
							|  |  |  |         0 >>line | 
					
						
							|  |  |  |         swap >>text | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  |         V{ } clone >>parsing-words | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     dup next-line ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : <lexer> ( text -- lexer )
 | 
					
						
							|  |  |  |     lexer new-lexer ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-06-16 14:47:56 -04:00
										 |  |  | ERROR: unexpected want got ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : forbid-tab ( c -- c )
 | 
					
						
							| 
									
										
										
										
											2009-08-04 22:01:21 -04:00
										 |  |  |     [ CHAR: \t eq? [ "[space]" "[tab]" unexpected ] when ] keep ; inline
 | 
					
						
							| 
									
										
										
										
											2009-06-16 14:47:56 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | : skip ( i seq ? -- n )
 | 
					
						
							| 
									
										
										
										
											2009-02-02 14:43:54 -05:00
										 |  |  |     over length
 | 
					
						
							| 
									
										
										
										
											2009-06-16 14:47:56 -04:00
										 |  |  |     [ [ swap forbid-tab CHAR: \s eq? xor ] curry find-from drop ] dip or ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : change-lexer-column ( lexer quot -- )
 | 
					
						
							| 
									
										
										
										
											2009-02-02 14:43:54 -05:00
										 |  |  |     [ [ column>> ] [ line-text>> ] bi ] prepose keep
 | 
					
						
							| 
									
										
										
										
											2008-08-29 17:52:10 -04:00
										 |  |  |     (>>column) ; inline
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | GENERIC: skip-blank ( lexer -- )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: lexer skip-blank ( lexer -- )
 | 
					
						
							|  |  |  |     [ t skip ] change-lexer-column ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | GENERIC: skip-word ( lexer -- )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: lexer skip-word ( lexer -- )
 | 
					
						
							|  |  |  |     [ | 
					
						
							| 
									
										
										
										
											2009-05-01 20:58:24 -04:00
										 |  |  |         2dup nth CHAR: " eq? [ drop 1 + ] [ f skip ] if
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     ] change-lexer-column ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : still-parsing? ( lexer -- ? )
 | 
					
						
							| 
									
										
										
										
											2009-08-18 01:10:23 -04:00
										 |  |  |     [ line>> ] [ text>> length ] bi <= ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : still-parsing-line? ( lexer -- ? )
 | 
					
						
							| 
									
										
										
										
											2008-08-29 17:16:43 -04:00
										 |  |  |     [ column>> ] [ line-length>> ] bi < ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : (parse-token) ( lexer -- str )
 | 
					
						
							| 
									
										
										
										
											2008-08-29 17:16:43 -04:00
										 |  |  |     { | 
					
						
							|  |  |  |         [ column>> ] | 
					
						
							|  |  |  |         [ skip-word ] | 
					
						
							|  |  |  |         [ column>> ] | 
					
						
							|  |  |  |         [ line-text>> ] | 
					
						
							|  |  |  |     } cleave subseq ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | :  parse-token ( lexer -- str/f )
 | 
					
						
							|  |  |  |     dup still-parsing? [ | 
					
						
							|  |  |  |         dup skip-blank | 
					
						
							|  |  |  |         dup still-parsing-line? | 
					
						
							|  |  |  |         [ (parse-token) ] [ dup next-line parse-token ] if
 | 
					
						
							|  |  |  |     ] [ drop f ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : scan ( -- str/f ) lexer get parse-token ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-26 03:42:07 -04:00
										 |  |  | PREDICATE: unexpected-eof < unexpected got>> not ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : unexpected-eof ( word -- * ) f unexpected ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-22 18:27:40 -05:00
										 |  |  | : expect ( token -- )
 | 
					
						
							|  |  |  |     scan | 
					
						
							|  |  |  |     [ 2dup = [ 2drop ] [ unexpected ] if ] | 
					
						
							|  |  |  |     [ unexpected-eof ] | 
					
						
							|  |  |  |     if* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-08 21:55:46 -05:00
										 |  |  | : each-token ( ... end quot: ( ... token -- ... ) -- ... )
 | 
					
						
							| 
									
										
										
										
											2010-03-26 03:42:07 -04:00
										 |  |  |     [ scan ] 2dip { | 
					
						
							|  |  |  |         { [ 2over = ] [ 3drop ] } | 
					
						
							|  |  |  |         { [ pick not ] [ drop unexpected-eof ] } | 
					
						
							|  |  |  |         [ [ nip call ] [ each-token ] 2bi ] | 
					
						
							|  |  |  |     } cond ; inline recursive
 | 
					
						
							| 
									
										
										
										
											2010-03-01 01:06:47 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-08 21:55:46 -05:00
										 |  |  | : map-tokens ( ... end quot: ( ... token -- ... elt ) -- ... seq )
 | 
					
						
							| 
									
										
										
										
											2010-03-26 16:31:48 -04:00
										 |  |  |     collector [ each-token ] dip { } like ; inline
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-tokens ( end -- seq )
 | 
					
						
							| 
									
										
										
										
											2010-03-01 01:06:47 -05:00
										 |  |  |     [ ] map-tokens ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  | TUPLE: lexer-error line column line-text parsing-words error ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-08-04 22:01:21 -04:00
										 |  |  | M: lexer-error error-file error>> error-file ;
 | 
					
						
							| 
									
										
										
										
											2010-03-26 03:42:07 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-08-04 22:01:21 -04:00
										 |  |  | M: lexer-error error-line [ error>> error-line ] [ line>> ] bi or ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | : <lexer-error> ( msg -- error )
 | 
					
						
							|  |  |  |     \ lexer-error new
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  |         lexer get [ | 
					
						
							|  |  |  |             [ line>> >>line ] | 
					
						
							|  |  |  |             [ column>> >>column ] bi
 | 
					
						
							|  |  |  |         ] [  | 
					
						
							|  |  |  |             [ line-text>> >>line-text ] | 
					
						
							|  |  |  |             [ parsing-words>> clone >>parsing-words ] bi
 | 
					
						
							|  |  |  |         ] bi
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         swap >>error ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  | : simple-lexer-dump ( error -- )
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     [ line>> number>string ": " append ] | 
					
						
							|  |  |  |     [ line-text>> dup string? [ drop "" ] unless ] | 
					
						
							|  |  |  |     [ column>> 0 or ] tri
 | 
					
						
							|  |  |  |     pick length + CHAR: \s <string>
 | 
					
						
							|  |  |  |     [ write ] [ print ] [ write "^" print ] tri* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-28 16:01:03 -05:00
										 |  |  | : (parsing-word-lexer-dump) ( error parsing-word -- )
 | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         line>> number>string | 
					
						
							|  |  |  |         over line>> number>string length
 | 
					
						
							|  |  |  |         CHAR: \s pad-head
 | 
					
						
							|  |  |  |         ": " append write
 | 
					
						
							|  |  |  |     ] [ line-text>> dup string? [ drop "" ] unless print ] bi
 | 
					
						
							|  |  |  |     simple-lexer-dump ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : parsing-word-lexer-dump ( error parsing-word -- )
 | 
					
						
							|  |  |  |     2dup [ line>> ] bi@ =
 | 
					
						
							|  |  |  |     [ drop simple-lexer-dump ] | 
					
						
							|  |  |  |     [ (parsing-word-lexer-dump) ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : lexer-dump ( error -- )
 | 
					
						
							|  |  |  |     dup parsing-words>> [ simple-lexer-dump ] [ last parsing-word-lexer-dump ] if-empty ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | : with-lexer ( lexer quot -- newquot )
 | 
					
						
							|  |  |  |     [ lexer set ] dip [ <lexer-error> rethrow ] recover ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: lexer-factory | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | [ <lexer> ] lexer-factory set-global
 |