| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | ! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman. | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  | USING: accessors arrays assocs combinators kernel kernel.private | 
					
						
							|  |  |  | lexer math math.parser namespaces sbufs sequences splitting | 
					
						
							|  |  |  | strings ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | IN: strings.parser | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-23 18:55:54 -04:00
										 |  |  | ERROR: bad-escape char ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : escape ( escape -- ch )
 | 
					
						
							|  |  |  |     H{ | 
					
						
							|  |  |  |         { CHAR: a  CHAR: \a } | 
					
						
							| 
									
										
										
										
											2014-06-03 21:04:51 -04:00
										 |  |  |         { CHAR: b  CHAR: \b } | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         { CHAR: e  CHAR: \e } | 
					
						
							| 
									
										
										
										
											2014-06-03 21:04:51 -04:00
										 |  |  |         { CHAR: f  CHAR: \f } | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         { CHAR: n  CHAR: \n } | 
					
						
							|  |  |  |         { CHAR: r  CHAR: \r } | 
					
						
							|  |  |  |         { CHAR: t  CHAR: \t } | 
					
						
							|  |  |  |         { CHAR: s  CHAR: \s } | 
					
						
							| 
									
										
										
										
											2014-06-03 21:04:51 -04:00
										 |  |  |         { CHAR: v  CHAR: \v } | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         { CHAR: \s CHAR: \s } | 
					
						
							|  |  |  |         { CHAR: 0  CHAR: \0 } | 
					
						
							|  |  |  |         { CHAR: \\ CHAR: \\ } | 
					
						
							|  |  |  |         { CHAR: \" CHAR: \" } | 
					
						
							| 
									
										
										
										
											2015-08-13 19:13:05 -04:00
										 |  |  |     } ?at [ bad-escape ] unless ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: name>char-hook | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-02-10 17:16:12 -05:00
										 |  |  | name>char-hook [ | 
					
						
							|  |  |  |     [ "Unicode support not available" throw ] | 
					
						
							|  |  |  | ] initialize
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-07-26 22:24:25 -04:00
										 |  |  | : hex-escape ( str -- ch str' )
 | 
					
						
							|  |  |  |     2 cut-slice [ hex> ] dip ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | : unicode-escape ( str -- ch str' )
 | 
					
						
							|  |  |  |     "{" ?head-slice [ | 
					
						
							|  |  |  |         CHAR: } over index cut-slice
 | 
					
						
							| 
									
										
										
										
											2009-03-16 21:11:36 -04:00
										 |  |  |         [ >string name>char-hook get call( name -- char ) ] dip
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         rest-slice
 | 
					
						
							|  |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2008-11-23 03:44:56 -05:00
										 |  |  |         6 cut-slice [ hex> ] dip
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : next-escape ( str -- ch str' )
 | 
					
						
							| 
									
										
										
										
											2014-06-04 00:04:05 -04:00
										 |  |  |     unclip-slice { | 
					
						
							|  |  |  |         { CHAR: u [ unicode-escape ] } | 
					
						
							|  |  |  |         { CHAR: x [ hex-escape ] } | 
					
						
							|  |  |  |         [ escape swap ] | 
					
						
							| 
									
										
										
										
											2012-07-26 22:24:25 -04:00
										 |  |  |     } case ;
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (unescape-string) ( accum str i/f -- accum )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf object object } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ | 
					
						
							| 
									
										
										
										
											2014-12-03 14:37:34 -05:00
										 |  |  |         cut-slice [ append! ] dip
 | 
					
						
							|  |  |  |         rest-slice next-escape [ suffix! ] dip
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         CHAR: \\ over index (unescape-string) | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2014-12-03 14:37:34 -05:00
										 |  |  |         append!
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     ] if* ;
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : unescape-string ( str -- str' )
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     CHAR: \\ over index [ | 
					
						
							|  |  |  |         [ [ length <sbuf> ] keep ] dip (unescape-string) | 
					
						
							|  |  |  |     ] when* "" like ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : (parse-string) ( accum str -- accum m )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf slice } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     dup [ "\"\\" member? ] find [ | 
					
						
							| 
									
										
										
										
											2014-12-03 14:37:34 -05:00
										 |  |  |         [ cut-slice [ append! ] dip rest-slice ] dip
 | 
					
						
							| 
									
										
										
										
											2009-03-16 21:11:36 -04:00
										 |  |  |         CHAR: " = [ | 
					
						
							|  |  |  |             from>> | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         ] [ | 
					
						
							| 
									
										
										
										
											2014-12-03 14:37:34 -05:00
										 |  |  |             next-escape [ suffix! ] dip (parse-string) | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |         ] if
 | 
					
						
							|  |  |  |     ] [ | 
					
						
							|  |  |  |         "Unterminated string" throw
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     ] if* ;
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-string ( -- str )
 | 
					
						
							| 
									
										
										
										
											2014-06-04 00:05:36 -04:00
										 |  |  |     SBUF" " clone lexer get [ | 
					
						
							|  |  |  |         swap tail-slice (parse-string) [ "" like ] dip
 | 
					
						
							| 
									
										
										
										
											2008-06-25 04:25:08 -04:00
										 |  |  |     ] change-lexer-column ;
 | 
					
						
							| 
									
										
										
										
											2008-12-08 20:46:40 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : lexer-subseq ( i lexer -- before )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { fixnum lexer } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ [ column>> ] [ line-text>> ] bi swapd subseq ] | 
					
						
							|  |  |  |     [ column<< ] 2bi ;
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  | : rest-of-line ( lexer -- seq )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer } declare | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |     [ line-text>> ] [ column>> ] bi tail-slice ;
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  | : current-char ( lexer -- ch/f )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer } declare | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |     [ column>> ] [ line-text>> ] bi ?nth ;
 | 
					
						
							| 
									
										
										
										
											2009-09-20 15:18:19 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : advance-char ( lexer -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer } declare | 
					
						
							| 
									
										
										
										
											2009-09-20 15:18:19 -04:00
										 |  |  |     [ 1 + ] change-column drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ERROR: escaped-char-expected ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : next-char ( lexer -- ch )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer } declare | 
					
						
							| 
									
										
										
										
											2009-09-20 15:18:19 -04:00
										 |  |  |     dup still-parsing-line? [ | 
					
						
							|  |  |  |         [ current-char ] [ advance-char ] bi
 | 
					
						
							|  |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2015-08-13 19:13:05 -04:00
										 |  |  |         escaped-char-expected | 
					
						
							| 
									
										
										
										
											2009-09-20 15:18:19 -04:00
										 |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : lexer-head? ( lexer string -- ? )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer string } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ rest-of-line ] dip head? ;
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : advance-lexer ( lexer n -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer fixnum } declare | 
					
						
							|  |  |  |     [ + ] curry change-column drop ;
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : find-next-token ( lexer ch -- i elt )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer fixnum } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ [ column>> ] [ line-text>> ] bi ] dip
 | 
					
						
							|  |  |  |     CHAR: \ 2array [ member? ] curry find-from ;
 | 
					
						
							| 
									
										
										
										
											2009-09-24 20:43:57 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : next-line% ( accum lexer -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf lexer } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ rest-of-line swap push-all ] | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     [ next-line CHAR: \n swap push ] 2bi ;
 | 
					
						
							| 
									
										
										
										
											2009-09-24 20:43:57 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : take-double-quotes ( lexer -- string )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { lexer } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     dup current-char CHAR: " = [ | 
					
						
							|  |  |  |         dup [ column>> ] [ line-text>> ] bi
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |         [ CHAR: " = not ] find-from drop [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |             over column>> - CHAR: " <repetition>
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |         ] [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |             dup rest-of-line | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |         ] if*
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         [ length advance-lexer ] keep
 | 
					
						
							|  |  |  |     ] [ drop f ] if ;
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : end-string-parse ( accum lexer delimiter -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf lexer string } declare | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |     length 3 = [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         take-double-quotes 3 tail-slice swap push-all
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         advance-char drop
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  | DEFER: (parse-multiline-string-until) | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  | : parse-found-token ( accum lexer string i token -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf lexer string fixnum fixnum } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     [ [ 2over ] dip swap lexer-subseq swap push-all ] dip
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |     CHAR: \ = [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         2over next-char swap push
 | 
					
						
							|  |  |  |         2over next-char swap push
 | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  |         (parse-multiline-string-until) | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |         2dup lexer-head? [ | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |             end-string-parse | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |         ] [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |             2over next-char swap push
 | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  |             (parse-multiline-string-until) | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |         ] if
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2008-12-08 20:46:40 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  | : (parse-multiline-string-until) ( accum lexer string -- )
 | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     { sbuf lexer fixnum } declare | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |     over still-parsing? [ | 
					
						
							|  |  |  |         2dup first find-next-token [ | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |             parse-found-token | 
					
						
							|  |  |  |         ] [ | 
					
						
							| 
									
										
										
										
											2014-05-19 17:14:02 -04:00
										 |  |  |             drop 2over next-line% | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  |             (parse-multiline-string-until) | 
					
						
							| 
									
										
										
										
											2009-09-20 22:50:17 -04:00
										 |  |  |         ] if*
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2013-03-24 00:42:26 -04:00
										 |  |  |         throw-unexpected-eof | 
					
						
							| 
									
										
										
										
											2014-05-20 11:20:34 -04:00
										 |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2009-09-19 04:55:05 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-09-24 20:43:57 -04:00
										 |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2009-09-20 15:18:19 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-07-26 01:59:56 -04:00
										 |  |  | : parse-multiline-string-until ( arg -- string )
 | 
					
						
							|  |  |  |     [ SBUF" " clone ] dip [ | 
					
						
							|  |  |  |         [ lexer get ] dip (parse-multiline-string-until) | 
					
						
							|  |  |  |     ] curry keep unescape-string ;
 |