2009-09-19 04:55:05 -04:00
|
|
|
! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
|
2008-06-25 04:25:08 -04:00
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2009-09-19 04:55:05 -04:00
|
|
|
USING: accessors assocs kernel lexer make math math.parser
|
2009-09-20 22:50:17 -04:00
|
|
|
namespaces parser sequences splitting strings arrays
|
|
|
|
math.order ;
|
2008-06-25 04:25:08 -04:00
|
|
|
IN: strings.parser
|
|
|
|
|
2009-09-23 18:55:54 -04:00
|
|
|
ERROR: bad-escape char ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: escape ( escape -- ch )
|
|
|
|
H{
|
|
|
|
{ CHAR: a CHAR: \a }
|
|
|
|
{ CHAR: e CHAR: \e }
|
|
|
|
{ CHAR: n CHAR: \n }
|
|
|
|
{ CHAR: r CHAR: \r }
|
|
|
|
{ CHAR: t CHAR: \t }
|
|
|
|
{ CHAR: s CHAR: \s }
|
|
|
|
{ CHAR: \s CHAR: \s }
|
|
|
|
{ CHAR: 0 CHAR: \0 }
|
|
|
|
{ CHAR: \\ CHAR: \\ }
|
|
|
|
{ CHAR: \" CHAR: \" }
|
2009-09-23 18:55:54 -04:00
|
|
|
} ?at [ bad-escape ] unless ;
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
SYMBOL: name>char-hook
|
|
|
|
|
2009-02-10 17:16:12 -05:00
|
|
|
name>char-hook [
|
|
|
|
[ "Unicode support not available" throw ]
|
|
|
|
] initialize
|
2008-06-25 04:25:08 -04:00
|
|
|
|
|
|
|
: unicode-escape ( str -- ch str' )
|
|
|
|
"{" ?head-slice [
|
|
|
|
CHAR: } over index cut-slice
|
2009-03-16 21:11:36 -04:00
|
|
|
[ >string name>char-hook get call( name -- char ) ] dip
|
2008-06-25 04:25:08 -04:00
|
|
|
rest-slice
|
|
|
|
] [
|
2008-11-23 03:44:56 -05:00
|
|
|
6 cut-slice [ hex> ] dip
|
2008-06-25 04:25:08 -04:00
|
|
|
] if ;
|
|
|
|
|
|
|
|
: next-escape ( str -- ch str' )
|
|
|
|
"u" ?head-slice [
|
|
|
|
unicode-escape
|
|
|
|
] [
|
|
|
|
unclip-slice escape swap
|
|
|
|
] if ;
|
|
|
|
|
2009-09-19 04:55:05 -04:00
|
|
|
: (unescape-string) ( str -- )
|
|
|
|
CHAR: \\ over index dup [
|
|
|
|
cut-slice [ % ] dip rest-slice
|
|
|
|
next-escape [ , ] dip
|
|
|
|
(unescape-string)
|
|
|
|
] [
|
|
|
|
drop %
|
|
|
|
] if ;
|
|
|
|
|
|
|
|
: unescape-string ( str -- str' )
|
|
|
|
[ (unescape-string) ] "" make ;
|
|
|
|
|
2008-06-25 04:25:08 -04:00
|
|
|
: (parse-string) ( str -- m )
|
|
|
|
dup [ "\"\\" member? ] find dup [
|
2008-11-23 03:44:56 -05:00
|
|
|
[ cut-slice [ % ] dip rest-slice ] dip
|
2009-03-16 21:11:36 -04:00
|
|
|
CHAR: " = [
|
|
|
|
from>>
|
2008-06-25 04:25:08 -04:00
|
|
|
] [
|
2009-03-16 21:11:36 -04:00
|
|
|
next-escape [ , ] dip (parse-string)
|
2008-06-25 04:25:08 -04:00
|
|
|
] if
|
|
|
|
] [
|
|
|
|
"Unterminated string" throw
|
|
|
|
] if ;
|
|
|
|
|
|
|
|
: parse-string ( -- str )
|
|
|
|
lexer get [
|
|
|
|
[ swap tail-slice (parse-string) ] "" make swap
|
|
|
|
] change-lexer-column ;
|
2008-12-08 20:46:40 -05:00
|
|
|
|
2009-09-19 04:55:05 -04:00
|
|
|
<PRIVATE
|
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: lexer-before ( i -- before )
|
2009-09-19 04:55:05 -04:00
|
|
|
[
|
|
|
|
[
|
|
|
|
lexer get
|
|
|
|
[ column>> ] [ line-text>> ] bi
|
|
|
|
] dip swap subseq
|
2008-12-08 20:46:40 -05:00
|
|
|
] [
|
2009-09-19 04:55:05 -04:00
|
|
|
lexer get (>>column)
|
|
|
|
] bi ;
|
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: rest-of-line ( lexer -- seq )
|
|
|
|
[ line-text>> ] [ column>> ] bi tail-slice ;
|
2009-09-19 04:55:05 -04:00
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: current-char ( lexer -- ch/f )
|
|
|
|
[ column>> ] [ line-text>> ] bi ?nth ;
|
2009-09-20 15:18:19 -04:00
|
|
|
|
|
|
|
: advance-char ( lexer -- )
|
|
|
|
[ 1 + ] change-column drop ;
|
|
|
|
|
|
|
|
ERROR: escaped-char-expected ;
|
|
|
|
|
|
|
|
: next-char ( lexer -- ch )
|
|
|
|
dup still-parsing-line? [
|
|
|
|
[ current-char ] [ advance-char ] bi
|
|
|
|
] [
|
|
|
|
escaped-char-expected
|
|
|
|
] if ;
|
|
|
|
|
2009-09-19 04:55:05 -04:00
|
|
|
: rest-begins? ( string -- ? )
|
|
|
|
[
|
|
|
|
lexer get [ line-text>> ] [ column>> ] bi tail-slice
|
|
|
|
] dip head? ;
|
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: advance-lexer ( n -- )
|
|
|
|
[ lexer get ] dip [ + ] curry change-column drop ; inline
|
|
|
|
|
2009-09-24 20:43:57 -04:00
|
|
|
: find-next-token ( ch -- i elt )
|
|
|
|
CHAR: \ 2array
|
|
|
|
[ lexer get [ column>> ] [ line-text>> ] bi ] dip
|
|
|
|
[ member? ] curry find-from ;
|
|
|
|
|
|
|
|
: next-line% ( lexer -- )
|
|
|
|
[ rest-of-line % ]
|
|
|
|
[ next-line "\n" % ] bi ;
|
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: take-double-quotes ( -- string )
|
|
|
|
lexer get dup current-char CHAR: " = [
|
|
|
|
[ ] [ column>> ] [ line-text>> ] tri
|
|
|
|
[ CHAR: " = not ] find-from drop [
|
|
|
|
swap column>> - CHAR: " <repetition>
|
|
|
|
] [
|
|
|
|
rest-of-line
|
|
|
|
] if*
|
|
|
|
] [
|
|
|
|
drop f
|
|
|
|
] if dup length advance-lexer ;
|
|
|
|
|
|
|
|
: end-string-parse ( delimiter -- )
|
|
|
|
length 3 = [
|
|
|
|
take-double-quotes 3 tail %
|
|
|
|
] [
|
|
|
|
lexer get advance-char
|
|
|
|
] if ;
|
|
|
|
|
2009-09-24 20:43:57 -04:00
|
|
|
DEFER: ((parse-multiline-string))
|
2009-09-19 04:55:05 -04:00
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
: parse-found-token ( i string token -- )
|
|
|
|
[ lexer-before % ] dip
|
2009-09-19 04:55:05 -04:00
|
|
|
CHAR: \ = [
|
2009-09-24 20:43:57 -04:00
|
|
|
lexer get [ next-char , ] [ next-char , ] bi ((parse-multiline-string))
|
2009-09-19 04:55:05 -04:00
|
|
|
] [
|
|
|
|
dup rest-begins? [
|
2009-09-20 22:50:17 -04:00
|
|
|
end-string-parse
|
2009-09-19 04:55:05 -04:00
|
|
|
] [
|
2009-09-24 20:43:57 -04:00
|
|
|
lexer get next-char , ((parse-multiline-string))
|
2009-09-19 04:55:05 -04:00
|
|
|
] if
|
2008-12-08 20:46:40 -05:00
|
|
|
] if ;
|
|
|
|
|
2009-09-20 22:50:17 -04:00
|
|
|
ERROR: trailing-characters string ;
|
2009-09-19 04:55:05 -04:00
|
|
|
|
2009-09-24 20:43:57 -04:00
|
|
|
: ((parse-multiline-string)) ( string -- )
|
2009-09-19 04:55:05 -04:00
|
|
|
lexer get still-parsing? [
|
2009-09-20 22:50:17 -04:00
|
|
|
dup first find-next-token [
|
|
|
|
parse-found-token
|
|
|
|
] [
|
|
|
|
drop lexer get next-line%
|
2009-09-24 20:43:57 -04:00
|
|
|
((parse-multiline-string))
|
2009-09-20 22:50:17 -04:00
|
|
|
] if*
|
2009-09-19 04:55:05 -04:00
|
|
|
] [
|
|
|
|
unexpected-eof
|
|
|
|
] if ;
|
|
|
|
|
2009-09-24 20:43:57 -04:00
|
|
|
: (parse-multiline-string) ( string -- string' )
|
|
|
|
[ ((parse-multiline-string)) ] "" make ;
|
2009-09-20 22:50:17 -04:00
|
|
|
|
2009-09-24 20:43:57 -04:00
|
|
|
PRIVATE>
|
2009-09-20 15:18:19 -04:00
|
|
|
|
2009-09-19 04:55:05 -04:00
|
|
|
: parse-multiline-string ( -- string )
|
2009-09-20 22:50:17 -04:00
|
|
|
lexer get rest-of-line "\"\"" head? [
|
2009-09-19 04:55:05 -04:00
|
|
|
lexer get [ 2 + ] change-column drop
|
2009-09-20 22:50:17 -04:00
|
|
|
"\"\"\""
|
2009-09-19 04:55:05 -04:00
|
|
|
] [
|
2009-09-20 22:50:17 -04:00
|
|
|
"\""
|
2009-09-24 20:43:57 -04:00
|
|
|
] if (parse-multiline-string) unescape-string ;
|