add multiline string support
parent
086d4a87b4
commit
dc4a544a92
|
@ -1,4 +1,14 @@
|
||||||
IN: strings.parser.tests
|
|
||||||
USING: strings.parser tools.test ;
|
USING: strings.parser tools.test ;
|
||||||
|
IN: strings.parser.tests
|
||||||
|
|
||||||
[ "Hello\n\rworld" ] [ "Hello\\n\\rworld" unescape-string ] unit-test
|
[ "Hello\n\rworld" ] [ "Hello\\n\\rworld" unescape-string ] unit-test
|
||||||
|
|
||||||
|
[ "Hello\n\rworld" ] [ "Hello\n\rworld" ] unit-test
|
||||||
|
[ "Hello\n\rworld" ] [ """Hello\n\rworld""" ] unit-test
|
||||||
|
[ "Hello\n\rworld\n" ] [ "Hello\n\rworld
|
||||||
|
" ] unit-test
|
||||||
|
[ "Hello\n\rworld" "hi" ] [ "Hello\n\rworld" "hi" ] unit-test
|
||||||
|
[ "Hello\n\rworld" "hi" ] [ """Hello\n\rworld""" """hi""" ] unit-test
|
||||||
|
[ "Hello\n\rworld\n" "hi" ] [ """Hello\n\rworld
|
||||||
|
""" """hi""" ] unit-test
|
||||||
|
[ "Hello\n\rworld\"" "hi" ] [ """Hello\n\rworld\"""" """hi""" ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Slava Pestov.
|
! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel assocs namespaces make splitting sequences
|
USING: accessors assocs kernel lexer make math math.parser
|
||||||
strings math.parser lexer accessors ;
|
namespaces parser sequences splitting strings arrays ;
|
||||||
IN: strings.parser
|
IN: strings.parser
|
||||||
|
|
||||||
ERROR: bad-escape ;
|
ERROR: bad-escape ;
|
||||||
|
@ -42,6 +42,18 @@ name>char-hook [
|
||||||
unclip-slice escape swap
|
unclip-slice escape swap
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
|
: (unescape-string) ( str -- )
|
||||||
|
CHAR: \\ over index dup [
|
||||||
|
cut-slice [ % ] dip rest-slice
|
||||||
|
next-escape [ , ] dip
|
||||||
|
(unescape-string)
|
||||||
|
] [
|
||||||
|
drop %
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
: unescape-string ( str -- str' )
|
||||||
|
[ (unescape-string) ] "" make ;
|
||||||
|
|
||||||
: (parse-string) ( str -- m )
|
: (parse-string) ( str -- m )
|
||||||
dup [ "\"\\" member? ] find dup [
|
dup [ "\"\\" member? ] find dup [
|
||||||
[ cut-slice [ % ] dip rest-slice ] dip
|
[ cut-slice [ % ] dip rest-slice ] dip
|
||||||
|
@ -59,14 +71,79 @@ name>char-hook [
|
||||||
[ swap tail-slice (parse-string) ] "" make swap
|
[ swap tail-slice (parse-string) ] "" make swap
|
||||||
] change-lexer-column ;
|
] change-lexer-column ;
|
||||||
|
|
||||||
: (unescape-string) ( str -- )
|
<PRIVATE
|
||||||
CHAR: \\ over index dup [
|
|
||||||
cut-slice [ % ] dip rest-slice
|
: lexer-advance ( i -- before )
|
||||||
next-escape [ , ] dip
|
[
|
||||||
(unescape-string)
|
[
|
||||||
|
lexer get
|
||||||
|
[ column>> ] [ line-text>> ] bi
|
||||||
|
] dip swap subseq
|
||||||
] [
|
] [
|
||||||
drop %
|
lexer get (>>column)
|
||||||
|
] bi ;
|
||||||
|
|
||||||
|
: find-next-token ( ch -- i elt )
|
||||||
|
CHAR: \ 2array
|
||||||
|
[ lexer get [ column>> ] [ line-text>> ] bi ] dip
|
||||||
|
[ member? ] curry find-from ;
|
||||||
|
|
||||||
|
: rest-of-line ( -- seq )
|
||||||
|
lexer get [ line-text>> ] [ column>> ] bi tail-slice ;
|
||||||
|
|
||||||
|
: parse-escape ( i -- )
|
||||||
|
lexer-advance % CHAR: \ ,
|
||||||
|
lexer get
|
||||||
|
[ [ 2 + ] change-column drop ]
|
||||||
|
[ [ column>> 1 - ] [ line-text>> ] bi nth , ] bi ;
|
||||||
|
|
||||||
|
: next-string-line ( obj -- )
|
||||||
|
drop rest-of-line %
|
||||||
|
lexer get next-line "\n" % ;
|
||||||
|
|
||||||
|
: rest-begins? ( string -- ? )
|
||||||
|
[
|
||||||
|
lexer get [ line-text>> ] [ column>> ] bi tail-slice
|
||||||
|
] dip head? ;
|
||||||
|
|
||||||
|
DEFER: (parse-long-string)
|
||||||
|
|
||||||
|
: parse-rest-of-line ( string i token -- )
|
||||||
|
CHAR: \ = [
|
||||||
|
parse-escape (parse-long-string)
|
||||||
|
] [
|
||||||
|
lexer-advance %
|
||||||
|
dup rest-begins? [
|
||||||
|
[ lexer get ] dip length [ + ] curry change-column drop
|
||||||
|
] [
|
||||||
|
rest-of-line %
|
||||||
|
lexer get next-line "\n" % (parse-long-string)
|
||||||
|
] if
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: unescape-string ( str -- str' )
|
: parse-til-separator ( string -- )
|
||||||
[ (unescape-string) ] "" make ;
|
dup first find-next-token [
|
||||||
|
parse-rest-of-line
|
||||||
|
] [
|
||||||
|
next-string-line (parse-long-string)
|
||||||
|
] if* ;
|
||||||
|
|
||||||
|
: (parse-long-string) ( string -- )
|
||||||
|
lexer get still-parsing? [
|
||||||
|
parse-til-separator
|
||||||
|
] [
|
||||||
|
unexpected-eof
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
|
: parse-long-string ( string -- string' )
|
||||||
|
[ (parse-long-string) ] "" make unescape-string ;
|
||||||
|
|
||||||
|
: parse-multiline-string ( -- string )
|
||||||
|
rest-of-line "\"\"" head? [
|
||||||
|
lexer get [ 2 + ] change-column drop
|
||||||
|
"\"\"\"" parse-long-string
|
||||||
|
] [
|
||||||
|
"\"" parse-long-string
|
||||||
|
] if ;
|
||||||
|
|
|
@ -25,7 +25,7 @@ PRIVATE>
|
||||||
|
|
||||||
M: string equal?
|
M: string equal?
|
||||||
over string? [
|
over string? [
|
||||||
over hashcode over hashcode eq?
|
2dup [ hashcode ] bi@ eq?
|
||||||
[ sequence= ] [ 2drop f ] if
|
[ sequence= ] [ 2drop f ] if
|
||||||
] [
|
] [
|
||||||
2drop f
|
2drop f
|
||||||
|
|
|
@ -532,7 +532,7 @@ HELP: CHAR:
|
||||||
HELP: "
|
HELP: "
|
||||||
{ $syntax "\"string...\"" }
|
{ $syntax "\"string...\"" }
|
||||||
{ $values { "string" "literal and escaped characters" } }
|
{ $values { "string" "literal and escaped characters" } }
|
||||||
{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals cannot span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." }
|
{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals can span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." }
|
||||||
{ $examples
|
{ $examples
|
||||||
"A string with a newline in it:"
|
"A string with a newline in it:"
|
||||||
{ $example "USE: io" "\"Hello\\nworld\" print" "Hello\nworld" }
|
{ $example "USE: io" "\"Hello\\nworld\" print" "Hello\nworld" }
|
||||||
|
|
|
@ -86,7 +86,7 @@ IN: bootstrap.syntax
|
||||||
} cond parsed
|
} cond parsed
|
||||||
] define-core-syntax
|
] define-core-syntax
|
||||||
|
|
||||||
"\"" [ parse-string parsed ] define-core-syntax
|
"\"" [ parse-multiline-string parsed ] define-core-syntax
|
||||||
|
|
||||||
"SBUF\"" [
|
"SBUF\"" [
|
||||||
lexer get skip-blank parse-string >sbuf parsed
|
lexer get skip-blank parse-string >sbuf parsed
|
||||||
|
|
Loading…
Reference in New Issue