diff --git a/core/strings/parser/parser-tests.factor b/core/strings/parser/parser-tests.factor index 80f649c204..c7ce142269 100644 --- a/core/strings/parser/parser-tests.factor +++ b/core/strings/parser/parser-tests.factor @@ -1,4 +1,14 @@ -IN: strings.parser.tests USING: strings.parser tools.test ; +IN: strings.parser.tests [ "Hello\n\rworld" ] [ "Hello\\n\\rworld" unescape-string ] unit-test + +[ "Hello\n\rworld" ] [ "Hello\n\rworld" ] unit-test +[ "Hello\n\rworld" ] [ """Hello\n\rworld""" ] unit-test +[ "Hello\n\rworld\n" ] [ "Hello\n\rworld +" ] unit-test +[ "Hello\n\rworld" "hi" ] [ "Hello\n\rworld" "hi" ] unit-test +[ "Hello\n\rworld" "hi" ] [ """Hello\n\rworld""" """hi""" ] unit-test +[ "Hello\n\rworld\n" "hi" ] [ """Hello\n\rworld +""" """hi""" ] unit-test +[ "Hello\n\rworld\"" "hi" ] [ """Hello\n\rworld\"""" """hi""" ] unit-test diff --git a/core/strings/parser/parser.factor b/core/strings/parser/parser.factor index c6e58f659a..22b84c830e 100644 --- a/core/strings/parser/parser.factor +++ b/core/strings/parser/parser.factor @@ -1,7 +1,7 @@ -! Copyright (C) 2008 Slava Pestov. +! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel assocs namespaces make splitting sequences -strings math.parser lexer accessors ; +USING: accessors assocs kernel lexer make math math.parser +namespaces parser sequences splitting strings arrays ; IN: strings.parser ERROR: bad-escape ; @@ -42,6 +42,18 @@ name>char-hook [ unclip-slice escape swap ] if ; +: (unescape-string) ( str -- ) + CHAR: \\ over index dup [ + cut-slice [ % ] dip rest-slice + next-escape [ , ] dip + (unescape-string) + ] [ + drop % + ] if ; + +: unescape-string ( str -- str' ) + [ (unescape-string) ] "" make ; + : (parse-string) ( str -- m ) dup [ "\"\\" member? ] find dup [ [ cut-slice [ % ] dip rest-slice ] dip @@ -59,14 +71,79 @@ name>char-hook [ [ swap tail-slice (parse-string) ] "" make swap ] change-lexer-column ; -: (unescape-string) ( str -- ) - CHAR: \\ over index dup [ - cut-slice [ % ] dip rest-slice - next-escape [ , ] dip - (unescape-string) +> ] [ line-text>> ] bi + ] dip swap subseq ] [ - drop % + lexer get (>>column) + ] bi ; + +: find-next-token ( ch -- i elt ) + CHAR: \ 2array + [ lexer get [ column>> ] [ line-text>> ] bi ] dip + [ member? ] curry find-from ; + +: rest-of-line ( -- seq ) + lexer get [ line-text>> ] [ column>> ] bi tail-slice ; + +: parse-escape ( i -- ) + lexer-advance % CHAR: \ , + lexer get + [ [ 2 + ] change-column drop ] + [ [ column>> 1 - ] [ line-text>> ] bi nth , ] bi ; + +: next-string-line ( obj -- ) + drop rest-of-line % + lexer get next-line "\n" % ; + +: rest-begins? ( string -- ? ) + [ + lexer get [ line-text>> ] [ column>> ] bi tail-slice + ] dip head? ; + +DEFER: (parse-long-string) + +: parse-rest-of-line ( string i token -- ) + CHAR: \ = [ + parse-escape (parse-long-string) + ] [ + lexer-advance % + dup rest-begins? [ + [ lexer get ] dip length [ + ] curry change-column drop + ] [ + rest-of-line % + lexer get next-line "\n" % (parse-long-string) + ] if ] if ; -: unescape-string ( str -- str' ) - [ (unescape-string) ] "" make ; +: parse-til-separator ( string -- ) + dup first find-next-token [ + parse-rest-of-line + ] [ + next-string-line (parse-long-string) + ] if* ; + +: (parse-long-string) ( string -- ) + lexer get still-parsing? [ + parse-til-separator + ] [ + unexpected-eof + ] if ; + +PRIVATE> + +: parse-long-string ( string -- string' ) + [ (parse-long-string) ] "" make unescape-string ; + +: parse-multiline-string ( -- string ) + rest-of-line "\"\"" head? [ + lexer get [ 2 + ] change-column drop + "\"\"\"" parse-long-string + ] [ + "\"" parse-long-string + ] if ; diff --git a/core/strings/strings.factor b/core/strings/strings.factor index 8ab0409318..18af08b3f6 100644 --- a/core/strings/strings.factor +++ b/core/strings/strings.factor @@ -25,7 +25,7 @@ PRIVATE> M: string equal? over string? [ - over hashcode over hashcode eq? + 2dup [ hashcode ] bi@ eq? [ sequence= ] [ 2drop f ] if ] [ 2drop f diff --git a/core/syntax/syntax-docs.factor b/core/syntax/syntax-docs.factor index e34fb0957f..551cc76c0e 100644 --- a/core/syntax/syntax-docs.factor +++ b/core/syntax/syntax-docs.factor @@ -532,7 +532,7 @@ HELP: CHAR: HELP: " { $syntax "\"string...\"" } { $values { "string" "literal and escaped characters" } } -{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals cannot span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." } +{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals can span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." } { $examples "A string with a newline in it:" { $example "USE: io" "\"Hello\\nworld\" print" "Hello\nworld" } diff --git a/core/syntax/syntax.factor b/core/syntax/syntax.factor index 16645e3342..80c7a42f30 100644 --- a/core/syntax/syntax.factor +++ b/core/syntax/syntax.factor @@ -86,7 +86,7 @@ IN: bootstrap.syntax } cond parsed ] define-core-syntax - "\"" [ parse-string parsed ] define-core-syntax + "\"" [ parse-multiline-string parsed ] define-core-syntax "SBUF\"" [ lexer get skip-blank parse-string >sbuf parsed