add multiline string support

2009-09-19 01:55:05 -07:00 · 2009-09-19 01:55:05 -07:00 · dc4a544a92
parent 086d4a87b4
commit dc4a544a92
5 changed files with 102 additions and 15 deletions
--- a/core/strings/parser/parser-tests.factor
+++ b/core/strings/parser/parser-tests.factor
@ -1,4 +1,14 @@
 IN: strings.parser.tests
 USING: strings.parser tools.test ;
 IN: strings.parser.tests
 [ "Hello\n\rworld" ] [ "Hello\\n\\rworld" unescape-string ] unit-test
 [ "Hello\n\rworld" ] [ "Hello\n\rworld" ] unit-test
 [ "Hello\n\rworld" ] [ """Hello\n\rworld""" ] unit-test
 [ "Hello\n\rworld\n" ] [ "Hello\n\rworld
 " ] unit-test
 [ "Hello\n\rworld" "hi" ] [ "Hello\n\rworld" "hi" ] unit-test
 [ "Hello\n\rworld" "hi" ] [ """Hello\n\rworld""" """hi""" ] unit-test
 [ "Hello\n\rworld\n" "hi" ] [ """Hello\n\rworld
 """ """hi""" ] unit-test
 [ "Hello\n\rworld\"" "hi" ] [ """Hello\n\rworld\"""" """hi""" ] unit-test
--- a/core/strings/parser/parser.factor
+++ b/core/strings/parser/parser.factor
@ -1,7 +1,7 @@
-! Copyright (C) 2008 Slava Pestov.
+! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: kernel assocs namespaces make splitting sequences
+USING: accessors assocs kernel lexer make math math.parser
-strings math.parser lexer accessors ;
+namespaces parser sequences splitting strings arrays ;
 IN: strings.parser
 ERROR: bad-escape ;
@ -42,6 +42,18 @@ name>char-hook [
        unclip-slice escape swap
    ] if ;
 : (unescape-string) ( str -- )
    CHAR: \\ over index dup [
        cut-slice [ % ] dip rest-slice
        next-escape [ , ] dip
        (unescape-string)
    ] [
        drop %
    ] if ;
 : unescape-string ( str -- str' )
    [ (unescape-string) ] "" make ;
 : (parse-string) ( str -- m )
    dup [ "\"\\" member? ] find dup [
        [ cut-slice [ % ] dip rest-slice ] dip
@ -59,14 +71,79 @@ name>char-hook [
        [ swap tail-slice (parse-string) ] "" make swap
    ] change-lexer-column ;
-: (unescape-string) ( str -- )
+<PRIVATE
-    CHAR: \\ over index dup [
+
-        cut-slice [ % ] dip rest-slice
+: lexer-advance ( i -- before )
-        next-escape [ , ] dip
+    [
-        (unescape-string)
+        [
            lexer get
            [ column>> ] [ line-text>> ] bi
        ] dip swap subseq
    ] [
-        drop %
+        lexer get (>>column)
    ] bi ;
 : find-next-token ( ch -- i elt )
    CHAR: \ 2array
    [ lexer get [ column>> ] [ line-text>> ] bi ] dip
    [ member? ] curry find-from ;
 : rest-of-line ( -- seq )
    lexer get [ line-text>> ] [ column>> ] bi tail-slice ;
 : parse-escape ( i -- )
    lexer-advance % CHAR: \ ,
    lexer get
    [ [ 2 + ] change-column drop ]
    [ [ column>> 1 - ] [ line-text>> ] bi nth , ] bi ;
 : next-string-line ( obj -- )
    drop rest-of-line %
    lexer get next-line "\n" % ;
 : rest-begins? ( string -- ? )
    [
        lexer get [ line-text>> ] [ column>> ] bi tail-slice
    ] dip head? ;
 DEFER: (parse-long-string)
 : parse-rest-of-line ( string i token -- )
    CHAR: \ = [
        parse-escape (parse-long-string)
    ] [
        lexer-advance %
        dup rest-begins? [
            [ lexer get ] dip length [ + ] curry change-column drop
        ] [
            rest-of-line %
            lexer get next-line "\n" % (parse-long-string)
        ] if
    ] if ;
-: unescape-string ( str -- str' )
+: parse-til-separator ( string -- )
-    [ (unescape-string) ] "" make ;
+    dup first find-next-token [
        parse-rest-of-line
    ] [
        next-string-line (parse-long-string)
    ] if* ;
 : (parse-long-string) ( string -- )
    lexer get still-parsing? [
        parse-til-separator
    ] [
        unexpected-eof
    ] if ;
 PRIVATE>
 : parse-long-string ( string -- string' )
    [ (parse-long-string) ] "" make unescape-string ;
 : parse-multiline-string ( -- string )
    rest-of-line "\"\"" head? [
        lexer get [ 2 + ] change-column drop
        "\"\"\"" parse-long-string
    ] [
        "\"" parse-long-string
    ] if ;
--- a/core/strings/strings.factor
+++ b/core/strings/strings.factor
@ -25,7 +25,7 @@ PRIVATE>
 M: string equal?
    over string? [
-        over hashcode over hashcode eq?
+        2dup [ hashcode ] bi@ eq?
        [ sequence= ] [ 2drop f ] if
    ] [
        2drop f
--- a/core/syntax/syntax-docs.factor
+++ b/core/syntax/syntax-docs.factor
@ -532,7 +532,7 @@ HELP: CHAR:
 HELP: "
 { $syntax "\"string...\"" }
 { $values { "string" "literal and escaped characters" } }
-{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals cannot span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." }
+{ $description "Reads from the input string until the next occurrence of " { $link POSTPONE: " } ", and appends the resulting string to the parse tree. String literals can span multiple lines. Strings containing the " { $link POSTPONE: " } " character and various other special characters can be read by inserting " { $link "escape" } "." }
 { $examples
  "A string with a newline in it:"
  { $example "USE: io" "\"Hello\\nworld\" print" "Hello\nworld" }
--- a/core/syntax/syntax.factor
+++ b/core/syntax/syntax.factor
@ -86,7 +86,7 @@ IN: bootstrap.syntax
        } cond parsed
    ] define-core-syntax
-    "\"" [ parse-string parsed ] define-core-syntax
+    "\"" [ parse-multiline-string parsed ] define-core-syntax
    "SBUF\"" [
        lexer get skip-blank parse-string >sbuf parsed