fix string parsing

2009-09-20 21:50:17 -05:00 · 2009-09-20 21:50:17 -05:00 · a4a9dcce00
parent 31e7d355fe
commit a4a9dcce00
2 changed files with 52 additions and 35 deletions
--- a/core/strings/parser/parser-tests.factor
+++ b/core/strings/parser/parser-tests.factor
@ -31,3 +31,6 @@ IN: strings.parser.tests
 ] [
    "\"\"\"\"abc\"\"\"\"" eval( -- string )
 ] unit-test
 [ "\"\\" ] [ "\"\\" ] unit-test
--- a/core/strings/parser/parser.factor
+++ b/core/strings/parser/parser.factor
@ -1,7 +1,8 @@
 ! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs kernel lexer make math math.parser
-namespaces parser sequences splitting strings arrays ;
+namespaces parser sequences splitting strings arrays
 math.order ;
 IN: strings.parser
 ERROR: bad-escape ;
@ -73,7 +74,7 @@ name>char-hook [
 <PRIVATE
-: lexer-advance ( i -- before )
+: lexer-before ( i -- before )
    [
        [
            lexer get
@ -88,11 +89,11 @@ name>char-hook [
    [ lexer get [ column>> ] [ line-text>> ] bi ] dip
    [ member? ] curry find-from ;
-: rest-of-line ( -- seq )
+: rest-of-line ( lexer -- seq )
-    lexer get [ line-text>> ] [ column>> ] bi tail-slice ;
+    [ line-text>> ] [ column>> ] bi tail-slice ;
-: current-char ( lexer -- ch )
+: current-char ( lexer -- ch/f )
-    [ column>> ] [ line-text>> ] bi nth ;
+    [ column>> ] [ line-text>> ] bi ?nth ;
 : advance-char ( lexer -- )
    [ 1 + ] change-column drop ;
@ -106,61 +107,74 @@ ERROR: escaped-char-expected ;
        escaped-char-expected
    ] if ;
-: parse-escape ( i -- )
+: next-line% ( lexer -- )
-    lexer-advance % CHAR: \ ,
+    [ rest-of-line % ]
-    lexer get
+    [ next-line "\n" % ] bi ;
    [ advance-char ]
    [ next-char , ] bi ;
 : next-string-line ( obj -- )
    drop rest-of-line %
    lexer get next-line "\n" % ;
 : rest-begins? ( string -- ? )
    [
        lexer get [ line-text>> ] [ column>> ] bi tail-slice
    ] dip head? ;
 : advance-lexer ( n -- )
    [ lexer get ] dip [ + ] curry change-column drop ; inline
 : take-double-quotes ( -- string )
    lexer get dup current-char CHAR: " = [
        [ ] [ column>> ] [ line-text>> ] tri
        [ CHAR: " = not ] find-from drop [
            swap column>> - CHAR: " <repetition>
        ] [
            rest-of-line
        ] if*
    ] [
        drop f
    ] if dup length advance-lexer ;
 : end-string-parse ( delimiter -- )
    length 3 = [
        take-double-quotes 3 tail %
    ] [
        lexer get advance-char
    ] if ;
 DEFER: (parse-long-string)
-: parse-rest-of-line ( string i token -- )
+: parse-found-token ( i string token -- )
    [ lexer-before % ] dip
    CHAR: \ = [
-        parse-escape (parse-long-string)
+        lexer get [ next-char , ] [ next-char , ] bi (parse-long-string)
    ] [
        lexer-advance %
        dup rest-begins? [
-            [ lexer get ] dip length [ + ] curry change-column drop
+            end-string-parse
        ] [
            lexer get next-char , (parse-long-string)
        ] if
    ] if ;
-: parse-til-separator ( string -- )
+ERROR: trailing-characters string ;
    dup first find-next-token [
        parse-rest-of-line
    ] [
        next-string-line (parse-long-string)
    ] if* ;
 : (parse-long-string) ( string -- )
    lexer get still-parsing? [
-        parse-til-separator
+        dup first find-next-token [
            parse-found-token
        ] [
            drop lexer get next-line%
            (parse-long-string)
        ] if*
    ] [
        unexpected-eof
    ] if ;
 PRIVATE>
 : parse-long-string ( string -- string' )
    [ (parse-long-string) ] "" make ;
 : parse-long-string-escaped ( string -- string' )
    parse-long-string unescape-string ;
 PRIVATE>
 : parse-multiline-string ( -- string )
-    rest-of-line "\"\"" head? [
+    lexer get rest-of-line "\"\"" head? [
        lexer get [ 2 + ] change-column drop
-        "\"\"\"" parse-long-string-escaped
+        "\"\"\""
    ] [
-        "\"" parse-long-string-escaped
+        "\""
-    ] if ;
+    ] if parse-long-string unescape-string ;