fix string parsing

db4
Doug Coleman 2009-09-20 21:50:17 -05:00
parent 31e7d355fe
commit a4a9dcce00
2 changed files with 52 additions and 35 deletions

View File

@ -31,3 +31,6 @@ IN: strings.parser.tests
] [ ] [
"\"\"\"\"abc\"\"\"\"" eval( -- string ) "\"\"\"\"abc\"\"\"\"" eval( -- string )
] unit-test ] unit-test
[ "\"\\" ] [ "\"\\" ] unit-test

View File

@ -1,7 +1,8 @@
! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman. ! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs kernel lexer make math math.parser USING: accessors assocs kernel lexer make math math.parser
namespaces parser sequences splitting strings arrays ; namespaces parser sequences splitting strings arrays
math.order ;
IN: strings.parser IN: strings.parser
ERROR: bad-escape ; ERROR: bad-escape ;
@ -73,7 +74,7 @@ name>char-hook [
<PRIVATE <PRIVATE
: lexer-advance ( i -- before ) : lexer-before ( i -- before )
[ [
[ [
lexer get lexer get
@ -88,11 +89,11 @@ name>char-hook [
[ lexer get [ column>> ] [ line-text>> ] bi ] dip [ lexer get [ column>> ] [ line-text>> ] bi ] dip
[ member? ] curry find-from ; [ member? ] curry find-from ;
: rest-of-line ( -- seq ) : rest-of-line ( lexer -- seq )
lexer get [ line-text>> ] [ column>> ] bi tail-slice ; [ line-text>> ] [ column>> ] bi tail-slice ;
: current-char ( lexer -- ch ) : current-char ( lexer -- ch/f )
[ column>> ] [ line-text>> ] bi nth ; [ column>> ] [ line-text>> ] bi ?nth ;
: advance-char ( lexer -- ) : advance-char ( lexer -- )
[ 1 + ] change-column drop ; [ 1 + ] change-column drop ;
@ -106,61 +107,74 @@ ERROR: escaped-char-expected ;
escaped-char-expected escaped-char-expected
] if ; ] if ;
: parse-escape ( i -- ) : next-line% ( lexer -- )
lexer-advance % CHAR: \ , [ rest-of-line % ]
lexer get [ next-line "\n" % ] bi ;
[ advance-char ]
[ next-char , ] bi ;
: next-string-line ( obj -- )
drop rest-of-line %
lexer get next-line "\n" % ;
: rest-begins? ( string -- ? ) : rest-begins? ( string -- ? )
[ [
lexer get [ line-text>> ] [ column>> ] bi tail-slice lexer get [ line-text>> ] [ column>> ] bi tail-slice
] dip head? ; ] dip head? ;
: advance-lexer ( n -- )
[ lexer get ] dip [ + ] curry change-column drop ; inline
: take-double-quotes ( -- string )
lexer get dup current-char CHAR: " = [
[ ] [ column>> ] [ line-text>> ] tri
[ CHAR: " = not ] find-from drop [
swap column>> - CHAR: " <repetition>
] [
rest-of-line
] if*
] [
drop f
] if dup length advance-lexer ;
: end-string-parse ( delimiter -- )
length 3 = [
take-double-quotes 3 tail %
] [
lexer get advance-char
] if ;
DEFER: (parse-long-string) DEFER: (parse-long-string)
: parse-rest-of-line ( string i token -- ) : parse-found-token ( i string token -- )
[ lexer-before % ] dip
CHAR: \ = [ CHAR: \ = [
parse-escape (parse-long-string) lexer get [ next-char , ] [ next-char , ] bi (parse-long-string)
] [ ] [
lexer-advance %
dup rest-begins? [ dup rest-begins? [
[ lexer get ] dip length [ + ] curry change-column drop end-string-parse
] [ ] [
lexer get next-char , (parse-long-string) lexer get next-char , (parse-long-string)
] if ] if
] if ; ] if ;
: parse-til-separator ( string -- ) ERROR: trailing-characters string ;
dup first find-next-token [
parse-rest-of-line
] [
next-string-line (parse-long-string)
] if* ;
: (parse-long-string) ( string -- ) : (parse-long-string) ( string -- )
lexer get still-parsing? [ lexer get still-parsing? [
parse-til-separator dup first find-next-token [
parse-found-token
] [
drop lexer get next-line%
(parse-long-string)
] if*
] [ ] [
unexpected-eof unexpected-eof
] if ; ] if ;
PRIVATE>
: parse-long-string ( string -- string' ) : parse-long-string ( string -- string' )
[ (parse-long-string) ] "" make ; [ (parse-long-string) ] "" make ;
: parse-long-string-escaped ( string -- string' )
parse-long-string unescape-string ;
PRIVATE>
: parse-multiline-string ( -- string ) : parse-multiline-string ( -- string )
rest-of-line "\"\"" head? [ lexer get rest-of-line "\"\"" head? [
lexer get [ 2 + ] change-column drop lexer get [ 2 + ] change-column drop
"\"\"\"" parse-long-string-escaped "\"\"\""
] [ ] [
"\"" parse-long-string-escaped "\""
] if ; ] if parse-long-string unescape-string ;