factor/core/strings/parser/parser.factor

181 lines
4.1 KiB
Factor
Raw Normal View History

2009-09-19 04:55:05 -04:00
! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
2008-06-25 04:25:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
2009-09-19 04:55:05 -04:00
USING: accessors assocs kernel lexer make math math.parser
2009-09-20 22:50:17 -04:00
namespaces parser sequences splitting strings arrays
math.order ;
2008-06-25 04:25:08 -04:00
IN: strings.parser
ERROR: bad-escape ;
: escape ( escape -- ch )
H{
{ CHAR: a CHAR: \a }
{ CHAR: e CHAR: \e }
{ CHAR: n CHAR: \n }
{ CHAR: r CHAR: \r }
{ CHAR: t CHAR: \t }
{ CHAR: s CHAR: \s }
{ CHAR: \s CHAR: \s }
{ CHAR: 0 CHAR: \0 }
{ CHAR: \\ CHAR: \\ }
{ CHAR: \" CHAR: \" }
} at [ bad-escape ] unless* ;
SYMBOL: name>char-hook
name>char-hook [
[ "Unicode support not available" throw ]
] initialize
2008-06-25 04:25:08 -04:00
: unicode-escape ( str -- ch str' )
"{" ?head-slice [
CHAR: } over index cut-slice
2009-03-16 21:11:36 -04:00
[ >string name>char-hook get call( name -- char ) ] dip
2008-06-25 04:25:08 -04:00
rest-slice
] [
6 cut-slice [ hex> ] dip
2008-06-25 04:25:08 -04:00
] if ;
: next-escape ( str -- ch str' )
"u" ?head-slice [
unicode-escape
] [
unclip-slice escape swap
] if ;
2009-09-19 04:55:05 -04:00
: (unescape-string) ( str -- )
CHAR: \\ over index dup [
cut-slice [ % ] dip rest-slice
next-escape [ , ] dip
(unescape-string)
] [
drop %
] if ;
: unescape-string ( str -- str' )
[ (unescape-string) ] "" make ;
2008-06-25 04:25:08 -04:00
: (parse-string) ( str -- m )
dup [ "\"\\" member? ] find dup [
[ cut-slice [ % ] dip rest-slice ] dip
2009-03-16 21:11:36 -04:00
CHAR: " = [
from>>
2008-06-25 04:25:08 -04:00
] [
2009-03-16 21:11:36 -04:00
next-escape [ , ] dip (parse-string)
2008-06-25 04:25:08 -04:00
] if
] [
"Unterminated string" throw
] if ;
: parse-string ( -- str )
lexer get [
[ swap tail-slice (parse-string) ] "" make swap
] change-lexer-column ;
2009-09-19 04:55:05 -04:00
<PRIVATE
2009-09-20 22:50:17 -04:00
: lexer-before ( i -- before )
2009-09-19 04:55:05 -04:00
[
[
lexer get
[ column>> ] [ line-text>> ] bi
] dip swap subseq
] [
2009-09-19 04:55:05 -04:00
lexer get (>>column)
] bi ;
: find-next-token ( ch -- i elt )
CHAR: \ 2array
[ lexer get [ column>> ] [ line-text>> ] bi ] dip
[ member? ] curry find-from ;
2009-09-20 22:50:17 -04:00
: rest-of-line ( lexer -- seq )
[ line-text>> ] [ column>> ] bi tail-slice ;
2009-09-19 04:55:05 -04:00
2009-09-20 22:50:17 -04:00
: current-char ( lexer -- ch/f )
[ column>> ] [ line-text>> ] bi ?nth ;
: advance-char ( lexer -- )
[ 1 + ] change-column drop ;
ERROR: escaped-char-expected ;
: next-char ( lexer -- ch )
dup still-parsing-line? [
[ current-char ] [ advance-char ] bi
] [
escaped-char-expected
] if ;
2009-09-20 22:50:17 -04:00
: next-line% ( lexer -- )
[ rest-of-line % ]
[ next-line "\n" % ] bi ;
2009-09-19 04:55:05 -04:00
: rest-begins? ( string -- ? )
[
lexer get [ line-text>> ] [ column>> ] bi tail-slice
] dip head? ;
2009-09-20 22:50:17 -04:00
: advance-lexer ( n -- )
[ lexer get ] dip [ + ] curry change-column drop ; inline
: take-double-quotes ( -- string )
lexer get dup current-char CHAR: " = [
[ ] [ column>> ] [ line-text>> ] tri
[ CHAR: " = not ] find-from drop [
swap column>> - CHAR: " <repetition>
] [
rest-of-line
] if*
] [
drop f
] if dup length advance-lexer ;
: end-string-parse ( delimiter -- )
length 3 = [
take-double-quotes 3 tail %
] [
lexer get advance-char
] if ;
2009-09-19 04:55:05 -04:00
DEFER: (parse-long-string)
2009-09-20 22:50:17 -04:00
: parse-found-token ( i string token -- )
[ lexer-before % ] dip
2009-09-19 04:55:05 -04:00
CHAR: \ = [
2009-09-20 22:50:17 -04:00
lexer get [ next-char , ] [ next-char , ] bi (parse-long-string)
2009-09-19 04:55:05 -04:00
] [
dup rest-begins? [
2009-09-20 22:50:17 -04:00
end-string-parse
2009-09-19 04:55:05 -04:00
] [
2009-09-20 16:08:06 -04:00
lexer get next-char , (parse-long-string)
2009-09-19 04:55:05 -04:00
] if
] if ;
2009-09-20 22:50:17 -04:00
ERROR: trailing-characters string ;
2009-09-19 04:55:05 -04:00
: (parse-long-string) ( string -- )
lexer get still-parsing? [
2009-09-20 22:50:17 -04:00
dup first find-next-token [
parse-found-token
] [
drop lexer get next-line%
(parse-long-string)
] if*
2009-09-19 04:55:05 -04:00
] [
unexpected-eof
] if ;
2009-09-20 22:50:17 -04:00
PRIVATE>
2009-09-19 04:55:05 -04:00
: parse-long-string ( string -- string' )
[ (parse-long-string) ] "" make ;
2009-09-19 04:55:05 -04:00
: parse-multiline-string ( -- string )
2009-09-20 22:50:17 -04:00
lexer get rest-of-line "\"\"" head? [
2009-09-19 04:55:05 -04:00
lexer get [ 2 + ] change-column drop
2009-09-20 22:50:17 -04:00
"\"\"\""
2009-09-19 04:55:05 -04:00
] [
2009-09-20 22:50:17 -04:00
"\""
] if parse-long-string unescape-string ;