factor/core/strings/parser/parser.factor

205 lines
4.9 KiB
Factor
Raw Normal View History

2009-09-19 04:55:05 -04:00
! Copyright (C) 2008, 2009 Slava Pestov, Doug Coleman.
2008-06-25 04:25:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators kernel kernel.private
lexer math math.parser namespaces sbufs sequences splitting
strings ;
2008-06-25 04:25:08 -04:00
IN: strings.parser
ERROR: bad-escape char ;
2008-06-25 04:25:08 -04:00
: escape ( escape -- ch )
H{
{ CHAR: a CHAR: \a }
{ CHAR: b CHAR: \b }
2008-06-25 04:25:08 -04:00
{ CHAR: e CHAR: \e }
{ CHAR: f CHAR: \f }
2008-06-25 04:25:08 -04:00
{ CHAR: n CHAR: \n }
{ CHAR: r CHAR: \r }
{ CHAR: t CHAR: \t }
{ CHAR: s CHAR: \s }
{ CHAR: v CHAR: \v }
2008-06-25 04:25:08 -04:00
{ CHAR: \s CHAR: \s }
{ CHAR: 0 CHAR: \0 }
{ CHAR: \\ CHAR: \\ }
{ CHAR: \" CHAR: \" }
} ?at [ bad-escape ] unless ;
2008-06-25 04:25:08 -04:00
SYMBOL: name>char-hook
name>char-hook [
[ "Unicode support not available" throw ]
] initialize
2008-06-25 04:25:08 -04:00
: hex-escape ( str -- ch str' )
2 cut-slice [ hex> ] dip ;
2008-06-25 04:25:08 -04:00
: unicode-escape ( str -- ch str' )
"{" ?head-slice [
CHAR: } over index cut-slice
2009-03-16 21:11:36 -04:00
[ >string name>char-hook get call( name -- char ) ] dip
2008-06-25 04:25:08 -04:00
rest-slice
] [
6 cut-slice [ hex> ] dip
2008-06-25 04:25:08 -04:00
] if ;
: next-escape ( str -- ch str' )
2014-06-04 00:04:05 -04:00
unclip-slice {
{ CHAR: u [ unicode-escape ] }
{ CHAR: x [ hex-escape ] }
[ escape swap ]
} case ;
2008-06-25 04:25:08 -04:00
<PRIVATE
: (unescape-string) ( accum str i/f -- accum )
{ sbuf object object } declare
[
cut-slice [ over push-all ] dip
rest-slice next-escape [ over push ] dip
CHAR: \\ over index (unescape-string)
2009-09-19 04:55:05 -04:00
] [
over push-all
] if* ;
PRIVATE>
2009-09-19 04:55:05 -04:00
: unescape-string ( str -- str' )
CHAR: \\ over index [
[ [ length <sbuf> ] keep ] dip (unescape-string)
] when* "" like ;
<PRIVATE
2009-09-19 04:55:05 -04:00
: (parse-string) ( accum str -- accum m )
{ sbuf slice } declare
dup [ "\"\\" member? ] find [
[ cut-slice [ over push-all ] dip rest-slice ] dip
2009-03-16 21:11:36 -04:00
CHAR: " = [
from>>
2008-06-25 04:25:08 -04:00
] [
next-escape [ over push ] dip (parse-string)
2008-06-25 04:25:08 -04:00
] if
] [
"Unterminated string" throw
] if* ;
PRIVATE>
2008-06-25 04:25:08 -04:00
: parse-string ( -- str )
2014-06-04 00:05:36 -04:00
SBUF" " clone lexer get [
swap tail-slice (parse-string) [ "" like ] dip
2008-06-25 04:25:08 -04:00
] change-lexer-column ;
2009-09-19 04:55:05 -04:00
<PRIVATE
: lexer-subseq ( i lexer -- before )
{ fixnum lexer } declare
[ [ column>> ] [ line-text>> ] bi swapd subseq ]
[ column<< ] 2bi ;
2009-09-19 04:55:05 -04:00
2009-09-20 22:50:17 -04:00
: rest-of-line ( lexer -- seq )
{ lexer } declare
2009-09-20 22:50:17 -04:00
[ line-text>> ] [ column>> ] bi tail-slice ;
2009-09-19 04:55:05 -04:00
2009-09-20 22:50:17 -04:00
: current-char ( lexer -- ch/f )
{ lexer } declare
2009-09-20 22:50:17 -04:00
[ column>> ] [ line-text>> ] bi ?nth ;
: advance-char ( lexer -- )
{ lexer } declare
[ 1 + ] change-column drop ;
ERROR: escaped-char-expected ;
: next-char ( lexer -- ch )
{ lexer } declare
dup still-parsing-line? [
[ current-char ] [ advance-char ] bi
] [
escaped-char-expected
] if ;
: lexer-head? ( lexer string -- ? )
{ lexer string } declare
[ rest-of-line ] dip head? ;
2009-09-19 04:55:05 -04:00
: advance-lexer ( lexer n -- )
{ lexer fixnum } declare
[ + ] curry change-column drop ;
2009-09-20 22:50:17 -04:00
: find-next-token ( lexer ch -- i elt )
{ lexer fixnum } declare
[ [ column>> ] [ line-text>> ] bi ] dip
CHAR: \ 2array [ member? ] curry find-from ;
: next-line% ( accum lexer -- )
{ sbuf lexer } declare
[ rest-of-line swap push-all ]
[ next-line CHAR: \n swap push ] 2bi ;
: take-double-quotes ( lexer -- string )
{ lexer } declare
dup current-char CHAR: " = [
dup [ column>> ] [ line-text>> ] bi
2009-09-20 22:50:17 -04:00
[ CHAR: " = not ] find-from drop [
over column>> - CHAR: " <repetition>
2009-09-20 22:50:17 -04:00
] [
dup rest-of-line
2009-09-20 22:50:17 -04:00
] if*
[ length advance-lexer ] keep
] [ drop f ] if ;
2009-09-20 22:50:17 -04:00
: end-string-parse ( accum lexer delimiter -- )
{ sbuf lexer string } declare
2009-09-20 22:50:17 -04:00
length 3 = [
take-double-quotes 3 tail-slice swap push-all
2009-09-20 22:50:17 -04:00
] [
advance-char drop
] if ;
2009-09-20 22:50:17 -04:00
2009-09-24 20:45:03 -04:00
DEFER: (parse-multiline-string)
2009-09-19 04:55:05 -04:00
: parse-found-token ( accum lexer string i token -- )
{ sbuf lexer string fixnum fixnum } declare
[ [ 2over ] dip swap lexer-subseq swap push-all ] dip
2009-09-19 04:55:05 -04:00
CHAR: \ = [
2over next-char swap push
2over next-char swap push
(parse-multiline-string)
2009-09-19 04:55:05 -04:00
] [
2dup lexer-head? [
2009-09-20 22:50:17 -04:00
end-string-parse
2009-09-19 04:55:05 -04:00
] [
2over next-char swap push
(parse-multiline-string)
2009-09-19 04:55:05 -04:00
] if
] if ;
2009-09-20 22:50:17 -04:00
ERROR: trailing-characters string ;
2009-09-19 04:55:05 -04:00
: (parse-multiline-string) ( accum lexer string -- )
{ sbuf lexer fixnum } declare
over still-parsing? [
2dup first find-next-token [
2009-09-20 22:50:17 -04:00
parse-found-token
] [
drop 2over next-line%
2009-09-24 20:45:03 -04:00
(parse-multiline-string)
2009-09-20 22:50:17 -04:00
] if*
2009-09-19 04:55:05 -04:00
] [
throw-unexpected-eof
] if ;
2009-09-19 04:55:05 -04:00
PRIVATE>
2009-09-19 04:55:05 -04:00
: parse-multiline-string ( -- string )
SBUF" " clone [
lexer get
dup rest-of-line "\"\"" head? [
[ 2 + ] change-column
"\"\"\""
] [
"\""
] if (parse-multiline-string)
] keep unescape-string ;