remove some c parsing words from sequence-parser
parent
05b7bb0079
commit
1660be50a4
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
|
@ -0,0 +1,103 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors c.lexer kernel sequence-parser tools.test ;
|
||||
IN: c.lexer.tests
|
||||
|
||||
[ 36 ]
|
||||
[
|
||||
" //jofiejoe\n //eoieow\n/*asdf*/\n "
|
||||
<sequence-parser> skip-whitespace/comments n>>
|
||||
] unit-test
|
||||
|
||||
[ f "33asdf" ]
|
||||
[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[ "asdf" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "_asdf" ]
|
||||
[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "_asdf400" ]
|
||||
[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "asdfasdf" ] [
|
||||
"/*asdfasdf*/" <sequence-parser> take-c-comment
|
||||
] unit-test
|
||||
|
||||
[ "k" ] [
|
||||
"/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "omg" ] [
|
||||
"//asdfasdf\nomg" <sequence-parser>
|
||||
[ take-c++-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "omg" ] [
|
||||
"omg" <sequence-parser>
|
||||
[ take-c++-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "/*asdfasdf" ] [
|
||||
"/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "asdf" "eoieoei" ] [
|
||||
"//asdf\neoieoei" <sequence-parser>
|
||||
[ take-c++-comment ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "abc\\\"def" ]
|
||||
[
|
||||
"\"abc\\\"def\" asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ skip-whitespace "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "\"abc" ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ "\"abc" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "c" ]
|
||||
[ "c" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ "abcd e \\\"f g" ]
|
||||
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
|
||||
|
||||
[ "123" ]
|
||||
[ "123jjj" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123uLL" ]
|
||||
[ "123uLL" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123ull" ]
|
||||
[ "123ull" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123u" ]
|
||||
[ "123u" <sequence-parser> take-c-integer ] unit-test
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators combinators.short-circuit
|
||||
generalizations kernel locals math.order math.ranges
|
||||
sequence-parser sequences sorting.functor sorting.slots
|
||||
unicode.categories ;
|
||||
IN: c.lexer
|
||||
|
||||
: take-c-comment ( sequence-parser -- seq/f )
|
||||
[
|
||||
dup "/*" take-sequence [
|
||||
"*/" take-until-sequence*
|
||||
] [
|
||||
drop f
|
||||
] if
|
||||
] with-sequence-parser ;
|
||||
|
||||
: take-c++-comment ( sequence-parser -- seq/f )
|
||||
[
|
||||
dup "//" take-sequence [
|
||||
[
|
||||
[
|
||||
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
|
||||
] take-until
|
||||
] [
|
||||
advance drop
|
||||
] bi
|
||||
] [
|
||||
drop f
|
||||
] if
|
||||
] with-sequence-parser ;
|
||||
|
||||
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
|
||||
skip-whitespace-eol
|
||||
{
|
||||
{ [ dup take-c-comment ] [ skip-whitespace/comments ] }
|
||||
{ [ dup take-c++-comment ] [ skip-whitespace/comments ] }
|
||||
[ ]
|
||||
} cond ;
|
||||
|
||||
: take-define-identifier ( sequence-parser -- string )
|
||||
skip-whitespace/comments
|
||||
[ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
|
||||
|
||||
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
|
||||
sequence-parser n>> :> start-n
|
||||
sequence-parser advance
|
||||
[
|
||||
{
|
||||
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
|
||||
[ current quote-char = not ]
|
||||
} 1||
|
||||
] take-while :> string
|
||||
sequence-parser current quote-char = [
|
||||
sequence-parser advance* string
|
||||
] [
|
||||
start-n sequence-parser (>>n) f
|
||||
] if ;
|
||||
|
||||
: (take-token) ( sequence-parser -- string )
|
||||
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
|
||||
|
||||
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
|
||||
sequence-parser skip-whitespace
|
||||
dup current {
|
||||
{ quote-char [ escape-char quote-char take-quoted-string ] }
|
||||
{ f [ drop f ] }
|
||||
[ drop (take-token) ]
|
||||
} case ;
|
||||
|
||||
: take-token ( sequence-parser -- string/f )
|
||||
CHAR: \ CHAR: " take-token* ;
|
||||
|
||||
: c-identifier-begin? ( ch -- ? )
|
||||
CHAR: a CHAR: z [a,b]
|
||||
CHAR: A CHAR: Z [a,b]
|
||||
{ CHAR: _ } 3append member? ;
|
||||
|
||||
: c-identifier-ch? ( ch -- ? )
|
||||
CHAR: a CHAR: z [a,b]
|
||||
CHAR: A CHAR: Z [a,b]
|
||||
CHAR: 0 CHAR: 9 [a,b]
|
||||
{ CHAR: _ } 4 nappend member? ;
|
||||
|
||||
: (take-c-identifier) ( sequence-parser -- string/f )
|
||||
dup current c-identifier-begin? [
|
||||
[ current c-identifier-ch? ] take-while
|
||||
] [
|
||||
drop f
|
||||
] if ;
|
||||
|
||||
: take-c-identifier ( sequence-parser -- string/f )
|
||||
[ (take-c-identifier) ] with-sequence-parser ;
|
||||
|
||||
<< "length" [ length ] define-sorting >>
|
||||
|
||||
: sort-tokens ( seq -- seq' )
|
||||
{ length>=< <=> } sort-by ;
|
||||
|
||||
: take-c-integer ( sequence-parser -- string/f )
|
||||
[
|
||||
dup take-integer [
|
||||
swap
|
||||
{ "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
|
||||
take-longest [ append ] when*
|
||||
] [
|
||||
drop f
|
||||
] if*
|
||||
] with-sequence-parser ;
|
||||
|
||||
CONSTANT: c-punctuators
|
||||
{
|
||||
"[" "]" "(" ")" "{" "}" "." "->"
|
||||
"++" "--" "&" "*" "+" "-" "~" "!"
|
||||
"/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
|
||||
"?" ":" ";" "..."
|
||||
"=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
|
||||
"," "#" "##"
|
||||
"<:" ":>" "<%" "%>" "%:" "%:%:"
|
||||
}
|
||||
|
||||
: take-c-punctuator ( sequence-parser -- string/f )
|
||||
c-punctuators take-longest ;
|
|
@ -4,7 +4,7 @@ USING: sequence-parser io io.encodings.utf8 io.files
|
|||
io.streams.string kernel combinators accessors io.pathnames
|
||||
fry sequences arrays locals namespaces io.directories
|
||||
assocs math splitting make unicode.categories
|
||||
combinators.short-circuit ;
|
||||
combinators.short-circuit c.lexer ;
|
||||
IN: c.preprocessor
|
||||
|
||||
: initial-library-paths ( -- seq )
|
||||
|
|
|
@ -77,47 +77,6 @@ IN: sequence-parser.tests
|
|||
[ "cd" ]
|
||||
[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "abc\\\"def" ]
|
||||
[
|
||||
"\"abc\\\"def\" asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[
|
||||
"\"abc\" asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ skip-whitespace "asdf" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
CHAR: \ CHAR: " take-quoted-string
|
||||
] unit-test
|
||||
|
||||
[ "\"abc" ]
|
||||
[
|
||||
"\"abc asdf" <sequence-parser>
|
||||
[ CHAR: \ CHAR: " take-quoted-string drop ]
|
||||
[ "\"abc" take-sequence ] bi
|
||||
] unit-test
|
||||
|
||||
[ "c" ]
|
||||
[ "c" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "" <sequence-parser> take-token ] unit-test
|
||||
|
||||
[ "abcd e \\\"f g" ]
|
||||
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "" <sequence-parser> take-rest ] unit-test
|
||||
|
||||
|
@ -140,63 +99,6 @@ IN: sequence-parser.tests
|
|||
[ "abcd" ] [ "abcd" <sequence-parser> 4 take-n ] unit-test
|
||||
[ "abcd" "efg" ] [ "abcdefg" <sequence-parser> [ 4 take-n ] [ take-rest ] bi ] unit-test
|
||||
|
||||
[ "asdfasdf" ] [
|
||||
"/*asdfasdf*/" <sequence-parser> take-c-comment
|
||||
] unit-test
|
||||
|
||||
[ "k" ] [
|
||||
"/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "omg" ] [
|
||||
"//asdfasdf\nomg" <sequence-parser>
|
||||
[ take-c++-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "omg" ] [
|
||||
"omg" <sequence-parser>
|
||||
[ take-c++-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "/*asdfasdf" ] [
|
||||
"/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ "asdf" "eoieoei" ] [
|
||||
"//asdf\neoieoei" <sequence-parser>
|
||||
[ take-c++-comment ] [ take-rest ] bi
|
||||
] unit-test
|
||||
|
||||
[ f "33asdf" ]
|
||||
[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
|
||||
|
||||
[ "asdf" ]
|
||||
[ "asdf" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "_asdf" ]
|
||||
[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "_asdf400" ]
|
||||
[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
|
||||
|
||||
[ "123" ]
|
||||
[ "123jjj" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123uLL" ]
|
||||
[ "123uLL" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123ull" ]
|
||||
[ "123ull" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ "123u" ]
|
||||
[ "123u" <sequence-parser> take-c-integer ] unit-test
|
||||
|
||||
[ 36 ]
|
||||
[
|
||||
" //jofiejoe\n //eoieow\n/*asdf*/\n "
|
||||
<sequence-parser> skip-whitespace/comments n>>
|
||||
] unit-test
|
||||
|
||||
[ f ]
|
||||
[ "\n" <sequence-parser> take-integer ] unit-test
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces math kernel sequences accessors fry circular
|
||||
unicode.case unicode.categories locals combinators.short-circuit
|
||||
make combinators io splitting math.parser math.ranges
|
||||
generalizations sorting.functor math.order sorting.slots ;
|
||||
USING: accessors circular combinators.short-circuit fry io
|
||||
kernel locals math math.order sequences sorting.functor
|
||||
sorting.slots unicode.categories ;
|
||||
IN: sequence-parser
|
||||
|
||||
TUPLE: sequence-parser sequence n ;
|
||||
|
@ -109,42 +108,6 @@ TUPLE: sequence-parser sequence n ;
|
|||
: skip-whitespace-eol ( sequence-parser -- sequence-parser )
|
||||
[ [ current " \t\r" member? not ] take-until drop ] keep ;
|
||||
|
||||
: take-c-comment ( sequence-parser -- seq/f )
|
||||
[
|
||||
dup "/*" take-sequence [
|
||||
"*/" take-until-sequence*
|
||||
] [
|
||||
drop f
|
||||
] if
|
||||
] with-sequence-parser ;
|
||||
|
||||
: take-c++-comment ( sequence-parser -- seq/f )
|
||||
[
|
||||
dup "//" take-sequence [
|
||||
[
|
||||
[
|
||||
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
|
||||
] take-until
|
||||
] [
|
||||
advance drop
|
||||
] bi
|
||||
] [
|
||||
drop f
|
||||
] if
|
||||
] with-sequence-parser ;
|
||||
|
||||
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
|
||||
skip-whitespace-eol
|
||||
{
|
||||
{ [ dup take-c-comment ] [ skip-whitespace/comments ] }
|
||||
{ [ dup take-c++-comment ] [ skip-whitespace/comments ] }
|
||||
[ ]
|
||||
} cond ;
|
||||
|
||||
: take-define-identifier ( sequence-parser -- string )
|
||||
skip-whitespace/comments
|
||||
[ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
|
||||
|
||||
: take-rest-slice ( sequence-parser -- sequence/f )
|
||||
[ sequence>> ] [ n>> ] bi
|
||||
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
|
||||
|
@ -158,35 +121,6 @@ TUPLE: sequence-parser sequence n ;
|
|||
: parse-sequence ( sequence quot -- )
|
||||
[ <sequence-parser> ] dip call ; inline
|
||||
|
||||
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
|
||||
sequence-parser n>> :> start-n
|
||||
sequence-parser advance
|
||||
[
|
||||
{
|
||||
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
|
||||
[ current quote-char = not ]
|
||||
} 1||
|
||||
] take-while :> string
|
||||
sequence-parser current quote-char = [
|
||||
sequence-parser advance* string
|
||||
] [
|
||||
start-n sequence-parser (>>n) f
|
||||
] if ;
|
||||
|
||||
: (take-token) ( sequence-parser -- string )
|
||||
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
|
||||
|
||||
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
|
||||
sequence-parser skip-whitespace
|
||||
dup current {
|
||||
{ quote-char [ escape-char quote-char take-quoted-string ] }
|
||||
{ f [ drop f ] }
|
||||
[ drop (take-token) ]
|
||||
} case ;
|
||||
|
||||
: take-token ( sequence-parser -- string/f )
|
||||
CHAR: \ CHAR: " take-token* ;
|
||||
|
||||
: take-integer ( sequence-parser -- n/f )
|
||||
[ current digit? ] take-while ;
|
||||
|
||||
|
@ -198,27 +132,6 @@ TUPLE: sequence-parser sequence n ;
|
|||
sequence-parser [ n + ] change-n drop
|
||||
] if ;
|
||||
|
||||
: c-identifier-begin? ( ch -- ? )
|
||||
CHAR: a CHAR: z [a,b]
|
||||
CHAR: A CHAR: Z [a,b]
|
||||
{ CHAR: _ } 3append member? ;
|
||||
|
||||
: c-identifier-ch? ( ch -- ? )
|
||||
CHAR: a CHAR: z [a,b]
|
||||
CHAR: A CHAR: Z [a,b]
|
||||
CHAR: 0 CHAR: 9 [a,b]
|
||||
{ CHAR: _ } 4 nappend member? ;
|
||||
|
||||
: (take-c-identifier) ( sequence-parser -- string/f )
|
||||
dup current c-identifier-begin? [
|
||||
[ current c-identifier-ch? ] take-while
|
||||
] [
|
||||
drop f
|
||||
] if ;
|
||||
|
||||
: take-c-identifier ( sequence-parser -- string/f )
|
||||
[ (take-c-identifier) ] with-sequence-parser ;
|
||||
|
||||
<< "length" [ length ] define-sorting >>
|
||||
|
||||
: sort-tokens ( seq -- seq' )
|
||||
|
@ -228,34 +141,8 @@ TUPLE: sequence-parser sequence n ;
|
|||
swap
|
||||
'[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
|
||||
|
||||
|
||||
: take-longest ( sequence-parser seq -- seq )
|
||||
sort-tokens take-first-matching ;
|
||||
|
||||
: take-c-integer ( sequence-parser -- string/f )
|
||||
[
|
||||
dup take-integer [
|
||||
swap
|
||||
{ "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
|
||||
take-longest [ append ] when*
|
||||
] [
|
||||
drop f
|
||||
] if*
|
||||
] with-sequence-parser ;
|
||||
|
||||
CONSTANT: c-punctuators
|
||||
{
|
||||
"[" "]" "(" ")" "{" "}" "." "->"
|
||||
"++" "--" "&" "*" "+" "-" "~" "!"
|
||||
"/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
|
||||
"?" ":" ";" "..."
|
||||
"=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
|
||||
"," "#" "##"
|
||||
"<:" ":>" "<%" "%>" "%:" "%:%:"
|
||||
}
|
||||
|
||||
: take-c-punctuator ( sequence-parser -- string/f )
|
||||
c-punctuators take-longest ;
|
||||
|
||||
: write-full ( sequence-parser -- ) sequence>> write ;
|
||||
: write-rest ( sequence-parser -- ) take-rest write ;
|
||||
|
|
Loading…
Reference in New Issue