remove some c parsing words from sequence-parser

db4
Doug Coleman 2009-08-01 20:42:29 -05:00
parent 05b7bb0079
commit 1660be50a4
6 changed files with 231 additions and 215 deletions

View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -0,0 +1,103 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors c.lexer kernel sequence-parser tools.test ;
IN: c.lexer.tests
[ 36 ]
[
" //jofiejoe\n //eoieow\n/*asdf*/\n "
<sequence-parser> skip-whitespace/comments n>>
] unit-test
[ f "33asdf" ]
[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
[ "asdf" ]
[ "asdf" <sequence-parser> take-c-identifier ] unit-test
[ "_asdf" ]
[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
[ "_asdf400" ]
[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
[ "asdfasdf" ] [
"/*asdfasdf*/" <sequence-parser> take-c-comment
] unit-test
[ "k" ] [
"/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
] unit-test
[ "omg" ] [
"//asdfasdf\nomg" <sequence-parser>
[ take-c++-comment drop ] [ take-rest ] bi
] unit-test
[ "omg" ] [
"omg" <sequence-parser>
[ take-c++-comment drop ] [ take-rest ] bi
] unit-test
[ "/*asdfasdf" ] [
"/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
] unit-test
[ "asdf" "eoieoei" ] [
"//asdf\neoieoei" <sequence-parser>
[ take-c++-comment ] [ take-rest ] bi
] unit-test
[ f ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
] unit-test
[ "abc\\\"def" ]
[
"\"abc\\\"def\" asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "asdf" ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ skip-whitespace "asdf" take-sequence ] bi
] unit-test
[ f ]
[
"\"abc asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "\"abc" ]
[
"\"abc asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ "\"abc" take-sequence ] bi
] unit-test
[ "c" ]
[ "c" <sequence-parser> take-token ] unit-test
[ f ]
[ "" <sequence-parser> take-token ] unit-test
[ "abcd e \\\"f g" ]
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
[ "123" ]
[ "123jjj" <sequence-parser> take-c-integer ] unit-test
[ "123uLL" ]
[ "123uLL" <sequence-parser> take-c-integer ] unit-test
[ "123ull" ]
[ "123ull" <sequence-parser> take-c-integer ] unit-test
[ "123u" ]
[ "123u" <sequence-parser> take-c-integer ] unit-test

123
extra/c/lexer/lexer.factor Normal file
View File

@ -0,0 +1,123 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit
generalizations kernel locals math.order math.ranges
sequence-parser sequences sorting.functor sorting.slots
unicode.categories ;
IN: c.lexer
: take-c-comment ( sequence-parser -- seq/f )
[
dup "/*" take-sequence [
"*/" take-until-sequence*
] [
drop f
] if
] with-sequence-parser ;
: take-c++-comment ( sequence-parser -- seq/f )
[
dup "//" take-sequence [
[
[
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
] take-until
] [
advance drop
] bi
] [
drop f
] if
] with-sequence-parser ;
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
skip-whitespace-eol
{
{ [ dup take-c-comment ] [ skip-whitespace/comments ] }
{ [ dup take-c++-comment ] [ skip-whitespace/comments ] }
[ ]
} cond ;
: take-define-identifier ( sequence-parser -- string )
skip-whitespace/comments
[ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
sequence-parser n>> :> start-n
sequence-parser advance
[
{
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
[ current quote-char = not ]
} 1||
] take-while :> string
sequence-parser current quote-char = [
sequence-parser advance* string
] [
start-n sequence-parser (>>n) f
] if ;
: (take-token) ( sequence-parser -- string )
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
sequence-parser skip-whitespace
dup current {
{ quote-char [ escape-char quote-char take-quoted-string ] }
{ f [ drop f ] }
[ drop (take-token) ]
} case ;
: take-token ( sequence-parser -- string/f )
CHAR: \ CHAR: " take-token* ;
: c-identifier-begin? ( ch -- ? )
CHAR: a CHAR: z [a,b]
CHAR: A CHAR: Z [a,b]
{ CHAR: _ } 3append member? ;
: c-identifier-ch? ( ch -- ? )
CHAR: a CHAR: z [a,b]
CHAR: A CHAR: Z [a,b]
CHAR: 0 CHAR: 9 [a,b]
{ CHAR: _ } 4 nappend member? ;
: (take-c-identifier) ( sequence-parser -- string/f )
dup current c-identifier-begin? [
[ current c-identifier-ch? ] take-while
] [
drop f
] if ;
: take-c-identifier ( sequence-parser -- string/f )
[ (take-c-identifier) ] with-sequence-parser ;
<< "length" [ length ] define-sorting >>
: sort-tokens ( seq -- seq' )
{ length>=< <=> } sort-by ;
: take-c-integer ( sequence-parser -- string/f )
[
dup take-integer [
swap
{ "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
take-longest [ append ] when*
] [
drop f
] if*
] with-sequence-parser ;
CONSTANT: c-punctuators
{
"[" "]" "(" ")" "{" "}" "." "->"
"++" "--" "&" "*" "+" "-" "~" "!"
"/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
"?" ":" ";" "..."
"=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
"," "#" "##"
"<:" ":>" "<%" "%>" "%:" "%:%:"
}
: take-c-punctuator ( sequence-parser -- string/f )
c-punctuators take-longest ;

View File

@ -4,7 +4,7 @@ USING: sequence-parser io io.encodings.utf8 io.files
io.streams.string kernel combinators accessors io.pathnames io.streams.string kernel combinators accessors io.pathnames
fry sequences arrays locals namespaces io.directories fry sequences arrays locals namespaces io.directories
assocs math splitting make unicode.categories assocs math splitting make unicode.categories
combinators.short-circuit ; combinators.short-circuit c.lexer ;
IN: c.preprocessor IN: c.preprocessor
: initial-library-paths ( -- seq ) : initial-library-paths ( -- seq )

View File

@ -77,47 +77,6 @@ IN: sequence-parser.tests
[ "cd" ] [ "cd" ]
[ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test [ "abcd" <sequence-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
[ f ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
] unit-test
[ "abc\\\"def" ]
[
"\"abc\\\"def\" asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "asdf" ]
[
"\"abc\" asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ skip-whitespace "asdf" take-sequence ] bi
] unit-test
[ f ]
[
"\"abc asdf" <sequence-parser>
CHAR: \ CHAR: " take-quoted-string
] unit-test
[ "\"abc" ]
[
"\"abc asdf" <sequence-parser>
[ CHAR: \ CHAR: " take-quoted-string drop ]
[ "\"abc" take-sequence ] bi
] unit-test
[ "c" ]
[ "c" <sequence-parser> take-token ] unit-test
[ f ]
[ "" <sequence-parser> take-token ] unit-test
[ "abcd e \\\"f g" ]
[ "\"abcd e \\\"f g\"" <sequence-parser> CHAR: \ CHAR: " take-token* ] unit-test
[ f ] [ f ]
[ "" <sequence-parser> take-rest ] unit-test [ "" <sequence-parser> take-rest ] unit-test
@ -140,63 +99,6 @@ IN: sequence-parser.tests
[ "abcd" ] [ "abcd" <sequence-parser> 4 take-n ] unit-test [ "abcd" ] [ "abcd" <sequence-parser> 4 take-n ] unit-test
[ "abcd" "efg" ] [ "abcdefg" <sequence-parser> [ 4 take-n ] [ take-rest ] bi ] unit-test [ "abcd" "efg" ] [ "abcdefg" <sequence-parser> [ 4 take-n ] [ take-rest ] bi ] unit-test
[ "asdfasdf" ] [
"/*asdfasdf*/" <sequence-parser> take-c-comment
] unit-test
[ "k" ] [
"/*asdfasdf*/k" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
] unit-test
[ "omg" ] [
"//asdfasdf\nomg" <sequence-parser>
[ take-c++-comment drop ] [ take-rest ] bi
] unit-test
[ "omg" ] [
"omg" <sequence-parser>
[ take-c++-comment drop ] [ take-rest ] bi
] unit-test
[ "/*asdfasdf" ] [
"/*asdfasdf" <sequence-parser> [ take-c-comment drop ] [ take-rest ] bi
] unit-test
[ "asdf" "eoieoei" ] [
"//asdf\neoieoei" <sequence-parser>
[ take-c++-comment ] [ take-rest ] bi
] unit-test
[ f "33asdf" ]
[ "33asdf" <sequence-parser> [ take-c-identifier ] [ take-rest ] bi ] unit-test
[ "asdf" ]
[ "asdf" <sequence-parser> take-c-identifier ] unit-test
[ "_asdf" ]
[ "_asdf" <sequence-parser> take-c-identifier ] unit-test
[ "_asdf400" ]
[ "_asdf400" <sequence-parser> take-c-identifier ] unit-test
[ "123" ]
[ "123jjj" <sequence-parser> take-c-integer ] unit-test
[ "123uLL" ]
[ "123uLL" <sequence-parser> take-c-integer ] unit-test
[ "123ull" ]
[ "123ull" <sequence-parser> take-c-integer ] unit-test
[ "123u" ]
[ "123u" <sequence-parser> take-c-integer ] unit-test
[ 36 ]
[
" //jofiejoe\n //eoieow\n/*asdf*/\n "
<sequence-parser> skip-whitespace/comments n>>
] unit-test
[ f ] [ f ]
[ "\n" <sequence-parser> take-integer ] unit-test [ "\n" <sequence-parser> take-integer ] unit-test

View File

@ -1,9 +1,8 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman. ! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular USING: accessors circular combinators.short-circuit fry io
unicode.case unicode.categories locals combinators.short-circuit kernel locals math math.order sequences sorting.functor
make combinators io splitting math.parser math.ranges sorting.slots unicode.categories ;
generalizations sorting.functor math.order sorting.slots ;
IN: sequence-parser IN: sequence-parser
TUPLE: sequence-parser sequence n ; TUPLE: sequence-parser sequence n ;
@ -109,42 +108,6 @@ TUPLE: sequence-parser sequence n ;
: skip-whitespace-eol ( sequence-parser -- sequence-parser ) : skip-whitespace-eol ( sequence-parser -- sequence-parser )
[ [ current " \t\r" member? not ] take-until drop ] keep ; [ [ current " \t\r" member? not ] take-until drop ] keep ;
: take-c-comment ( sequence-parser -- seq/f )
[
dup "/*" take-sequence [
"*/" take-until-sequence*
] [
drop f
] if
] with-sequence-parser ;
: take-c++-comment ( sequence-parser -- seq/f )
[
dup "//" take-sequence [
[
[
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
] take-until
] [
advance drop
] bi
] [
drop f
] if
] with-sequence-parser ;
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
skip-whitespace-eol
{
{ [ dup take-c-comment ] [ skip-whitespace/comments ] }
{ [ dup take-c++-comment ] [ skip-whitespace/comments ] }
[ ]
} cond ;
: take-define-identifier ( sequence-parser -- string )
skip-whitespace/comments
[ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
: take-rest-slice ( sequence-parser -- sequence/f ) : take-rest-slice ( sequence-parser -- sequence/f )
[ sequence>> ] [ n>> ] bi [ sequence>> ] [ n>> ] bi
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline 2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
@ -158,35 +121,6 @@ TUPLE: sequence-parser sequence n ;
: parse-sequence ( sequence quot -- ) : parse-sequence ( sequence quot -- )
[ <sequence-parser> ] dip call ; inline [ <sequence-parser> ] dip call ; inline
:: take-quoted-string ( sequence-parser escape-char quote-char -- string )
sequence-parser n>> :> start-n
sequence-parser advance
[
{
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
[ current quote-char = not ]
} 1||
] take-while :> string
sequence-parser current quote-char = [
sequence-parser advance* string
] [
start-n sequence-parser (>>n) f
] if ;
: (take-token) ( sequence-parser -- string )
skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
:: take-token* ( sequence-parser escape-char quote-char -- string/f )
sequence-parser skip-whitespace
dup current {
{ quote-char [ escape-char quote-char take-quoted-string ] }
{ f [ drop f ] }
[ drop (take-token) ]
} case ;
: take-token ( sequence-parser -- string/f )
CHAR: \ CHAR: " take-token* ;
: take-integer ( sequence-parser -- n/f ) : take-integer ( sequence-parser -- n/f )
[ current digit? ] take-while ; [ current digit? ] take-while ;
@ -198,27 +132,6 @@ TUPLE: sequence-parser sequence n ;
sequence-parser [ n + ] change-n drop sequence-parser [ n + ] change-n drop
] if ; ] if ;
: c-identifier-begin? ( ch -- ? )
CHAR: a CHAR: z [a,b]
CHAR: A CHAR: Z [a,b]
{ CHAR: _ } 3append member? ;
: c-identifier-ch? ( ch -- ? )
CHAR: a CHAR: z [a,b]
CHAR: A CHAR: Z [a,b]
CHAR: 0 CHAR: 9 [a,b]
{ CHAR: _ } 4 nappend member? ;
: (take-c-identifier) ( sequence-parser -- string/f )
dup current c-identifier-begin? [
[ current c-identifier-ch? ] take-while
] [
drop f
] if ;
: take-c-identifier ( sequence-parser -- string/f )
[ (take-c-identifier) ] with-sequence-parser ;
<< "length" [ length ] define-sorting >> << "length" [ length ] define-sorting >>
: sort-tokens ( seq -- seq' ) : sort-tokens ( seq -- seq' )
@ -228,34 +141,8 @@ TUPLE: sequence-parser sequence n ;
swap swap
'[ _ [ swap take-sequence ] with-sequence-parser ] find nip ; '[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
: take-longest ( sequence-parser seq -- seq ) : take-longest ( sequence-parser seq -- seq )
sort-tokens take-first-matching ; sort-tokens take-first-matching ;
: take-c-integer ( sequence-parser -- string/f )
[
dup take-integer [
swap
{ "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" }
take-longest [ append ] when*
] [
drop f
] if*
] with-sequence-parser ;
CONSTANT: c-punctuators
{
"[" "]" "(" ")" "{" "}" "." "->"
"++" "--" "&" "*" "+" "-" "~" "!"
"/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
"?" ":" ";" "..."
"=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
"," "#" "##"
"<:" ":>" "<%" "%>" "%:" "%:%:"
}
: take-c-punctuator ( sequence-parser -- string/f )
c-punctuators take-longest ;
: write-full ( sequence-parser -- ) sequence>> write ; : write-full ( sequence-parser -- ) sequence>> write ;
: write-rest ( sequence-parser -- ) take-rest write ; : write-rest ( sequence-parser -- ) take-rest write ;