more parsers for c syntax
parent
a368a8da51
commit
badbd014be
|
@ -1,4 +1,5 @@
|
||||||
USING: tools.test sequence-parser ascii kernel accessors ;
|
USING: tools.test sequence-parser unicode.categories kernel
|
||||||
|
accessors ;
|
||||||
IN: sequence-parser.tests
|
IN: sequence-parser.tests
|
||||||
|
|
||||||
[ "hello" ]
|
[ "hello" ]
|
||||||
|
@ -189,3 +190,15 @@ IN: sequence-parser.tests
|
||||||
|
|
||||||
[ "123u" ]
|
[ "123u" ]
|
||||||
[ "123u" <sequence-parser> take-c-integer ] unit-test
|
[ "123u" <sequence-parser> take-c-integer ] unit-test
|
||||||
|
|
||||||
|
[ 36 ]
|
||||||
|
[
|
||||||
|
" //jofiejoe\n //eoieow\n/*asdf*/\n "
|
||||||
|
<sequence-parser> skip-whitespace/comments n>>
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
[ f ]
|
||||||
|
[ "\n" <sequence-parser> take-integer ] unit-test
|
||||||
|
|
||||||
|
[ "\n" ] [ "\n" <sequence-parser> [ ] take-while ] unit-test
|
||||||
|
[ f ] [ "\n" <sequence-parser> [ not ] take-while ] unit-test
|
||||||
|
|
|
@ -52,7 +52,7 @@ TUPLE: sequence-parser sequence n ;
|
||||||
] [
|
] [
|
||||||
[ drop n>> ]
|
[ drop n>> ]
|
||||||
[ skip-until ]
|
[ skip-until ]
|
||||||
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq
|
[ drop [ n>> ] [ sequence>> ] bi ] 2tri subseq f like
|
||||||
] if ; inline
|
] if ; inline
|
||||||
|
|
||||||
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
|
: take-while ( sequence-parser quot: ( obj -- ? ) -- sequence/f )
|
||||||
|
@ -104,6 +104,45 @@ TUPLE: sequence-parser sequence n ;
|
||||||
: skip-whitespace ( sequence-parser -- sequence-parser )
|
: skip-whitespace ( sequence-parser -- sequence-parser )
|
||||||
[ [ current blank? not ] take-until drop ] keep ;
|
[ [ current blank? not ] take-until drop ] keep ;
|
||||||
|
|
||||||
|
: skip-whitespace-eol ( sequence-parser -- sequence-parser )
|
||||||
|
[ [ current " \t\r" member? not ] take-until drop ] keep ;
|
||||||
|
|
||||||
|
: take-c-comment ( sequence-parser -- seq/f )
|
||||||
|
[
|
||||||
|
dup "/*" take-sequence [
|
||||||
|
"*/" take-until-sequence*
|
||||||
|
] [
|
||||||
|
drop f
|
||||||
|
] if
|
||||||
|
] with-sequence-parser ;
|
||||||
|
|
||||||
|
: take-c++-comment ( sequence-parser -- seq/f )
|
||||||
|
[
|
||||||
|
dup "//" take-sequence [
|
||||||
|
[
|
||||||
|
[
|
||||||
|
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
|
||||||
|
] take-until
|
||||||
|
] [
|
||||||
|
advance drop
|
||||||
|
] bi
|
||||||
|
] [
|
||||||
|
drop f
|
||||||
|
] if
|
||||||
|
] with-sequence-parser ;
|
||||||
|
|
||||||
|
: skip-whitespace/comments ( sequence-parser -- sequence-parser )
|
||||||
|
skip-whitespace-eol
|
||||||
|
{
|
||||||
|
{ [ dup take-c-comment ] [ skip-whitespace/comments ] }
|
||||||
|
{ [ dup take-c++-comment ] [ skip-whitespace/comments ] }
|
||||||
|
[ ]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
|
: take-define-identifier ( sequence-parser -- string )
|
||||||
|
skip-whitespace/comments
|
||||||
|
[ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ;
|
||||||
|
|
||||||
: take-rest-slice ( sequence-parser -- sequence/f )
|
: take-rest-slice ( sequence-parser -- sequence/f )
|
||||||
[ sequence>> ] [ n>> ] bi
|
[ sequence>> ] [ n>> ] bi
|
||||||
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
|
2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline
|
||||||
|
@ -157,30 +196,6 @@ TUPLE: sequence-parser sequence n ;
|
||||||
sequence-parser [ n + ] change-n drop
|
sequence-parser [ n + ] change-n drop
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
: take-c-comment ( sequence-parser -- seq/f )
|
|
||||||
[
|
|
||||||
dup "/*" take-sequence [
|
|
||||||
"*/" take-until-sequence*
|
|
||||||
] [
|
|
||||||
drop f
|
|
||||||
] if
|
|
||||||
] with-sequence-parser ;
|
|
||||||
|
|
||||||
: take-c++-comment ( sequence-parser -- seq/f )
|
|
||||||
[
|
|
||||||
dup "//" take-sequence [
|
|
||||||
[
|
|
||||||
[
|
|
||||||
{ [ current CHAR: \n = ] [ sequence-parse-end? ] } 1||
|
|
||||||
] take-until
|
|
||||||
] [
|
|
||||||
advance drop
|
|
||||||
] bi
|
|
||||||
] [
|
|
||||||
drop f
|
|
||||||
] if
|
|
||||||
] with-sequence-parser ;
|
|
||||||
|
|
||||||
: c-identifier-begin? ( ch -- ? )
|
: c-identifier-begin? ( ch -- ? )
|
||||||
CHAR: a CHAR: z [a,b]
|
CHAR: a CHAR: z [a,b]
|
||||||
CHAR: A CHAR: Z [a,b]
|
CHAR: A CHAR: Z [a,b]
|
||||||
|
@ -192,29 +207,30 @@ TUPLE: sequence-parser sequence n ;
|
||||||
CHAR: 0 CHAR: 9 [a,b]
|
CHAR: 0 CHAR: 9 [a,b]
|
||||||
{ CHAR: _ } 4 nappend member? ;
|
{ CHAR: _ } 4 nappend member? ;
|
||||||
|
|
||||||
: take-c-identifier ( state-parser -- string/f )
|
: (take-c-identifier) ( sequence-parser -- string/f )
|
||||||
[
|
|
||||||
dup current c-identifier-begin? [
|
dup current c-identifier-begin? [
|
||||||
[ current c-identifier-ch? ] take-while
|
[ current c-identifier-ch? ] take-while
|
||||||
] [
|
] [
|
||||||
drop f
|
drop f
|
||||||
] if
|
] if ;
|
||||||
] with-sequence-parser ;
|
|
||||||
|
: take-c-identifier ( sequence-parser -- string/f )
|
||||||
|
[ (take-c-identifier) ] with-sequence-parser ;
|
||||||
|
|
||||||
<< "length" [ length ] define-sorting >>
|
<< "length" [ length ] define-sorting >>
|
||||||
|
|
||||||
: sort-tokens ( seq -- seq' )
|
: sort-tokens ( seq -- seq' )
|
||||||
{ length>=< <=> } sort-by ;
|
{ length>=< <=> } sort-by ;
|
||||||
|
|
||||||
: take-first-matching ( state-parser seq -- seq )
|
: take-first-matching ( sequence-parser seq -- seq )
|
||||||
swap
|
swap
|
||||||
'[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
|
'[ _ [ swap take-sequence ] with-sequence-parser ] find nip ;
|
||||||
|
|
||||||
|
|
||||||
: take-longest ( state-parser seq -- seq )
|
: take-longest ( sequence-parser seq -- seq )
|
||||||
sort-tokens take-first-matching ;
|
sort-tokens take-first-matching ;
|
||||||
|
|
||||||
: take-c-integer ( state-parser -- string/f )
|
: take-c-integer ( sequence-parser -- string/f )
|
||||||
[
|
[
|
||||||
dup take-integer [
|
dup take-integer [
|
||||||
swap
|
swap
|
||||||
|
@ -225,5 +241,19 @@ TUPLE: sequence-parser sequence n ;
|
||||||
] if*
|
] if*
|
||||||
] with-sequence-parser ;
|
] with-sequence-parser ;
|
||||||
|
|
||||||
|
CONSTANT: c-punctuators
|
||||||
|
{
|
||||||
|
"[" "]" "(" ")" "{" "}" "." "->"
|
||||||
|
"++" "--" "&" "*" "+" "-" "~" "!"
|
||||||
|
"/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||"
|
||||||
|
"?" ":" ";" "..."
|
||||||
|
"=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|="
|
||||||
|
"," "#" "##"
|
||||||
|
"<:" ":>" "<%" "%>" "%:" "%:%:"
|
||||||
|
}
|
||||||
|
|
||||||
|
: take-c-punctuator ( sequence-parser -- string/f )
|
||||||
|
c-punctuators take-longest ;
|
||||||
|
|
||||||
: write-full ( sequence-parser -- ) sequence>> write ;
|
: write-full ( sequence-parser -- ) sequence>> write ;
|
||||||
: write-rest ( sequence-parser -- ) take-rest write ;
|
: write-rest ( sequence-parser -- ) take-rest write ;
|
||||||
|
|
Loading…
Reference in New Issue