From a10d490fe2e318de5d55038983474012933abdfc Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 10 Apr 2009 17:50:05 -0500 Subject: [PATCH] more parsing work --- extra/c/preprocessor/preprocessor.factor | 68 +++++++++++-------- .../sequence-parser-tests.factor | 41 ++++++++++- extra/sequence-parser/sequence-parser.factor | 64 ++++++++++++++++- 3 files changed, 140 insertions(+), 33 deletions(-) diff --git a/extra/c/preprocessor/preprocessor.factor b/extra/c/preprocessor/preprocessor.factor index e5029ca683..f787befc31 100644 --- a/extra/c/preprocessor/preprocessor.factor +++ b/extra/c/preprocessor/preprocessor.factor @@ -41,7 +41,7 @@ ifs elifs elses ; DEFER: preprocess-file -ERROR: unknown-c-preprocessor state-parser name ; +ERROR: unknown-c-preprocessor sequence-parser name ; ERROR: bad-include-line line ; @@ -69,8 +69,16 @@ ERROR: header-file-missing path ; drop ] if ; -: handle-include ( preprocessor-state state-parser -- ) - skip-whitespace advance dup previous { +: skip-whitespace/comments ( sequence-parser -- sequence-parser ) + skip-whitespace + { + { [ dup take-c-comment ] [ skip-whitespace/comments ] } + { [ dup take-c++-comment ] [ skip-whitespace/comments ] } + [ ] + } cond ; + +: handle-include ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments advance dup previous { { CHAR: < [ CHAR: > take-until-object read-standard-include ] } { CHAR: " [ CHAR: " take-until-object read-local-include ] } [ bad-include-line ] @@ -81,58 +89,58 @@ ERROR: header-file-missing path ; : readlns ( -- string ) [ (readlns) ] { } make concat ; -: take-define-identifier ( state-parser -- string ) - skip-whitespace +: take-define-identifier ( sequence-parser -- string ) + skip-whitespace/comments [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ; -: handle-define ( preprocessor-state state-parser -- ) +: handle-define ( preprocessor-state sequence-parser -- ) [ take-define-identifier ] - [ skip-whitespace take-rest ] bi + [ skip-whitespace/comments take-rest ] bi "\\" ?tail [ readlns append ] when spin symbol-table>> set-at ; -: handle-undef ( preprocessor-state state-parser -- ) +: handle-undef ( preprocessor-state sequence-parser -- ) take-token swap symbol-table>> delete-at ; -: handle-ifdef ( preprocessor-state state-parser -- ) +: handle-ifdef ( preprocessor-state sequence-parser -- ) [ [ 1 + ] change-ifdef-nesting ] dip take-token over symbol-table>> key? [ drop ] [ t >>processing-disabled? drop ] if ; -: handle-ifndef ( preprocessor-state state-parser -- ) +: handle-ifndef ( preprocessor-state sequence-parser -- ) [ [ 1 + ] change-ifdef-nesting ] dip take-token over symbol-table>> key? [ t >>processing-disabled? drop ] [ drop ] if ; -: handle-endif ( preprocessor-state state-parser -- ) +: handle-endif ( preprocessor-state sequence-parser -- ) drop [ 1 - ] change-ifdef-nesting drop ; -: handle-if ( preprocessor-state state-parser -- ) +: handle-if ( preprocessor-state sequence-parser -- ) [ [ 1 + ] change-ifdef-nesting ] dip - skip-whitespace take-rest swap ifs>> push ; + skip-whitespace/comments take-rest swap ifs>> push ; -: handle-elif ( preprocessor-state state-parser -- ) - skip-whitespace take-rest swap elifs>> push ; +: handle-elif ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap elifs>> push ; -: handle-else ( preprocessor-state state-parser -- ) - skip-whitespace take-rest swap elses>> push ; +: handle-else ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap elses>> push ; -: handle-pragma ( preprocessor-state state-parser -- ) - skip-whitespace take-rest swap pragmas>> push ; +: handle-pragma ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap pragmas>> push ; -: handle-include-next ( preprocessor-state state-parser -- ) - skip-whitespace take-rest swap include-nexts>> push ; +: handle-include-next ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap include-nexts>> push ; -: handle-error ( preprocessor-state state-parser -- ) - skip-whitespace take-rest swap errors>> push ; +: handle-error ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap errors>> push ; ! nip take-rest throw ; -: handle-warning ( preprocessor-state state-parser -- ) - skip-whitespace +: handle-warning ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments take-rest swap warnings>> push ; -: parse-directive ( preprocessor-state state-parser string -- ) +: parse-directive ( preprocessor-state sequence-parser string -- ) { { "warning" [ handle-warning ] } { "error" [ handle-error ] } @@ -150,7 +158,7 @@ ERROR: header-file-missing path ; [ unknown-c-preprocessor ] } case ; -: parse-directive-line ( preprocessor-state state-parser -- ) +: parse-directive-line ( preprocessor-state sequence-parser -- ) advance dup take-token pick processing-disabled?>> [ "endif" = [ @@ -162,14 +170,14 @@ ERROR: header-file-missing path ; parse-directive ] if ; -: preprocess-line ( preprocessor-state state-parser -- ) - skip-whitespace dup current CHAR: # = +: preprocess-line ( preprocessor-state sequence-parser -- ) + skip-whitespace/comments dup current CHAR: # = [ parse-directive-line ] [ swap processing-disabled?>> [ drop ] [ write-full nl ] if ] if ; : preprocess-lines ( preprocessor-state -- ) readln - [ [ preprocess-line ] [ drop preprocess-lines ] 2bi ] + [ [ preprocess-line ] [ drop preprocess-lines ] 2bi ] [ drop ] if* ; ERROR: include-nested-too-deeply ; diff --git a/extra/sequence-parser/sequence-parser-tests.factor b/extra/sequence-parser/sequence-parser-tests.factor index f6339b7127..3b2fcad5eb 100644 --- a/extra/sequence-parser/sequence-parser-tests.factor +++ b/extra/sequence-parser/sequence-parser-tests.factor @@ -126,7 +126,7 @@ IN: sequence-parser.tests [ f ] [ "abc" "abcdefg" take-sequence ] unit-test -[ 1234 ] +[ "1234" ] [ "1234f" take-integer ] unit-test [ "yes" ] @@ -147,6 +147,45 @@ IN: sequence-parser.tests "/*asdfasdf*/k" [ take-c-comment drop ] [ take-rest ] bi ] unit-test +[ "omg" ] [ + "//asdfasdf\nomg" + [ take-c++-comment drop ] [ take-rest ] bi +] unit-test + +[ "omg" ] [ + "omg" + [ take-c++-comment drop ] [ take-rest ] bi +] unit-test + [ "/*asdfasdf" ] [ "/*asdfasdf" [ take-c-comment drop ] [ take-rest ] bi ] unit-test + +[ "asdf" "eoieoei" ] [ + "//asdf\neoieoei" + [ take-c++-comment ] [ take-rest ] bi +] unit-test + +[ f "33asdf" ] +[ "33asdf" [ take-c-identifier ] [ take-rest ] bi ] unit-test + +[ "asdf" ] +[ "asdf" take-c-identifier ] unit-test + +[ "_asdf" ] +[ "_asdf" take-c-identifier ] unit-test + +[ "_asdf400" ] +[ "_asdf400" take-c-identifier ] unit-test + +[ "123" ] +[ "123jjj" take-c-integer ] unit-test + +[ "123uLL" ] +[ "123uLL" take-c-integer ] unit-test + +[ "123ull" ] +[ "123ull" take-c-integer ] unit-test + +[ "123u" ] +[ "123u" take-c-integer ] unit-test diff --git a/extra/sequence-parser/sequence-parser.factor b/extra/sequence-parser/sequence-parser.factor index d5adc56800..4f57a7ccae 100644 --- a/extra/sequence-parser/sequence-parser.factor +++ b/extra/sequence-parser/sequence-parser.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular unicode.case unicode.categories locals combinators.short-circuit -make combinators io splitting math.parser ; +make combinators io splitting math.parser math.ranges +generalizations sorting.functor math.order sorting.slots ; IN: sequence-parser TUPLE: sequence-parser sequence n ; @@ -146,7 +147,7 @@ TUPLE: sequence-parser sequence n ; CHAR: \ CHAR: " take-token* ; : take-integer ( sequence-parser -- n/f ) - [ current digit? ] take-while string>number ; + [ current digit? ] take-while ; :: take-n ( sequence-parser n -- seq/f ) n sequence-parser [ n>> + ] [ sequence>> length ] bi > [ @@ -165,5 +166,64 @@ TUPLE: sequence-parser sequence n ; ] if ] with-sequence-parser ; +: take-c++-comment ( sequence-parser -- seq/f ) + [ + dup "//" take-sequence [ + [ + [ + { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1|| + ] take-until + ] [ + advance drop + ] bi + ] [ + drop f + ] if + ] with-sequence-parser ; + +: c-identifier-begin? ( ch -- ? ) + CHAR: a CHAR: z [a,b] + CHAR: A CHAR: Z [a,b] + { CHAR: _ } 3append member? ; + +: c-identifier-ch? ( ch -- ? ) + CHAR: a CHAR: z [a,b] + CHAR: A CHAR: Z [a,b] + CHAR: 0 CHAR: 9 [a,b] + { CHAR: _ } 4 nappend member? ; + +: take-c-identifier ( state-parser -- string/f ) + [ + dup current c-identifier-begin? [ + [ current c-identifier-ch? ] take-while + ] [ + drop f + ] if + ] with-sequence-parser ; + +<< "length" [ length ] define-sorting >> + +: sort-tokens ( seq -- seq' ) + { length>=< <=> } sort-by ; + +: take-first-matching ( state-parser seq -- seq ) + swap + '[ _ [ swap take-sequence ] with-sequence-parser ] find nip ; + + +: take-longest ( state-parser seq -- seq ) + sort-tokens take-first-matching ; + +: take-c-integer ( state-parser -- string/f ) + [ + dup take-integer [ + swap + { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" } + take-longest [ append ] when* + ] [ + drop f + ] if* + ] with-sequence-parser ; + : write-full ( sequence-parser -- ) sequence>> write ; : write-rest ( sequence-parser -- ) take-rest write ;