From 1660be50a477ee75c77808467a1b8d4ceadb7cc0 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 1 Aug 2009 20:42:29 -0500 Subject: [PATCH] remove some c parsing words from sequence-parser --- extra/c/lexer/authors.txt | 1 + extra/c/lexer/lexer-tests.factor | 103 +++++++++++++++ extra/c/lexer/lexer.factor | 123 ++++++++++++++++++ extra/c/preprocessor/preprocessor.factor | 2 +- .../sequence-parser-tests.factor | 98 -------------- extra/sequence-parser/sequence-parser.factor | 119 +---------------- 6 files changed, 231 insertions(+), 215 deletions(-) create mode 100644 extra/c/lexer/authors.txt create mode 100644 extra/c/lexer/lexer-tests.factor create mode 100644 extra/c/lexer/lexer.factor diff --git a/extra/c/lexer/authors.txt b/extra/c/lexer/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/c/lexer/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/c/lexer/lexer-tests.factor b/extra/c/lexer/lexer-tests.factor new file mode 100644 index 0000000000..c972b8816c --- /dev/null +++ b/extra/c/lexer/lexer-tests.factor @@ -0,0 +1,103 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors c.lexer kernel sequence-parser tools.test ; +IN: c.lexer.tests + +[ 36 ] +[ + " //jofiejoe\n //eoieow\n/*asdf*/\n " + skip-whitespace/comments n>> +] unit-test + +[ f "33asdf" ] +[ "33asdf" [ take-c-identifier ] [ take-rest ] bi ] unit-test + +[ "asdf" ] +[ "asdf" take-c-identifier ] unit-test + +[ "_asdf" ] +[ "_asdf" take-c-identifier ] unit-test + +[ "_asdf400" ] +[ "_asdf400" take-c-identifier ] unit-test + +[ "asdfasdf" ] [ + "/*asdfasdf*/" take-c-comment +] unit-test + +[ "k" ] [ + "/*asdfasdf*/k" [ take-c-comment drop ] [ take-rest ] bi +] unit-test + +[ "omg" ] [ + "//asdfasdf\nomg" + [ take-c++-comment drop ] [ take-rest ] bi +] unit-test + +[ "omg" ] [ + "omg" + [ take-c++-comment drop ] [ take-rest ] bi +] unit-test + +[ "/*asdfasdf" ] [ + "/*asdfasdf" [ take-c-comment drop ] [ take-rest ] bi +] unit-test + +[ "asdf" "eoieoei" ] [ + "//asdf\neoieoei" + [ take-c++-comment ] [ take-rest ] bi +] unit-test + +[ f ] +[ + "\"abc\" asdf" + [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi +] unit-test + +[ "abc\\\"def" ] +[ + "\"abc\\\"def\" asdf" + CHAR: \ CHAR: " take-quoted-string +] unit-test + +[ "asdf" ] +[ + "\"abc\" asdf" + [ CHAR: \ CHAR: " take-quoted-string drop ] + [ skip-whitespace "asdf" take-sequence ] bi +] unit-test + +[ f ] +[ + "\"abc asdf" + CHAR: \ CHAR: " take-quoted-string +] unit-test + +[ "\"abc" ] +[ + "\"abc asdf" + [ CHAR: \ CHAR: " take-quoted-string drop ] + [ "\"abc" take-sequence ] bi +] unit-test + +[ "c" ] +[ "c" take-token ] unit-test + +[ f ] +[ "" take-token ] unit-test + +[ "abcd e \\\"f g" ] +[ "\"abcd e \\\"f g\"" CHAR: \ CHAR: " take-token* ] unit-test + +[ "123" ] +[ "123jjj" take-c-integer ] unit-test + +[ "123uLL" ] +[ "123uLL" take-c-integer ] unit-test + +[ "123ull" ] +[ "123ull" take-c-integer ] unit-test + +[ "123u" ] +[ "123u" take-c-integer ] unit-test + diff --git a/extra/c/lexer/lexer.factor b/extra/c/lexer/lexer.factor new file mode 100644 index 0000000000..962407e6ec --- /dev/null +++ b/extra/c/lexer/lexer.factor @@ -0,0 +1,123 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors combinators combinators.short-circuit +generalizations kernel locals math.order math.ranges +sequence-parser sequences sorting.functor sorting.slots +unicode.categories ; +IN: c.lexer + +: take-c-comment ( sequence-parser -- seq/f ) + [ + dup "/*" take-sequence [ + "*/" take-until-sequence* + ] [ + drop f + ] if + ] with-sequence-parser ; + +: take-c++-comment ( sequence-parser -- seq/f ) + [ + dup "//" take-sequence [ + [ + [ + { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1|| + ] take-until + ] [ + advance drop + ] bi + ] [ + drop f + ] if + ] with-sequence-parser ; + +: skip-whitespace/comments ( sequence-parser -- sequence-parser ) + skip-whitespace-eol + { + { [ dup take-c-comment ] [ skip-whitespace/comments ] } + { [ dup take-c++-comment ] [ skip-whitespace/comments ] } + [ ] + } cond ; + +: take-define-identifier ( sequence-parser -- string ) + skip-whitespace/comments + [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ; + +:: take-quoted-string ( sequence-parser escape-char quote-char -- string ) + sequence-parser n>> :> start-n + sequence-parser advance + [ + { + [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ] + [ current quote-char = not ] + } 1|| + ] take-while :> string + sequence-parser current quote-char = [ + sequence-parser advance* string + ] [ + start-n sequence-parser (>>n) f + ] if ; + +: (take-token) ( sequence-parser -- string ) + skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ; + +:: take-token* ( sequence-parser escape-char quote-char -- string/f ) + sequence-parser skip-whitespace + dup current { + { quote-char [ escape-char quote-char take-quoted-string ] } + { f [ drop f ] } + [ drop (take-token) ] + } case ; + +: take-token ( sequence-parser -- string/f ) + CHAR: \ CHAR: " take-token* ; + +: c-identifier-begin? ( ch -- ? ) + CHAR: a CHAR: z [a,b] + CHAR: A CHAR: Z [a,b] + { CHAR: _ } 3append member? ; + +: c-identifier-ch? ( ch -- ? ) + CHAR: a CHAR: z [a,b] + CHAR: A CHAR: Z [a,b] + CHAR: 0 CHAR: 9 [a,b] + { CHAR: _ } 4 nappend member? ; + +: (take-c-identifier) ( sequence-parser -- string/f ) + dup current c-identifier-begin? [ + [ current c-identifier-ch? ] take-while + ] [ + drop f + ] if ; + +: take-c-identifier ( sequence-parser -- string/f ) + [ (take-c-identifier) ] with-sequence-parser ; + +<< "length" [ length ] define-sorting >> + +: sort-tokens ( seq -- seq' ) + { length>=< <=> } sort-by ; + +: take-c-integer ( sequence-parser -- string/f ) + [ + dup take-integer [ + swap + { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" } + take-longest [ append ] when* + ] [ + drop f + ] if* + ] with-sequence-parser ; + +CONSTANT: c-punctuators + { + "[" "]" "(" ")" "{" "}" "." "->" + "++" "--" "&" "*" "+" "-" "~" "!" + "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" + "?" ":" ";" "..." + "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" + "," "#" "##" + "<:" ":>" "<%" "%>" "%:" "%:%:" + } + +: take-c-punctuator ( sequence-parser -- string/f ) + c-punctuators take-longest ; diff --git a/extra/c/preprocessor/preprocessor.factor b/extra/c/preprocessor/preprocessor.factor index f787befc31..3018fa7a24 100644 --- a/extra/c/preprocessor/preprocessor.factor +++ b/extra/c/preprocessor/preprocessor.factor @@ -4,7 +4,7 @@ USING: sequence-parser io io.encodings.utf8 io.files io.streams.string kernel combinators accessors io.pathnames fry sequences arrays locals namespaces io.directories assocs math splitting make unicode.categories -combinators.short-circuit ; +combinators.short-circuit c.lexer ; IN: c.preprocessor : initial-library-paths ( -- seq ) diff --git a/extra/sequence-parser/sequence-parser-tests.factor b/extra/sequence-parser/sequence-parser-tests.factor index 259fb9f259..af13e5b86e 100644 --- a/extra/sequence-parser/sequence-parser-tests.factor +++ b/extra/sequence-parser/sequence-parser-tests.factor @@ -77,47 +77,6 @@ IN: sequence-parser.tests [ "cd" ] [ "abcd" [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test -[ f ] -[ - "\"abc\" asdf" - [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi -] unit-test - -[ "abc\\\"def" ] -[ - "\"abc\\\"def\" asdf" - CHAR: \ CHAR: " take-quoted-string -] unit-test - -[ "asdf" ] -[ - "\"abc\" asdf" - [ CHAR: \ CHAR: " take-quoted-string drop ] - [ skip-whitespace "asdf" take-sequence ] bi -] unit-test - -[ f ] -[ - "\"abc asdf" - CHAR: \ CHAR: " take-quoted-string -] unit-test - -[ "\"abc" ] -[ - "\"abc asdf" - [ CHAR: \ CHAR: " take-quoted-string drop ] - [ "\"abc" take-sequence ] bi -] unit-test - -[ "c" ] -[ "c" take-token ] unit-test - -[ f ] -[ "" take-token ] unit-test - -[ "abcd e \\\"f g" ] -[ "\"abcd e \\\"f g\"" CHAR: \ CHAR: " take-token* ] unit-test - [ f ] [ "" take-rest ] unit-test @@ -140,63 +99,6 @@ IN: sequence-parser.tests [ "abcd" ] [ "abcd" 4 take-n ] unit-test [ "abcd" "efg" ] [ "abcdefg" [ 4 take-n ] [ take-rest ] bi ] unit-test -[ "asdfasdf" ] [ - "/*asdfasdf*/" take-c-comment -] unit-test - -[ "k" ] [ - "/*asdfasdf*/k" [ take-c-comment drop ] [ take-rest ] bi -] unit-test - -[ "omg" ] [ - "//asdfasdf\nomg" - [ take-c++-comment drop ] [ take-rest ] bi -] unit-test - -[ "omg" ] [ - "omg" - [ take-c++-comment drop ] [ take-rest ] bi -] unit-test - -[ "/*asdfasdf" ] [ - "/*asdfasdf" [ take-c-comment drop ] [ take-rest ] bi -] unit-test - -[ "asdf" "eoieoei" ] [ - "//asdf\neoieoei" - [ take-c++-comment ] [ take-rest ] bi -] unit-test - -[ f "33asdf" ] -[ "33asdf" [ take-c-identifier ] [ take-rest ] bi ] unit-test - -[ "asdf" ] -[ "asdf" take-c-identifier ] unit-test - -[ "_asdf" ] -[ "_asdf" take-c-identifier ] unit-test - -[ "_asdf400" ] -[ "_asdf400" take-c-identifier ] unit-test - -[ "123" ] -[ "123jjj" take-c-integer ] unit-test - -[ "123uLL" ] -[ "123uLL" take-c-integer ] unit-test - -[ "123ull" ] -[ "123ull" take-c-integer ] unit-test - -[ "123u" ] -[ "123u" take-c-integer ] unit-test - -[ 36 ] -[ - " //jofiejoe\n //eoieow\n/*asdf*/\n " - skip-whitespace/comments n>> -] unit-test - [ f ] [ "\n" take-integer ] unit-test diff --git a/extra/sequence-parser/sequence-parser.factor b/extra/sequence-parser/sequence-parser.factor index e46abe8090..0a6f3ef0db 100644 --- a/extra/sequence-parser/sequence-parser.factor +++ b/extra/sequence-parser/sequence-parser.factor @@ -1,9 +1,8 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: namespaces math kernel sequences accessors fry circular -unicode.case unicode.categories locals combinators.short-circuit -make combinators io splitting math.parser math.ranges -generalizations sorting.functor math.order sorting.slots ; +USING: accessors circular combinators.short-circuit fry io +kernel locals math math.order sequences sorting.functor +sorting.slots unicode.categories ; IN: sequence-parser TUPLE: sequence-parser sequence n ; @@ -109,42 +108,6 @@ TUPLE: sequence-parser sequence n ; : skip-whitespace-eol ( sequence-parser -- sequence-parser ) [ [ current " \t\r" member? not ] take-until drop ] keep ; -: take-c-comment ( sequence-parser -- seq/f ) - [ - dup "/*" take-sequence [ - "*/" take-until-sequence* - ] [ - drop f - ] if - ] with-sequence-parser ; - -: take-c++-comment ( sequence-parser -- seq/f ) - [ - dup "//" take-sequence [ - [ - [ - { [ current CHAR: \n = ] [ sequence-parse-end? ] } 1|| - ] take-until - ] [ - advance drop - ] bi - ] [ - drop f - ] if - ] with-sequence-parser ; - -: skip-whitespace/comments ( sequence-parser -- sequence-parser ) - skip-whitespace-eol - { - { [ dup take-c-comment ] [ skip-whitespace/comments ] } - { [ dup take-c++-comment ] [ skip-whitespace/comments ] } - [ ] - } cond ; - -: take-define-identifier ( sequence-parser -- string ) - skip-whitespace/comments - [ current { [ blank? ] [ CHAR: ( = ] } 1|| ] take-until ; - : take-rest-slice ( sequence-parser -- sequence/f ) [ sequence>> ] [ n>> ] bi 2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline @@ -158,35 +121,6 @@ TUPLE: sequence-parser sequence n ; : parse-sequence ( sequence quot -- ) [ ] dip call ; inline -:: take-quoted-string ( sequence-parser escape-char quote-char -- string ) - sequence-parser n>> :> start-n - sequence-parser advance - [ - { - [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ] - [ current quote-char = not ] - } 1|| - ] take-while :> string - sequence-parser current quote-char = [ - sequence-parser advance* string - ] [ - start-n sequence-parser (>>n) f - ] if ; - -: (take-token) ( sequence-parser -- string ) - skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ; - -:: take-token* ( sequence-parser escape-char quote-char -- string/f ) - sequence-parser skip-whitespace - dup current { - { quote-char [ escape-char quote-char take-quoted-string ] } - { f [ drop f ] } - [ drop (take-token) ] - } case ; - -: take-token ( sequence-parser -- string/f ) - CHAR: \ CHAR: " take-token* ; - : take-integer ( sequence-parser -- n/f ) [ current digit? ] take-while ; @@ -198,27 +132,6 @@ TUPLE: sequence-parser sequence n ; sequence-parser [ n + ] change-n drop ] if ; -: c-identifier-begin? ( ch -- ? ) - CHAR: a CHAR: z [a,b] - CHAR: A CHAR: Z [a,b] - { CHAR: _ } 3append member? ; - -: c-identifier-ch? ( ch -- ? ) - CHAR: a CHAR: z [a,b] - CHAR: A CHAR: Z [a,b] - CHAR: 0 CHAR: 9 [a,b] - { CHAR: _ } 4 nappend member? ; - -: (take-c-identifier) ( sequence-parser -- string/f ) - dup current c-identifier-begin? [ - [ current c-identifier-ch? ] take-while - ] [ - drop f - ] if ; - -: take-c-identifier ( sequence-parser -- string/f ) - [ (take-c-identifier) ] with-sequence-parser ; - << "length" [ length ] define-sorting >> : sort-tokens ( seq -- seq' ) @@ -228,34 +141,8 @@ TUPLE: sequence-parser sequence n ; swap '[ _ [ swap take-sequence ] with-sequence-parser ] find nip ; - : take-longest ( sequence-parser seq -- seq ) sort-tokens take-first-matching ; -: take-c-integer ( sequence-parser -- string/f ) - [ - dup take-integer [ - swap - { "ull" "uLL" "Ull" "ULL" "ll" "LL" "l" "L" "u" "U" } - take-longest [ append ] when* - ] [ - drop f - ] if* - ] with-sequence-parser ; - -CONSTANT: c-punctuators - { - "[" "]" "(" ")" "{" "}" "." "->" - "++" "--" "&" "*" "+" "-" "~" "!" - "/" "%" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "^" "|" "&&" "||" - "?" ":" ";" "..." - "=" "*=" "/=" "%=" "+=" "-=" "<<=" ">>=" "&=" "^=" "|=" - "," "#" "##" - "<:" ":>" "<%" "%>" "%:" "%:%:" - } - -: take-c-punctuator ( sequence-parser -- string/f ) - c-punctuators take-longest ; - : write-full ( sequence-parser -- ) sequence>> write ; : write-rest ( sequence-parser -- ) take-rest write ;