From 9bee1fe0041be56b814fe47d0cbe09173a8bdcda Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Wed, 1 Apr 2009 22:39:20 -0500 Subject: [PATCH 1/4] fix take-rest for out of bounds --- extra/html/parser/state/state-tests.factor | 6 ++++++ extra/html/parser/state/state.factor | 13 ++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index 63916a3c1c..75db1a373e 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -93,3 +93,9 @@ IN: html.parser.state.tests [ "abcd e \\\"f g" ] [ "\"abcd e \\\"f g\"" CHAR: \ CHAR: " take-token* ] unit-test + +[ "" ] +[ "" take-rest ] unit-test + +[ "" ] +[ "abc" dup "abc" take-sequence drop take-rest ] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index 86adb0f914..b7936f6005 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular -unicode.case unicode.categories locals combinators.short-circuit -make combinators ; +unicode.case ascii locals combinators.short-circuit +make combinators io splitting ; IN: html.parser.state @@ -74,8 +74,12 @@ TUPLE: state-parser sequence n ; : skip-whitespace ( state-parser -- state-parser ) [ [ current blank? not ] take-until drop ] keep ; +: take-rest-slice ( state-parser -- sequence/f ) + [ sequence>> ] [ n>> ] bi + 2dup [ length ] dip < [ 2drop f ] [ tail-slice ] if ; inline + : take-rest ( state-parser -- sequence ) - [ drop f ] take-until ; inline + [ take-rest-slice ] [ sequence>> like ] bi ; : take-until-object ( state-parser obj -- sequence ) '[ current _ = ] take-until ; @@ -111,3 +115,6 @@ TUPLE: state-parser sequence n ; : take-token ( state-parser -- string/f ) CHAR: \ CHAR: " take-token* ; + +: write-full ( state-parser -- ) sequence>> write ; +: write-rest ( state-parser -- ) take-rest write ; From 4ef0344477d4619c5127579279395a2b74aa7289 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 2 Apr 2009 01:12:09 -0500 Subject: [PATCH 2/4] Tabs are blank (better unicode whitespace support coming soon) --- basis/unicode/categories/categories-tests.factor | 5 +++++ basis/unicode/categories/categories.factor | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/basis/unicode/categories/categories-tests.factor b/basis/unicode/categories/categories-tests.factor index 1e718cf9b7..0970df7ad8 100644 --- a/basis/unicode/categories/categories-tests.factor +++ b/basis/unicode/categories/categories-tests.factor @@ -12,3 +12,8 @@ IN: unicode.categories.tests [ "Lo" ] [ HEX: 3450 category ] unit-test [ "Lo" ] [ HEX: 4DB5 category ] unit-test [ "Cs" ] [ HEX: DD00 category ] unit-test +[ t ] [ CHAR: \t blank? ] unit-test +[ t ] [ CHAR: \s blank? ] unit-test +[ t ] [ CHAR: \r blank? ] unit-test +[ t ] [ CHAR: \n blank? ] unit-test +[ f ] [ CHAR: a blank? ] unit-test diff --git a/basis/unicode/categories/categories.factor b/basis/unicode/categories/categories.factor index 126c03c869..4ca5c9a90e 100644 --- a/basis/unicode/categories/categories.factor +++ b/basis/unicode/categories/categories.factor @@ -3,7 +3,7 @@ USING: unicode.categories.syntax sequences unicode.data ; IN: unicode.categories -CATEGORY: blank Zs Zl Zp | "\r\n" member? ; +CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ; CATEGORY: letter Ll | "Other_Lowercase" property? ; CATEGORY: LETTER Lu | "Other_Uppercase" property? ; CATEGORY: Letter Lu Ll Lt Lm Lo Nl ; From 2325710a4ff61ddbf3624e458e7dff391065622f Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 2 Apr 2009 01:17:36 -0500 Subject: [PATCH 3/4] beginnings of a c preprocessor -- needs #if, #elif, #else --- extra/c/preprocessor/authors.txt | 1 + .../c/preprocessor/preprocessor-tests.factor | 16 ++ extra/c/preprocessor/preprocessor.factor | 155 ++++++++++++++++++ extra/c/tests/test1/README | 1 + extra/c/tests/test1/hi.h | 1 + extra/c/tests/test1/lo.h | 1 + extra/c/tests/test1/test1.c | 1 + extra/c/tests/test2/README | 1 + extra/c/tests/test2/test2.c | 17 ++ extra/c/tests/test3/README | 1 + extra/c/tests/test3/test3.c | 1 + extra/c/tests/test4/test4.c | 2 + 12 files changed, 198 insertions(+) create mode 100644 extra/c/preprocessor/authors.txt create mode 100644 extra/c/preprocessor/preprocessor-tests.factor create mode 100644 extra/c/preprocessor/preprocessor.factor create mode 100644 extra/c/tests/test1/README create mode 100644 extra/c/tests/test1/hi.h create mode 100644 extra/c/tests/test1/lo.h create mode 100644 extra/c/tests/test1/test1.c create mode 100644 extra/c/tests/test2/README create mode 100644 extra/c/tests/test2/test2.c create mode 100644 extra/c/tests/test3/README create mode 100644 extra/c/tests/test3/test3.c create mode 100644 extra/c/tests/test4/test4.c diff --git a/extra/c/preprocessor/authors.txt b/extra/c/preprocessor/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/c/preprocessor/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/c/preprocessor/preprocessor-tests.factor b/extra/c/preprocessor/preprocessor-tests.factor new file mode 100644 index 0000000000..d86b85a1b1 --- /dev/null +++ b/extra/c/preprocessor/preprocessor-tests.factor @@ -0,0 +1,16 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test c.preprocessor kernel accessors ; +IN: c.preprocessor.tests + +[ "vocab:c/tests/test1/test1.c" start-preprocess-file ] +[ include-nested-too-deeply? ] must-fail-with + +[ "yo\n\n\n\nyo4\n" ] +[ "vocab:c/tests/test2/test2.c" start-preprocess-file nip ] unit-test + +[ "vocab:c/tests/test3/test3.c" start-preprocess-file ] +[ "\"BOO\"" = ] must-fail-with + +[ V{ "\"omg\"" "\"lol\"" } ] +[ "vocab:c/tests/test4/test4.c" start-preprocess-file drop warnings>> ] unit-test diff --git a/extra/c/preprocessor/preprocessor.factor b/extra/c/preprocessor/preprocessor.factor new file mode 100644 index 0000000000..89292eb74b --- /dev/null +++ b/extra/c/preprocessor/preprocessor.factor @@ -0,0 +1,155 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: html.parser.state io io.encodings.utf8 io.files +io.streams.string kernel combinators accessors io.pathnames +fry sequences arrays locals namespaces io.directories +assocs math splitting make ; +IN: c.preprocessor + +: initial-library-paths ( -- seq ) + V{ "/usr/include" } clone ; + +TUPLE: preprocessor-state library-paths symbol-table +include-nesting include-nesting-max processing-disabled? +ifdef-nesting warnings ; + +: ( -- preprocessor-state ) + preprocessor-state new + initial-library-paths >>library-paths + H{ } clone >>symbol-table + 0 >>include-nesting + 200 >>include-nesting-max + 0 >>ifdef-nesting + V{ } clone >>warnings ; + +DEFER: preprocess-file + +ERROR: unknown-c-preprocessor state-parser name ; + +ERROR: bad-include-line line ; + +ERROR: header-file-missing path ; + +:: read-standard-include ( preprocessor-state path -- ) + preprocessor-state dup library-paths>> + [ path append-path exists? ] find nip + [ + dup [ + path append-path + preprocess-file + ] with-directory + ] [ + ! path header-file-missing + drop + ] if* ; + +:: read-local-include ( preprocessor-state path -- ) + current-directory get path append-path dup :> full-path + dup exists? [ + [ preprocessor-state ] dip preprocess-file + ] [ + ! full-path header-file-missing + drop + ] if ; + +: handle-include ( preprocessor-state state-parser -- ) + skip-whitespace advance dup previous { + { CHAR: < [ CHAR: > take-until-object read-standard-include ] } + { CHAR: " [ CHAR: " take-until-object read-local-include ] } + [ bad-include-line ] + } case ; + +: (readlns) ( -- ) + readln "\\" ?tail [ , ] dip [ (readlns) ] when ; + +: readlns ( -- string ) [ (readlns) ] { } make concat ; + +: handle-define ( preprocessor-state state-parser -- ) + [ take-token ] [ take-rest ] bi + "\\" ?tail [ readlns append ] when + spin symbol-table>> set-at ; + +: handle-undef ( preprocessor-state state-parser -- ) + take-token swap symbol-table>> delete-at ; + +: handle-ifdef ( preprocessor-state state-parser -- ) + [ [ 1 + ] change-ifdef-nesting ] dip + take-token over symbol-table>> key? + [ drop ] [ t >>processing-disabled? drop ] if ; + +: handle-ifndef ( preprocessor-state state-parser -- ) + [ [ 1 + ] change-ifdef-nesting ] dip + take-token over symbol-table>> key? + [ t >>processing-disabled? drop ] + [ drop ] if ; + +: handle-endif ( preprocessor-state state-parser -- ) + drop [ 1 - ] change-ifdef-nesting drop ; + +: handle-error ( preprocessor-state state-parser -- ) + skip-whitespace + nip take-rest throw ; + +: handle-warning ( preprocessor-state state-parser -- ) + skip-whitespace + take-rest swap warnings>> push ; + +: parse-directive ( preprocessor-state state-parser string -- ) + { + { "warning" [ handle-warning ] } + { "error" [ handle-error ] } + { "include" [ handle-include ] } + { "define" [ handle-define ] } + { "undef" [ handle-undef ] } + { "ifdef" [ handle-ifdef ] } + { "ifndef" [ handle-ifndef ] } + { "endif" [ handle-endif ] } + { "if" [ 2drop ] } + { "elif" [ 2drop ] } + { "else" [ 2drop ] } + { "pragma" [ 2drop ] } + { "include_next" [ 2drop ] } + [ unknown-c-preprocessor ] + } case ; + +: parse-directive-line ( preprocessor-state state-parser -- ) + advance dup take-token + pick processing-disabled?>> [ + "endif" = [ + drop f >>processing-disabled? + [ 1 - ] change-ifdef-nesting + drop + ] [ 2drop ] if + ] [ + parse-directive + ] if ; + +: preprocess-line ( preprocessor-state state-parser -- ) + skip-whitespace dup current CHAR: # = + [ parse-directive-line ] + [ swap processing-disabled?>> [ drop ] [ write-full nl ] if ] if ; + +: preprocess-lines ( preprocessor-state -- ) + readln + [ [ preprocess-line ] [ drop preprocess-lines ] 2bi ] + [ drop ] if* ; + +ERROR: include-nested-too-deeply ; + +: check-nesting ( preprocessor-state -- preprocessor-state ) + [ 1 + ] change-include-nesting + dup [ include-nesting>> ] [ include-nesting-max>> ] bi > [ + include-nested-too-deeply + ] when ; + +: preprocess-file ( preprocessor-state path -- ) + [ check-nesting ] dip + [ utf8 [ preprocess-lines ] with-file-reader ] + [ drop [ 1 - ] change-include-nesting drop ] 2bi ; + +: start-preprocess-file ( path -- preprocessor-state string ) + dup parent-directory [ + [ + [ dup ] dip preprocess-file + ] with-string-writer + ] with-directory ; diff --git a/extra/c/tests/test1/README b/extra/c/tests/test1/README new file mode 100644 index 0000000000..99873133b2 --- /dev/null +++ b/extra/c/tests/test1/README @@ -0,0 +1 @@ +Tests if the preprocessor bails on an infinite loop caused by mutually recursive #include lines. diff --git a/extra/c/tests/test1/hi.h b/extra/c/tests/test1/hi.h new file mode 100644 index 0000000000..c9f337c47a --- /dev/null +++ b/extra/c/tests/test1/hi.h @@ -0,0 +1 @@ +#include "lo.h" diff --git a/extra/c/tests/test1/lo.h b/extra/c/tests/test1/lo.h new file mode 100644 index 0000000000..d59fdd272e --- /dev/null +++ b/extra/c/tests/test1/lo.h @@ -0,0 +1 @@ +#include "hi.h" diff --git a/extra/c/tests/test1/test1.c b/extra/c/tests/test1/test1.c new file mode 100644 index 0000000000..d59fdd272e --- /dev/null +++ b/extra/c/tests/test1/test1.c @@ -0,0 +1 @@ +#include "hi.h" diff --git a/extra/c/tests/test2/README b/extra/c/tests/test2/README new file mode 100644 index 0000000000..4244828197 --- /dev/null +++ b/extra/c/tests/test2/README @@ -0,0 +1 @@ +Tests whether #define and #ifdef/#endif work in the positive case. diff --git a/extra/c/tests/test2/test2.c b/extra/c/tests/test2/test2.c new file mode 100644 index 0000000000..4cc4191db1 --- /dev/null +++ b/extra/c/tests/test2/test2.c @@ -0,0 +1,17 @@ +#define YO +#ifdef YO +yo +#endif + +#define YO2 +#ifndef YO2 +yo2 +#endif + +#ifdef YO3 +yo3 +#endif + +#ifndef YO4 +yo4 +#endif diff --git a/extra/c/tests/test3/README b/extra/c/tests/test3/README new file mode 100644 index 0000000000..4244828197 --- /dev/null +++ b/extra/c/tests/test3/README @@ -0,0 +1 @@ +Tests whether #define and #ifdef/#endif work in the positive case. diff --git a/extra/c/tests/test3/test3.c b/extra/c/tests/test3/test3.c new file mode 100644 index 0000000000..8d08e836b2 --- /dev/null +++ b/extra/c/tests/test3/test3.c @@ -0,0 +1 @@ +#error "BOO" diff --git a/extra/c/tests/test4/test4.c b/extra/c/tests/test4/test4.c new file mode 100644 index 0000000000..5acd20da67 --- /dev/null +++ b/extra/c/tests/test4/test4.c @@ -0,0 +1,2 @@ +#warning "omg" +#warning "lol" From 7c7742cafa7089c2f05837207597e0e0a9bee5b1 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 2 Apr 2009 01:18:53 -0500 Subject: [PATCH 4/4] use unicode instead of ascii again --- extra/html/parser/state/state.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index b7936f6005..5f845ce810 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular -unicode.case ascii locals combinators.short-circuit +unicode.case unicode.categories locals combinators.short-circuit make combinators io splitting ; IN: html.parser.state