From f64b8a2279c48e616e1b7dfb64adc6743fe56a92 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Wed, 14 Jan 2009 02:25:31 +0100 Subject: [PATCH 01/25] FUEL: Fix ratio and float font lock. --- misc/fuel/fuel-listener.el | 4 +--- misc/fuel/fuel-syntax.el | 14 +++++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/misc/fuel/fuel-listener.el b/misc/fuel/fuel-listener.el index 66034225f1..1655356227 100644 --- a/misc/fuel/fuel-listener.el +++ b/misc/fuel/fuel-listener.el @@ -132,8 +132,7 @@ buffer." (defun fuel-listener--setup-completion () (setq fuel-syntax--current-vocab-function 'fuel-listener--current-vocab) - (setq fuel-syntax--usings-function 'fuel-listener--usings) - (set-syntax-table fuel-syntax--syntax-table)) + (setq fuel-syntax--usings-function 'fuel-listener--usings)) ;;; Stack mode support @@ -160,7 +159,6 @@ buffer." (set (make-local-variable 'comint-prompt-regexp) fuel-con--prompt-regex) (set (make-local-variable 'comint-use-prompt-regexp) t) (set (make-local-variable 'comint-prompt-read-only) t) - (set-syntax-table fuel-syntax--syntax-table) (fuel-listener--setup-completion) (fuel-listener--setup-stack-mode)) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index b74b0afc11..cea783053a 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -87,11 +87,19 @@ (defconst fuel-syntax--integer-regex "\\_<-?[0-9]+\\_>") -(defconst fuel-syntax--ratio-regex - "\\_<-?\\([0-9]+\\+\\)?[0-9]+/-?[0-9]+\\_>") +(defconst fuel-syntax--raw-float-regex + "[0-9]*\\.[0-9]*\\([eE][+-]?[0-9]+\\)?") (defconst fuel-syntax--float-regex - "\\_<-?[0-9]+\\.[0-9]*\\([eE][+-]?[0-9]+\\)?\\_>") + (format "\\_<-?%s\\_>" fuel-syntax--raw-float-regex)) + +(defconst fuel-syntax--number-regex + (format "\\([0-9]+\\|%s\\)" fuel-syntax--raw-float-regex)) + +(defconst fuel-syntax--ratio-regex + (format "\\_<[+-]?%s/-?%s\\_>" + fuel-syntax--number-regex + fuel-syntax--number-regex)) (defconst fuel-syntax--bad-string-regex "\"\\([^\"]\\|\\\\\"\\)*\n") From 0ea51ee176b47a96daed61d6dae67887197240bb Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Wed, 14 Jan 2009 21:51:01 +0100 Subject: [PATCH 02/25] FUEL: Font lock for (( ... )). --- misc/fuel/fuel-font-lock.el | 8 ++++---- misc/fuel/fuel-syntax.el | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/misc/fuel/fuel-font-lock.el b/misc/fuel/fuel-font-lock.el index 99a7c7b8fb..dc123b4874 100644 --- a/misc/fuel/fuel-font-lock.el +++ b/misc/fuel/fuel-font-lock.el @@ -74,9 +74,7 @@ (defconst fuel-font-lock--font-lock-keywords `((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect) - (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word) (,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word) - ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word) (,fuel-syntax--vocab-ref-regexp 2 'factor-font-lock-vocabulary-name) (,fuel-syntax--declaration-words-regex . 'factor-font-lock-declaration) (,fuel-syntax--word-definition-regex 2 'factor-font-lock-word) @@ -89,12 +87,14 @@ (,fuel-syntax--type-definition-regex 2 'factor-font-lock-type-name) (,fuel-syntax--method-definition-regex (1 'factor-font-lock-type-name) (2 'factor-font-lock-word)) - (,fuel-syntax--parent-type-regex 2 'factor-font-lock-type-name) + (,fuel-syntax--tuple-decl-regex 2 'factor-font-lock-type-name) (,fuel-syntax--constructor-regex . 'factor-font-lock-constructor) (,fuel-syntax--setter-regex . 'factor-font-lock-setter-word) (,fuel-syntax--getter-regex . 'factor-font-lock-getter-word) (,fuel-syntax--symbol-definition-regex 2 'factor-font-lock-symbol) - (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax))) + (,fuel-syntax--bad-string-regex . 'factor-font-lock-invalid-syntax) + ("\\(P\\|SBUF\\)\"" 1 'factor-font-lock-parsing-word) + (,fuel-syntax--parsing-words-regex . 'factor-font-lock-parsing-word))) (defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax) (set (make-local-variable 'comment-start) "! ") diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index cea783053a..151b8b7aed 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -122,8 +122,8 @@ (defconst fuel-syntax--type-definition-regex (fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:"))) -(defconst fuel-syntax--parent-type-regex - "^\\(TUPLE\\|PREDICTE\\): +[^ ]+ +< +\\([^ ]+\\)") +(defconst fuel-syntax--tuple-decl-regex + "^TUPLE: +\\([^ ]+\\) +< +\\([^ ]+\\) ") (defconst fuel-syntax--constructor-regex "<[^ >]+>") @@ -133,7 +133,8 @@ (defconst fuel-syntax--symbol-definition-regex (fuel-syntax--second-word-regex '("SYMBOL:" "VAR:"))) -(defconst fuel-syntax--stack-effect-regex " ( .* )") +(defconst fuel-syntax--stack-effect-regex + "\\( ( .* )\\)\\|\\( (( .* ))\\)") (defconst fuel-syntax--using-lines-regex "^USING: +\\([^;]+\\);") From 1bb6b7717755e23b1033a2352ea60d85ac41dd18 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Wed, 14 Jan 2009 22:53:58 +0100 Subject: [PATCH 03/25] FUEL: Fix CHAR: ! font lock. --- misc/fuel/fuel-syntax.el | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index 151b8b7aed..9cbab68d96 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -229,11 +229,11 @@ table)) (defconst fuel-syntax--syntactic-keywords - `(;; Comments: + `(;; CHARs: + ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w")) + ;; Comments: ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">")) ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">")) - ;; CHARs: - ("CHAR: \\(.\\)\\( \\|$\\)" (1 "w")) ;; Strings ("\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)*\\(\"\\)" (1 "\"") (3 "\"")) ;; Let and lambda: From ea65bd8f2c90b1320154d667536d55094cfe7a98 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Wed, 14 Jan 2009 23:28:19 +0100 Subject: [PATCH 04/25] FUEL: <" "> font lock. --- misc/fuel/fuel-syntax.el | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index 9cbab68d96..b0f54dab08 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -102,7 +102,7 @@ fuel-syntax--number-regex)) (defconst fuel-syntax--bad-string-regex - "\"\\([^\"]\\|\\\\\"\\)*\n") + "\\_<\"[^>]\\([^\"\n]\\|\\\\\"\\)*\n") (defconst fuel-syntax--word-definition-regex (fuel-syntax--second-word-regex @@ -235,7 +235,9 @@ ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">")) ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">")) ;; Strings - ("\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)*\\(\"\\)" (1 "\"") (3 "\"")) + ("\\_<<\\(\"\\)\\_>" (1 "\"")) + ("\\_<\\(\"\\)>\\_>" (1 "\"")) + ("\\_<\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)\\(\"\\)\\_>" (1 "\"") (3 "\"")) ;; Let and lambda: ("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">")) ("\\(\\[\\)\\(let\\|wlet\\|let\\*\\)\\( \\|$\\)" (1 "(]")) From 58adaac5f4c5eac11f9dc6dd13d2e3f8601b6fa5 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Thu, 15 Jan 2009 00:05:52 +0100 Subject: [PATCH 05/25] FUEL: Bug in TUPLE: font lock fixed. --- misc/fuel/fuel-syntax.el | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index b0f54dab08..18b5e752f8 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -123,7 +123,7 @@ (fuel-syntax--second-word-regex '("MIXIN:" "TUPLE:" "SINGLETON:" "UNION:"))) (defconst fuel-syntax--tuple-decl-regex - "^TUPLE: +\\([^ ]+\\) +< +\\([^ ]+\\) ") + "^TUPLE: +\\([^ \n]+\\) +< +\\([^ \n]+\\)\\_>") (defconst fuel-syntax--constructor-regex "<[^ >]+>") From d3cdd79795b5e2308ddfab19c27704f1d4287cf0 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Wed, 14 Jan 2009 18:33:15 -0600 Subject: [PATCH 06/25] Clarify with-stream docs --- basis/io/streams/duplex/duplex-docs.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/io/streams/duplex/duplex-docs.factor b/basis/io/streams/duplex/duplex-docs.factor index 48afafeec7..5bf33e9002 100644 --- a/basis/io/streams/duplex/duplex-docs.factor +++ b/basis/io/streams/duplex/duplex-docs.factor @@ -20,11 +20,11 @@ HELP: HELP: with-stream { $values { "stream" duplex-stream } { "quot" quotation } } -{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ". The stream is closed if the quotation returns or throws an error." } ; +{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream. The stream is closed if the quotation returns or throws an error." } ; HELP: with-stream* { $values { "stream" duplex-stream } { "quot" quotation } } -{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } "." } +{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream." } { $notes "This word does not close the stream. Compare with " { $link with-stream } "." } ; HELP: From a6ed18c3c25c8e2fd354a13da48eee2dc3956e84 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 15 Jan 2009 00:11:23 -0600 Subject: [PATCH 07/25] XML encoding auto-detection --- basis/xml/tests/ascii.xml | 1 + basis/xml/tests/encodings.factor | 12 ++++++ basis/xml/tests/latin1.xml | 1 + basis/xml/tests/latin5.xml | 1 + basis/xml/tests/prologless.xml | 1 + basis/xml/tests/test.factor | 16 ++++---- basis/xml/tests/utf16.xml | Bin 0 -> 94 bytes basis/xml/tests/utf16be-bom.xml | Bin 0 -> 100 bytes basis/xml/tests/utf16be.xml | Bin 0 -> 98 bytes basis/xml/tests/utf16le-bom.xml | Bin 0 -> 100 bytes basis/xml/tests/utf16le.xml | Bin 0 -> 98 bytes basis/xml/tests/utf8-bom.xml | 1 + basis/xml/tests/utf8.xml | 1 + basis/xml/tokenize/tokenize.factor | 61 +++++++++++++++++++++++++++-- basis/xml/xml.factor | 20 +++++----- 15 files changed, 94 insertions(+), 21 deletions(-) create mode 100644 basis/xml/tests/ascii.xml create mode 100644 basis/xml/tests/encodings.factor create mode 100644 basis/xml/tests/latin1.xml create mode 100644 basis/xml/tests/latin5.xml create mode 100644 basis/xml/tests/prologless.xml create mode 100644 basis/xml/tests/utf16.xml create mode 100644 basis/xml/tests/utf16be-bom.xml create mode 100644 basis/xml/tests/utf16be.xml create mode 100644 basis/xml/tests/utf16le-bom.xml create mode 100644 basis/xml/tests/utf16le.xml create mode 100644 basis/xml/tests/utf8-bom.xml create mode 100644 basis/xml/tests/utf8.xml diff --git a/basis/xml/tests/ascii.xml b/basis/xml/tests/ascii.xml new file mode 100644 index 0000000000..ca1c355c81 --- /dev/null +++ b/basis/xml/tests/ascii.xml @@ -0,0 +1 @@ +e \ No newline at end of file diff --git a/basis/xml/tests/encodings.factor b/basis/xml/tests/encodings.factor new file mode 100644 index 0000000000..92fea88b4a --- /dev/null +++ b/basis/xml/tests/encodings.factor @@ -0,0 +1,12 @@ +USING: xml xml.data xml.utilities tools.test ; + +[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test +[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test diff --git a/basis/xml/tests/latin1.xml b/basis/xml/tests/latin1.xml new file mode 100644 index 0000000000..f8bc6bcd61 --- /dev/null +++ b/basis/xml/tests/latin1.xml @@ -0,0 +1 @@ +é \ No newline at end of file diff --git a/basis/xml/tests/latin5.xml b/basis/xml/tests/latin5.xml new file mode 100644 index 0000000000..afbcf09fc7 --- /dev/null +++ b/basis/xml/tests/latin5.xml @@ -0,0 +1 @@ +ý \ No newline at end of file diff --git a/basis/xml/tests/prologless.xml b/basis/xml/tests/prologless.xml new file mode 100644 index 0000000000..a60ed31cc6 --- /dev/null +++ b/basis/xml/tests/prologless.xml @@ -0,0 +1 @@ +é \ No newline at end of file diff --git a/basis/xml/tests/test.factor b/basis/xml/tests/test.factor index f37c3fa7ac..a565df6a9d 100644 --- a/basis/xml/tests/test.factor +++ b/basis/xml/tests/test.factor @@ -53,12 +53,12 @@ SYMBOL: xml-file [ " bar " string>xml pprint-xml>string ] unit-test [ "" string>xml ] must-fail [ ] [ "" string>xml drop ] unit-test -[ T{ element-decl f "br" "EMPTY" } ] [ "" string>xml-chunk second ] unit-test -[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>xml-chunk second ] unit-test -[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>xml-chunk second ] unit-test -[ T{ element-decl f "container" "ANY" } ] [ "" string>xml-chunk second ] unit-test -[ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk second ] unit-test -[ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk second ] unit-test -[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk second ] unit-test -[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk second ] unit-test +[ T{ element-decl f "br" "EMPTY" } ] [ "" string>xml-chunk first ] unit-test +[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>xml-chunk first ] unit-test +[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>xml-chunk first ] unit-test +[ T{ element-decl f "container" "ANY" } ] [ "" string>xml-chunk first ] unit-test +[ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test +[ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test +[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test +[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test [ t ] [ "" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test diff --git a/basis/xml/tests/utf16.xml b/basis/xml/tests/utf16.xml new file mode 100644 index 0000000000000000000000000000000000000000..d8775098a1fc5408fd5e507b82f4b3ae5eb72e57 GIT binary patch literal 94 zcmW-ZJq|!X7)0Muw$u4(RBN|$0#1O4gik^uuIv(+Y&Da4FY~>g$j!!z<6`3AWMRi> g$f(FEl}^MMw2z|3oM=x+E2owb33xpP9%Yf*4|{PC%K!iX literal 0 HcmV?d00001 diff --git a/basis/xml/tests/utf16be-bom.xml b/basis/xml/tests/utf16be-bom.xml new file mode 100644 index 0000000000000000000000000000000000000000..4a6f3e255cc6e3372484eb6c2690d895b3730524 GIT binary patch literal 100 zcmW-ZO$tCz0EEAzyq%T5jTK%gPQVF}6i-olS9S>uo0)08Y3_#{i)UrQa5A!UFtgz( k2uO)Yy?OF?@l;z7>;yW2iU?XkL0s7_jGhwm9>L_lo3 iN-_$i5s3`iN72aq)1HnXry`}HmFIC-{W%_G{`SYuIv(+sAe+nW!}f)xmZ~+oQ&)o%xnZo jB61Qkr4b4A+6U3V^wW-(T24VsLnY7Su=;b{%Jgp^-H#E8 literal 0 HcmV?d00001 diff --git a/basis/xml/tests/utf16le.xml b/basis/xml/tests/utf16le.xml new file mode 100644 index 0000000000000000000000000000000000000000..5a0c7d95519a5db7f4f70350f7f29088dff9cad5 GIT binary patch literal 98 zcmW-ZI|_g>07KtVx;gn5U=bl44o449 \ No newline at end of file diff --git a/basis/xml/tests/utf8.xml b/basis/xml/tests/utf8.xml new file mode 100644 index 0000000000..83b3e2d501 --- /dev/null +++ b/basis/xml/tests/utf8.xml @@ -0,0 +1 @@ +é \ No newline at end of file diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor index 0c475c108d..26b04310d6 100644 --- a/basis/xml/tokenize/tokenize.factor +++ b/basis/xml/tokenize/tokenize.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: xml.errors xml.data xml.utilities xml.char-classes sets -xml.entities kernel state-parser kernel namespaces make strings -math math.parser sequences assocs arrays splitting combinators -unicode.case accessors fry ascii ; +USING: accessors arrays ascii assocs combinators fry io.encodings +io.encodings.iana io.encodings.utf16 io.encodings.utf8 kernel +make math.parser namespaces sequences sets splitting state-parser +xml.char-classes xml.data xml.entities xml.errors strings ; IN: xml.tokenize ! XML namespace processing: ns = namespace @@ -262,9 +262,15 @@ DEFER: direct [ yes/no>bool ] [ f ] if* ; +SYMBOL: string-input? +: decode-input-if ( encoding -- ) + string-input? get [ drop ] [ decode-input ] if ; + : parse-prolog ( -- prolog ) pass-blank middle-tag "?>" expect-string dup assure-no-extra prolog-attrs + dup encoding>> dup "UTF-16" = + [ drop ] [ name>encoding [ decode-input-if ] when* ] if dup prolog-data set ; : instruct ( -- instruction ) @@ -285,3 +291,50 @@ DEFER: direct CHAR: > expect ] } cond ; + +! Autodetecting encodings + +: start-utf16le ( -- tag ) + utf16le decode-input-if + CHAR: ? expect + 0 expect instruct ; + +: start< ( -- tag ) + get-next { + { 0 [ next next start-utf16le ] } + { CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding + [ drop utf8 decode-input-if next make-tag ] + ! That is a hack. It fails if you have > [ @@ -133,11 +133,12 @@ TUPLE: pull-xml scope ; : sax ( stream quot: ( xml-elem -- ) -- ) swap [ reset-prolog init-ns-stack - prolog-data get call-under + start-document call-under sax-loop ] state-parse ; inline recursive : (read-xml) ( -- ) + start-document process [ process ] sax-loop ; inline : (read-xml-chunk) ( stream -- prolog seq ) @@ -159,11 +160,12 @@ TUPLE: pull-xml scope ; read-xml ; : string>xml-chunk ( string -- xml ) - read-xml-chunk ; + t string-input? + [ read-xml-chunk ] with-variable ; : file>xml ( filename -- xml ) ! Autodetect encoding! - utf8 read-xml ; + binary read-xml ; : xml-reprint ( string -- ) string>xml print-xml ; From 7954a0881f60dc7d6186e3f96c5ecb57b583eac4 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Thu, 15 Jan 2009 14:00:39 +0100 Subject: [PATCH 08/25] FUEL: Correct fontification for multiline forms USING:, TUPLE:, &c. --- misc/fuel/fuel-font-lock.el | 18 +++++++++++++++--- misc/fuel/fuel-syntax.el | 7 ++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/misc/fuel/fuel-font-lock.el b/misc/fuel/fuel-font-lock.el index dc123b4874..5e438113f0 100644 --- a/misc/fuel/fuel-font-lock.el +++ b/misc/fuel/fuel-font-lock.el @@ -72,6 +72,18 @@ ;;; Font lock: +(defun fuel-font-lock--syntactic-face (state) + (cond ((nth 3 state) 'factor-font-lock-string) + ((char-equal (char-after (nth 8 state)) ?\ ) + (save-excursion + (goto-char (nth 8 state)) + (beginning-of-line) + (cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name) + ((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ") + 'factor-font-lock-symbol) + (t 'font-lock-warning-face)))) + (t 'factor-font-lock-comment))) + (defconst fuel-font-lock--font-lock-keywords `((,fuel-syntax--stack-effect-regex . 'factor-font-lock-stack-effect) (,fuel-syntax--brace-words-regex 1 'factor-font-lock-parsing-word) @@ -99,14 +111,14 @@ (defun fuel-font-lock--font-lock-setup (&optional keywords no-syntax) (set (make-local-variable 'comment-start) "! ") (set (make-local-variable 'parse-sexp-lookup-properties) t) - (set (make-local-variable 'font-lock-comment-face) 'factor-font-lock-comment) - (set (make-local-variable 'font-lock-string-face) 'factor-font-lock-string) (set (make-local-variable 'font-lock-defaults) `(,(or keywords 'fuel-font-lock--font-lock-keywords) nil nil nil nil ,@(if no-syntax nil (list (cons 'font-lock-syntactic-keywords - fuel-syntax--syntactic-keywords)))))) + fuel-syntax--syntactic-keywords) + (cons 'font-lock-syntactic-face-function + 'fuel-font-lock--syntactic-face)))))) ;;; Fontify strings as Factor code: diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index 18b5e752f8..5aaaa33964 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -235,9 +235,14 @@ ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">")) ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">")) ;; Strings + ("\\_<\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\"")) ("\\_<<\\(\"\\)\\_>" (1 "\"")) ("\\_<\\(\"\\)>\\_>" (1 "\"")) - ("\\_<\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"?\\)\\(\"\\)\\_>" (1 "\"") (3 "\"")) + ;; Multiline constructs + ("\\_\\)" (2 "" (1 ">b")) ;; Let and lambda: ("\\_<\\(!(\\) .* \\()\\)" (1 "<") (2 ">")) ("\\(\\[\\)\\(let\\|wlet\\|let\\*\\)\\( \\|$\\)" (1 "(]")) From 180f43767a6ac78c3071e868f584ff0d20010943 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Thu, 15 Jan 2009 22:08:44 +0100 Subject: [PATCH 09/25] FUEL: Use ".exe" in windows to locate factor binary. --- misc/fuel/fuel-font-lock.el | 2 +- misc/fuel/fuel-listener.el | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/misc/fuel/fuel-font-lock.el b/misc/fuel/fuel-font-lock.el index 5e438113f0..10561ded68 100644 --- a/misc/fuel/fuel-font-lock.el +++ b/misc/fuel/fuel-font-lock.el @@ -81,7 +81,7 @@ (cond ((looking-at "USING: ") 'factor-font-lock-vocabulary-name) ((looking-at "\\(TUPLE\\|SYMBOLS\\|VARS\\): ") 'factor-font-lock-symbol) - (t 'font-lock-warning-face)))) + (t 'default)))) (t 'factor-font-lock-comment))) (defconst fuel-font-lock--font-lock-keywords diff --git a/misc/fuel/fuel-listener.el b/misc/fuel/fuel-listener.el index 1655356227..f1be6a5607 100644 --- a/misc/fuel/fuel-listener.el +++ b/misc/fuel/fuel-listener.el @@ -68,7 +68,8 @@ buffer." (setq fuel-listener--buffer (current-buffer))))) (defun fuel-listener--start-process () - (let ((factor (expand-file-name fuel-listener-factor-binary)) + (let ((factor (locate-file (expand-file-name fuel-listener-factor-binary) + '("") exec-suffixes)) (image (expand-file-name fuel-listener-factor-image)) (comint-redirect-perform-sanity-check nil)) (unless (file-executable-p factor) From b9d773b3f0363fdf335dee762e64fdd8e70fdee8 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 15 Jan 2009 15:25:00 -0600 Subject: [PATCH 10/25] Fix subtle Unicode encodings autodetection bug --- basis/xml/tests/encodings.factor | 4 ++- basis/xml/tests/spaces.xml | 3 ++ basis/xml/tests/unitag.xml | 1 + basis/xml/tokenize/tokenize.factor | 57 +++++++++++++++++++++--------- 4 files changed, 47 insertions(+), 18 deletions(-) create mode 100644 basis/xml/tests/spaces.xml create mode 100644 basis/xml/tests/unitag.xml diff --git a/basis/xml/tests/encodings.factor b/basis/xml/tests/encodings.factor index 92fea88b4a..720b04ca42 100644 --- a/basis/xml/tests/encodings.factor +++ b/basis/xml/tests/encodings.factor @@ -1,7 +1,8 @@ -USING: xml xml.data xml.utilities tools.test ; +USING: xml xml.data xml.utilities tools.test accessors kernel ; [ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test +[ "\u0000e9" ] [ "resource:basis/xml/tests/spaces.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test @@ -10,3 +11,4 @@ USING: xml xml.data xml.utilities tools.test ; [ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test [ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test [ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test +[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test \ No newline at end of file diff --git a/basis/xml/tests/spaces.xml b/basis/xml/tests/spaces.xml new file mode 100644 index 0000000000..dd194ff218 --- /dev/null +++ b/basis/xml/tests/spaces.xml @@ -0,0 +1,3 @@ + + +é diff --git a/basis/xml/tests/unitag.xml b/basis/xml/tests/unitag.xml new file mode 100644 index 0000000000..b7ef6ade3c --- /dev/null +++ b/basis/xml/tests/unitag.xml @@ -0,0 +1 @@ +<é>x \ No newline at end of file diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor index 26b04310d6..f2bb5a2e97 100644 --- a/basis/xml/tokenize/tokenize.factor +++ b/basis/xml/tokenize/tokenize.factor @@ -1,9 +1,10 @@ ! Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays ascii assocs combinators fry io.encodings -io.encodings.iana io.encodings.utf16 io.encodings.utf8 kernel -make math.parser namespaces sequences sets splitting state-parser -xml.char-classes xml.data xml.entities xml.errors strings ; +USING: accessors arrays ascii assocs combinators +combinators.short-circuit fry io.encodings io.encodings.iana +io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make +math math.parser namespaces sequences sets splitting state-parser +strings xml.char-classes xml.data xml.entities xml.errors ; IN: xml.tokenize ! XML namespace processing: ns = namespace @@ -53,17 +54,23 @@ SYMBOL: ns-stack ! version=1.0? is calculated once and passed around for efficiency -: (parse-name) ( -- str ) - version=1.0? dup - get-char name-start? [ - [ dup get-char name-char? not ] take-until nip - ] [ - "Malformed name" xml-string-error - ] if ; +: assure-name ( str version=1.0? -- str ) + over { + [ first name-start? ] + [ rest-slice [ name-char? ] with all? ] + } 2&& [ "Malformed name" xml-string-error ] unless ; + +: (parse-name) ( start -- str ) + version=1.0? + [ [ get-char name-char? not ] curry take-until append ] + [ assure-name ] bi ; + +: parse-name-starting ( start -- name ) + (parse-name) get-char CHAR: : = + [ next "" (parse-name) ] [ "" swap ] if f ; : parse-name ( -- name ) - (parse-name) get-char CHAR: : = - [ next (parse-name) ] [ "" swap ] if f ; + "" parse-name-starting ; ! -- Parsing strings @@ -99,7 +106,7 @@ SYMBOL: ns-stack : parse-text ( -- string ) CHAR: < parse-char ; - + ! Parsing tags : start-tag ( -- name ? ) @@ -274,7 +281,7 @@ SYMBOL: string-input? dup prolog-data set ; : instruct ( -- instruction ) - (parse-name) dup "xml" = + "" (parse-name) dup "xml" = [ drop parse-prolog ] [ dup >lower "xml" = [ capitalized-prolog ] @@ -294,17 +301,33 @@ SYMBOL: string-input? ! Autodetecting encodings +: continue-make-tag ( str -- tag ) + parse-name-starting middle-tag end-tag CHAR: > expect ; + : start-utf16le ( -- tag ) utf16le decode-input-if CHAR: ? expect 0 expect instruct ; +: 10xxxxxx? ( ch -- ? ) + -6 shift 3 bitand 2 = ; + +: start Date: Thu, 15 Jan 2009 15:43:42 -0600 Subject: [PATCH 11/25] Placating Slava --- basis/unicode/breaks/breaks.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 0524825d43..e3e1282676 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -61,7 +61,7 @@ SYMBOL: table : eval-seq ( seq -- seq ) [ dup word? [ execute ] when ] map ; : (set-table) ( class1 class2 val -- ) - -rot table get nth [ swap or ] change-nth ; + [ table get nth ] dip '[ _ or ] change-nth ; : set-table ( classes1 classes2 val -- ) [ [ eval-seq ] bi@ ] dip From 79d4bb04e9c6fa44e9583d225f5d5775a793e576 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 15 Jan 2009 16:35:55 -0600 Subject: [PATCH 12/25] More descriptive XML errors --- basis/xml/errors/errors-tests.factor | 23 +++++++++------ basis/xml/errors/errors.factor | 42 +++++++++++++++++++--------- basis/xml/tokenize/tokenize.factor | 13 ++++----- basis/xml/xml-docs.factor | 17 ++++++++--- 4 files changed, 61 insertions(+), 34 deletions(-) diff --git a/basis/xml/errors/errors-tests.factor b/basis/xml/errors/errors-tests.factor index e72e465f0d..426ef57736 100644 --- a/basis/xml/errors/errors-tests.factor +++ b/basis/xml/errors/errors-tests.factor @@ -6,22 +6,27 @@ IN: xml.errors.tests '[ _ string>xml ] swap '[ _ = ] must-fail-with ; T{ no-entity f 1 10 "nbsp" } " " xml-error-test -T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } -} "" xml-error-test +T{ mismatched f 1 8 T{ name f "" "x" "" } T{ name f "" "y" "" } } + "" xml-error-test T{ unclosed f 1 4 V{ T{ name f "" "x" "" } } } "" xml-error-test T{ nonexist-ns f 1 5 "x" } "" xml-error-test T{ unopened f 1 5 } "" xml-error-test -T{ not-yes/no f 1 41 "maybe" } "" xml-error-test +T{ not-yes/no f 1 41 "maybe" } + "" xml-error-test T{ extra-attrs f 1 32 V{ T{ name f "" "foo" f } } } "" xml-error-test -T{ bad-version f 1 28 "5 million" } "" xml-error-test +T{ bad-version f 1 28 "5 million" } + "" xml-error-test T{ notags f } "" xml-error-test T{ multitags } "" xml-error-test -T{ bad-prolog f 1 26 T{ prolog f "1.0" "UTF-8" f } -} "" xml-error-test +T{ bad-prolog f 1 26 T{ prolog f "1.0" "UTF-8" f } } + "" xml-error-test T{ capitalized-prolog f 1 6 "XmL" } "" -xml-error-test + xml-error-test T{ pre/post-content f "x" t } "x" xml-error-test T{ versionless-prolog f 1 8 } "" xml-error-test -T{ bad-instruction f 1 11 T{ instruction f "xsl" } -} "" xml-error-test +T{ bad-instruction f 1 11 T{ instruction f "xsl" } } + "" xml-error-test +T{ unclosed-quote f 1 13 } " } "." } @@ -324,6 +321,15 @@ HELP: state-parse HELP: pre/post-content { $class-description "describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ; +HELP: unclosed-quote +{ $class-description "describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ; + +HELP: bad-name +{ $class-description "describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ; + +HELP: quoteless-attr +{ $class-description "describes the error where an attribute of an XML tag is missing quotes around a value." } ; + HELP: entities { $description "a hash table from default XML entity names (like & and <) to the characters they represent. This is automatically included when parsing any XML document." } { $see-also html-entities } ; @@ -444,6 +450,9 @@ ARTICLE: { "xml" "errors" } "XML parsing errors" { $subsection expected } { $subsection no-entity } { $subsection pre/post-content } + { $subsection unclosed-quote } + { $subsection bad-name } + { $subsection quoteless-attr } "Additionally, most of these errors delegate to " { $link parsing-error } " in order to provide more information" $nl "Note that, in parsing an XML document, only the first error is reported." ; @@ -456,7 +465,7 @@ ARTICLE: { "xml" "entities" } "XML entities" { $subsection with-html-entities } ; ARTICLE: "xml" "XML parser" -"The " { $vocab-link "xml" } " vocabulary implements the XML 1.1 standard, converting strings of text into XML and vice versa." +"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa." { $subsection { "xml" "reading" } } { $subsection { "xml" "writing" } } { $subsection { "xml" "classes" } } From 11b9dbfcd59129b8f5afe218a39b4fd251738d36 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 15 Jan 2009 16:46:57 -0600 Subject: [PATCH 13/25] Placating Slava (2) --- basis/unicode/breaks/breaks.factor | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index e3e1282676..336d99657e 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -199,8 +199,8 @@ to: word-table : walk-down ( str i -- j ) dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ; -: word-break? ( table-entry i str -- ? ) - spin { +: word-break? ( str i table-entry -- ? ) + { { t [ 2drop f ] } { f [ 2drop t ] } { check-letter-after @@ -214,10 +214,10 @@ to: word-table } case ; :: word-break-next ( old-class new-char i str -- next-class ? ) - new-char dup format/extended? - [ drop old-class dup { 1 2 3 } member? ] [ - word-break-prop old-class over word-table-nth - i str word-break? + new-char format/extended? + [ old-class dup { 1 2 3 } member? ] [ + new-char word-break-prop old-class over word-table-nth + [ str i ] dip word-break? ] if ; PRIVATE> From 756464a4871ec7e9a0faf52b0c24e1c2ed257c6b Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Fri, 16 Jan 2009 00:38:18 +0100 Subject: [PATCH 14/25] FUEL: Help browser: Fix for links to articles keyed by string lists. --- extra/fuel/fuel.factor | 2 +- misc/fuel/fuel-eval.el | 1 + misc/fuel/fuel-help.el | 3 ++- misc/fuel/fuel-markup.el | 7 ++++--- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/extra/fuel/fuel.factor b/extra/fuel/fuel.factor index 46d6ba12c7..f52a34ff28 100644 --- a/extra/fuel/fuel.factor +++ b/extra/fuel/fuel.factor @@ -141,7 +141,7 @@ PRIVATE> : fuel-get-article ( name -- ) article fuel-eval-set-result ; -MEMO: fuel-get-article-title ( name -- ) +: fuel-get-article-title ( name -- ) articles get at [ article-title ] [ f ] if* fuel-eval-set-result ; : fuel-word-help ( name -- ) (fuel-word-help) fuel-eval-set-result ; diff --git a/misc/fuel/fuel-eval.el b/misc/fuel/fuel-eval.el index 543d23bd3f..4c34ef17b8 100644 --- a/misc/fuel/fuel-eval.el +++ b/misc/fuel/fuel-eval.el @@ -31,6 +31,7 @@ ((listp sexp) (case (car sexp) (:array (factor--seq 'V{ '} (cdr sexp))) + (:seq (factor--seq '{ '} (cdr sexp))) (:quote (format "\\ %s" (factor `(:factor ,(cadr sexp))))) (:quotation (factor--seq '\[ '\] (cdr sexp))) (:using (factor `(USING: ,@(cdr sexp) :end))) diff --git a/misc/fuel/fuel-help.el b/misc/fuel/fuel-help.el index 705d1469a2..a82de388da 100644 --- a/misc/fuel/fuel-help.el +++ b/misc/fuel/fuel-help.el @@ -137,7 +137,8 @@ (defun fuel-help--get-article (name label) (message "Retrieving article ...") - (let* ((cmd `(:fuel* ((,name fuel-get-article)) "fuel" t)) + (let* ((name (if (listp name) (cons :seq name) name)) + (cmd `(:fuel* ((,name fuel-get-article)) "fuel" t)) (ret (fuel-eval--send/wait cmd)) (res (fuel-eval--retort-result ret))) (if (not res) diff --git a/misc/fuel/fuel-markup.el b/misc/fuel/fuel-markup.el index 696e4ff080..6a374cd5c8 100644 --- a/misc/fuel/fuel-markup.el +++ b/misc/fuel/fuel-markup.el @@ -61,7 +61,7 @@ (defun fuel-markup--insert-button (label link type) (let ((label (format "%s" label)) - (link (format "%s" link))) + (link (if (listp link) link (format "%s" link)))) (insert-text-button label :type 'fuel-markup--button 'markup-link link @@ -70,8 +70,9 @@ 'help-echo (format "%s (%s)" label type)))) (defun fuel-markup--article-title (name) - (fuel-eval--retort-result - (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel")))) + (let ((name (if (listp name) (cons :seq name) name))) + (fuel-eval--retort-result + (fuel-eval--send/wait `(:fuel* ((,name fuel-get-article-title)) "fuel"))))) (defun fuel-markup--link-at-point () (let ((button (condition-case nil (forward-button 0) (error nil)))) From db21e8ed318da746cd4775697f607c6498e7650b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 15 Jan 2009 17:55:30 -0600 Subject: [PATCH 15/25] add way to get length of packed bytes, add words to read packed bytes directly from streams --- basis/pack/pack-tests.factor | 2 ++ basis/pack/pack.factor | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/basis/pack/pack-tests.factor b/basis/pack/pack-tests.factor index b1a354cd4e..b813abc834 100755 --- a/basis/pack/pack-tests.factor +++ b/basis/pack/pack-tests.factor @@ -46,3 +46,5 @@ pack strings tools.test ; [ f ] [ "" [ read-c-string ] with-string-reader ] unit-test [ 5 ] [ "FRAM\0\u000005\0\0\0\0\0\0\0" [ read-c-string drop read-u64 ] with-string-reader ] unit-test +[ 9 ] +[ "iic" packed-length ] unit-test diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor index 0e5cb7dbbc..bd4b77c828 100755 --- a/basis/pack/pack.factor +++ b/basis/pack/pack.factor @@ -130,6 +130,24 @@ M: string b, ( n string -- ) heap-size b, ; { CHAR: D read-double } } ; +: packed-length-table ( -- hash ) + H{ + { CHAR: c 1 } + { CHAR: C 1 } + { CHAR: s 2 } + { CHAR: S 2 } + { CHAR: t 3 } + { CHAR: T 3 } + { CHAR: i 4 } + { CHAR: I 4 } + { CHAR: q 8 } + { CHAR: Q 8 } + { CHAR: f 4 } + { CHAR: F 4 } + { CHAR: d 8 } + { CHAR: D 8 } + } ; + MACRO: (pack) ( seq str -- quot ) [ [ @@ -172,3 +190,22 @@ MACRO: (unpack) ( str -- quot ) : unpack-le ( seq str -- seq ) [ big-endian off (unpack) ] with-scope ; + +: packed-length ( str -- n ) + [ packed-length-table at ] sigma ; + +ERROR: packed-read-fail str bytes ; + +: packed-read ( str -- bytes ) + dup packed-length [ read dup length ] keep = [ + nip + ] [ + packed-read-fail + ] if ; + +: (read-packed) ( str quot -- seq ) + [ packed-read ] swap bi ; + +: read-packed-le ( str -- seq ) [ unpack-le ] (read-packed) ; +: read-packed-be ( str -- seq ) [ unpack-be ] (read-packed) ; +: read-packed-native ( str -- seq ) [ unpack-native ] (read-packed) ; From a6e0df75ea489c7fa22dc41d7775d1c39e7aa01c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 15 Jan 2009 17:56:09 -0600 Subject: [PATCH 16/25] use sigma --- basis/roman/roman.factor | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/basis/roman/roman.factor b/basis/roman/roman.factor index 866ac92872..c9394b07ed 100644 --- a/basis/roman/roman.factor +++ b/basis/roman/roman.factor @@ -39,16 +39,14 @@ ERROR: roman-range-error n ; PRIVATE> : >roman ( n -- str ) - dup roman-range-check [ - (>roman) - ] "" make ; + dup roman-range-check + [ (>roman) ] "" make ; : >ROMAN ( n -- str ) >roman >upper ; : roman> ( str -- n ) - >lower [ roman<= ] monotonic-split [ - (roman>) - ] map sum ; + >lower [ roman<= ] monotonic-split + [ (roman>) ] sigma ; Date: Fri, 16 Jan 2009 01:14:56 +0100 Subject: [PATCH 17/25] FUEL: More font-lock fixes; README fixes. --- misc/fuel/README | 7 +------ misc/fuel/fuel-syntax.el | 40 ++++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/misc/fuel/README b/misc/fuel/README index eb280d796c..706191aaa3 100644 --- a/misc/fuel/README +++ b/misc/fuel/README @@ -18,16 +18,11 @@ beast. (load-file "/misc/fuel/fu.el") - or - - (add-to-list load-path "/fuel") - (require 'fuel) - If all you want is a major mode for editing Factor code with pretty font colors and indentation, without running the factor listener inside Emacs, you can use instead: - (add-to-list load-path "/fuel") + (add-to-list 'load-path "/fuel") (setq factor-mode-use-fuel nil) (require 'factor-mode) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index 5aaaa33964..d9f7957155 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -79,7 +79,7 @@ (regexp-opt fuel-syntax--declaration-words 'words)) (defsubst fuel-syntax--second-word-regex (prefixes) - (format "^%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t))) + (format "%s +\\([^ \r\n]+\\)" (regexp-opt prefixes t))) (defconst fuel-syntax--method-definition-regex "^M: +\\([^ ]+\\) +\\([^ ]+\\)") @@ -172,26 +172,26 @@ fuel-syntax--declaration-words-regex)) (defconst fuel-syntax--single-liner-regex - (format "^%s" (regexp-opt '("ABOUT:" - "ARTICLE:" - "ALIAS:" - "CONSTANT:" "C:" - "DEFER:" - "FORGET:" - "GENERIC:" "GENERIC#" - "HELP:" "HEX:" "HOOK:" - "IN:" "INSTANCE:" - "MAIN:" "MATH:" "MIXIN:" - "OCT:" - "POSTPONE:" "PRIVATE>" "" " Date: Thu, 15 Jan 2009 18:38:58 -0600 Subject: [PATCH 18/25] clean up pack --- basis/pack/pack-tests.factor | 7 +++-- basis/pack/pack.factor | 59 ++++++++++++++++++------------------ 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/basis/pack/pack-tests.factor b/basis/pack/pack-tests.factor index b813abc834..c32c528299 100755 --- a/basis/pack/pack-tests.factor +++ b/basis/pack/pack-tests.factor @@ -46,5 +46,8 @@ pack strings tools.test ; [ f ] [ "" [ read-c-string ] with-string-reader ] unit-test [ 5 ] [ "FRAM\0\u000005\0\0\0\0\0\0\0" [ read-c-string drop read-u64 ] with-string-reader ] unit-test -[ 9 ] -[ "iic" packed-length ] unit-test +[ 9 ] [ "iic" packed-length ] unit-test +[ "iii" read-packed-le ] must-infer +[ "iii" unpack-le ] must-infer +[ "iii" unpack-be ] must-infer +[ "iii" unpack-native ] must-infer diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor index bd4b77c828..b60b8956b6 100755 --- a/basis/pack/pack.factor +++ b/basis/pack/pack.factor @@ -9,6 +9,9 @@ SYMBOL: big-endian : big-endian? ( -- ? ) 1 *char zero? ; +: set-big-endian ( -- ) + big-endian? big-endian set ; inline + : >endian ( obj n -- str ) big-endian get [ >be ] [ >le ] if ; inline @@ -70,7 +73,7 @@ M: string b, ( n string -- ) heap-size b, ; : read-s32 ( -- n ) 4 read-signed ; : read-u32 ( -- n ) 4 read-unsigned ; : read-s64 ( -- n ) 8 read-signed ; -: read-u64 ( -- n ) 8 read-signed ; +: read-u64 ( -- n ) 8 read-unsigned ; : read-s128 ( -- n ) 16 read-signed ; : read-u128 ( -- n ) 16 read-unsigned ; @@ -81,7 +84,7 @@ M: string b, ( n string -- ) heap-size b, ; 8 read endian> bits>double ; : read-c-string ( -- str/f ) - "\0" read-until [ drop f ] unless ; + "\0" read-until swap and ; : read-c-string* ( n -- str/f ) read [ zero? ] trim-right [ f ] when-empty ; @@ -94,7 +97,7 @@ M: string b, ( n string -- ) heap-size b, ; : read-128-ber ( -- n ) 0 (read-128-ber) ; -: pack-table ( -- hash ) +CONSTANT: pack-table H{ { CHAR: c s8, } { CHAR: C u8, } @@ -110,9 +113,9 @@ M: string b, ( n string -- ) heap-size b, ; { CHAR: F float, } { CHAR: d double, } { CHAR: D double, } - } ; + } -: unpack-table ( -- hash ) +CONSTANT: unpack-table H{ { CHAR: c read-s8 } { CHAR: C read-u8 } @@ -128,9 +131,9 @@ M: string b, ( n string -- ) heap-size b, ; { CHAR: F read-float } { CHAR: d read-double } { CHAR: D read-double } - } ; + } -: packed-length-table ( -- hash ) +CONSTANT: packed-length-table H{ { CHAR: c 1 } { CHAR: C 1 } @@ -146,7 +149,7 @@ M: string b, ( n string -- ) heap-size b, ; { CHAR: F 4 } { CHAR: d 8 } { CHAR: D 8 } - } ; + } MACRO: (pack) ( seq str -- quot ) [ @@ -159,16 +162,13 @@ MACRO: (pack) ( seq str -- quot ) ] [ ] make ; : pack-native ( seq str -- seq ) - [ - big-endian? big-endian set (pack) - ] with-scope ; + [ set-big-endian (pack) ] with-scope ; inline : pack-be ( seq str -- seq ) - [ big-endian on (pack) ] with-scope ; + [ big-endian on (pack) ] with-scope ; inline : pack-le ( seq str -- seq ) - [ big-endian off (pack) ] with-scope ; - + [ big-endian off (pack) ] with-scope ; inline MACRO: (unpack) ( str -- quot ) [ @@ -181,31 +181,30 @@ MACRO: (unpack) ( str -- quot ) ] [ ] make ; : unpack-native ( seq str -- seq ) - [ - big-endian? big-endian set (unpack) - ] with-scope ; + [ set-big-endian (unpack) ] with-scope ; inline : unpack-be ( seq str -- seq ) - [ big-endian on (unpack) ] with-scope ; + [ big-endian on (unpack) ] with-scope ; inline : unpack-le ( seq str -- seq ) - [ big-endian off (unpack) ] with-scope ; + [ big-endian off (unpack) ] with-scope ; inline : packed-length ( str -- n ) [ packed-length-table at ] sigma ; ERROR: packed-read-fail str bytes ; -: packed-read ( str -- bytes ) - dup packed-length [ read dup length ] keep = [ - nip - ] [ - packed-read-fail - ] if ; + + +: read-packed ( str quot -- seq ) + [ read-packed-bytes ] swap bi ; inline + +: read-packed-le ( str -- seq ) [ unpack-le ] read-packed ; inline +: read-packed-be ( str -- seq ) [ unpack-be ] read-packed ; inline +: read-packed-native ( str -- seq ) [ unpack-native ] read-packed ; inline From 867c1cb63cdd8160eb7b4c7278a284a9d71065bc Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Fri, 16 Jan 2009 01:49:01 +0100 Subject: [PATCH 19/25] FUEL: Empty USING: form font lock. --- misc/fuel/fuel-syntax.el | 1 + 1 file changed, 1 insertion(+) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index d9f7957155..ebe4f2e854 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -239,6 +239,7 @@ ("\\_<<\\(\"\\)\\_>" (1 "\"")) ("\\_<\\(\"\\)>\\_>" (1 "\"")) ;; Multiline constructs + ("\\_b")) ("\\_\\)" (2 " Date: Thu, 15 Jan 2009 19:03:55 -0600 Subject: [PATCH 20/25] clean up pack macros --- basis/pack/pack.factor | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor index b60b8956b6..f98d90325e 100755 --- a/basis/pack/pack.factor +++ b/basis/pack/pack.factor @@ -1,7 +1,7 @@ USING: alien alien.c-types arrays assocs byte-arrays io io.binary io.streams.string kernel math math.parser namespaces make parser prettyprint quotations sequences strings vectors -words macros math.functions math.bitwise ; +words macros math.functions math.bitwise fry ; IN: pack SYMBOL: big-endian @@ -152,15 +152,9 @@ CONSTANT: packed-length-table } MACRO: (pack) ( seq str -- quot ) - [ - [ - [ - swap , pack-table at , - ] 2each - ] [ ] make 1quotation % - [ B{ } make ] % - ] [ ] make ; - + [ pack-table at 1quotation '[ _ @ ] ] [ ] 2map-as concat + '[ _ B{ } make ] ; + : pack-native ( seq str -- seq ) [ set-big-endian (pack) ] with-scope ; inline @@ -171,14 +165,8 @@ MACRO: (pack) ( seq str -- quot ) [ big-endian off (pack) ] with-scope ; inline MACRO: (unpack) ( str -- quot ) - [ - [ - [ unpack-table at , \ , , ] each - ] [ ] make - 1quotation [ { } make ] append - 1quotation % - \ with-string-reader , - ] [ ] make ; + [ unpack-table at 1quotation '[ @ , ] ] { } map-as concat + '[ [ _ { } make ] with-string-reader ] ; : unpack-native ( seq str -- seq ) [ set-big-endian (unpack) ] with-scope ; inline @@ -205,6 +193,11 @@ PRIVATE> : read-packed ( str quot -- seq ) [ read-packed-bytes ] swap bi ; inline -: read-packed-le ( str -- seq ) [ unpack-le ] read-packed ; inline -: read-packed-be ( str -- seq ) [ unpack-be ] read-packed ; inline -: read-packed-native ( str -- seq ) [ unpack-native ] read-packed ; inline +: read-packed-le ( str -- seq ) + [ unpack-le ] read-packed ; inline + +: read-packed-be ( str -- seq ) + [ unpack-be ] read-packed ; inline + +: read-packed-native ( str -- seq ) + [ unpack-native ] read-packed ; inline From 75af02313c8d2cf51347a6e268da3cf9ea4280fd Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 15 Jan 2009 19:08:08 -0600 Subject: [PATCH 21/25] add copyright information to pack --- basis/pack/pack-tests.factor | 2 ++ basis/pack/pack.factor | 2 ++ 2 files changed, 4 insertions(+) diff --git a/basis/pack/pack-tests.factor b/basis/pack/pack-tests.factor index c32c528299..1be37292a0 100755 --- a/basis/pack/pack-tests.factor +++ b/basis/pack/pack-tests.factor @@ -48,6 +48,8 @@ pack strings tools.test ; [ 9 ] [ "iic" packed-length ] unit-test [ "iii" read-packed-le ] must-infer +[ "iii" read-packed-be ] must-infer +[ "iii" read-packed-native ] must-infer [ "iii" unpack-le ] must-infer [ "iii" unpack-be ] must-infer [ "iii" unpack-native ] must-infer diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor index f98d90325e..8f00792c47 100755 --- a/basis/pack/pack.factor +++ b/basis/pack/pack.factor @@ -1,3 +1,5 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. USING: alien alien.c-types arrays assocs byte-arrays io io.binary io.streams.string kernel math math.parser namespaces make parser prettyprint quotations sequences strings vectors From 0aff3f2452af7b8de4b54b6099532e3f5ca03cf5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 15 Jan 2009 19:10:40 -0600 Subject: [PATCH 22/25] more cleanups. (pack) -> pack, use PRIVATE --- basis/pack/pack.factor | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/basis/pack/pack.factor b/basis/pack/pack.factor index 8f00792c47..136deb9ff5 100755 --- a/basis/pack/pack.factor +++ b/basis/pack/pack.factor @@ -11,9 +11,13 @@ SYMBOL: big-endian : big-endian? ( -- ? ) 1 *char zero? ; + + : >endian ( obj n -- str ) big-endian get [ >be ] [ >le ] if ; inline @@ -44,6 +48,8 @@ M: string b, ( n string -- ) heap-size b, ; : double, ( n -- ) double>bits 8 b, ; : c-string, ( str -- ) % 0 u8, ; +128-ber) ( n -- ) dup 0 > [ [ HEX: 7f bitand HEX: 80 bitor , ] keep -7 shift @@ -52,6 +58,8 @@ M: string b, ( n string -- ) heap-size b, ; drop ] if ; +PRIVATE> + : >128-ber ( n -- str ) [ [ HEX: 7f bitand , ] keep -7 shift @@ -99,6 +107,8 @@ M: string b, ( n string -- ) heap-size b, ; : read-128-ber ( -- n ) 0 (read-128-ber) ; + : pack-native ( seq str -- seq ) - [ set-big-endian (pack) ] with-scope ; inline + [ set-big-endian pack ] with-scope ; inline : pack-be ( seq str -- seq ) - [ big-endian on (pack) ] with-scope ; inline + [ big-endian on pack ] with-scope ; inline : pack-le ( seq str -- seq ) - [ big-endian off (pack) ] with-scope ; inline + [ big-endian off pack ] with-scope ; inline -MACRO: (unpack) ( str -- quot ) + + : unpack-native ( seq str -- seq ) - [ set-big-endian (unpack) ] with-scope ; inline + [ set-big-endian unpack ] with-scope ; inline : unpack-be ( seq str -- seq ) - [ big-endian on (unpack) ] with-scope ; inline + [ big-endian on unpack ] with-scope ; inline : unpack-le ( seq str -- seq ) - [ big-endian off (unpack) ] with-scope ; inline + [ big-endian off unpack ] with-scope ; inline : packed-length ( str -- n ) [ packed-length-table at ] sigma ; From 40be28cd6fa38c86303d15369752ab0618e45223 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Fri, 16 Jan 2009 02:39:59 +0100 Subject: [PATCH 23/25] FUEL: Choose adequate factor binary default, depending on system type. --- misc/fuel/fuel-listener.el | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/misc/fuel/fuel-listener.el b/misc/fuel/fuel-listener.el index f1be6a5607..aa9f05ab29 100644 --- a/misc/fuel/fuel-listener.el +++ b/misc/fuel/fuel-listener.el @@ -31,7 +31,12 @@ :group 'fuel) (defcustom fuel-listener-factor-binary - (expand-file-name "factor" fuel-factor-root-dir) + (expand-file-name (cond ((eq system-type 'windows-nt) + "factor.exe") + ((eq system-type 'darwin) + "Factor.app/Contents/MacOS/factor") + (t "factor")) + fuel-factor-root-dir) "Full path to the factor executable to use when starting a listener." :type '(file :must-match t) :group 'fuel-listener) @@ -68,8 +73,7 @@ buffer." (setq fuel-listener--buffer (current-buffer))))) (defun fuel-listener--start-process () - (let ((factor (locate-file (expand-file-name fuel-listener-factor-binary) - '("") exec-suffixes)) + (let ((factor (expand-file-name fuel-listener-factor-binary)) (image (expand-file-name fuel-listener-factor-image)) (comint-redirect-perform-sanity-check nil)) (unless (file-executable-p factor) From 571860bf9309697dd9d057c88de6a2161c9dcd62 Mon Sep 17 00:00:00 2001 From: "Jose A. Ortega Ruiz" Date: Fri, 16 Jan 2009 02:43:46 +0100 Subject: [PATCH 24/25] FUEL: Yet another font lock bug (scape in strings). --- misc/fuel/fuel-syntax.el | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/fuel/fuel-syntax.el b/misc/fuel/fuel-syntax.el index ebe4f2e854..4112e3507d 100644 --- a/misc/fuel/fuel-syntax.el +++ b/misc/fuel/fuel-syntax.el @@ -235,7 +235,7 @@ ("\\_<\\(#?!\\) .*\\(\n\\|$\\)" (1 "<") (2 ">")) ("\\_<\\(#?!\\)\\(\n\\|$\\)" (1 "<") (2 ">")) ;; Strings - ("\\_<\\(\"\\)\\([^\n\r\f\\\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\"")) + ("\\_<\\(\"\\)\\([^\n\r\f\"]\\|\\\\\"\\)*\\(\"\\)\\_>" (1 "\"") (3 "\"")) ("\\_<<\\(\"\\)\\_>" (1 "\"")) ("\\_<\\(\"\\)>\\_>" (1 "\"")) ;; Multiline constructs From 72a3d309f9306353011aef7f97c49e050fb30546 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 15 Jan 2009 22:20:24 -0600 Subject: [PATCH 25/25] XML parses entities now --- basis/syndication/syndication.factor | 2 +- basis/xml/entities/entities.factor | 261 +--------------------- basis/xml/entities/html/authors.txt | 1 + basis/xml/entities/html/html-tests.factor | 4 + basis/xml/entities/html/html.factor | 22 ++ basis/xml/entities/html/xhtml-lat1.ent | 196 ++++++++++++++++ basis/xml/entities/html/xhtml-special.ent | 80 +++++++ basis/xml/entities/html/xhtml-symbol.ent | 237 ++++++++++++++++++++ basis/xml/tests/test.factor | 5 +- basis/xml/tokenize/tokenize.factor | 88 ++++---- basis/xml/xml-docs.factor | 2 +- basis/xml/xml.factor | 4 +- 12 files changed, 596 insertions(+), 306 deletions(-) create mode 100644 basis/xml/entities/html/authors.txt create mode 100644 basis/xml/entities/html/html-tests.factor create mode 100644 basis/xml/entities/html/html.factor create mode 100644 basis/xml/entities/html/xhtml-lat1.ent create mode 100644 basis/xml/entities/html/xhtml-special.ent create mode 100644 basis/xml/entities/html/xhtml-symbol.ent diff --git a/basis/syndication/syndication.factor b/basis/syndication/syndication.factor index a6eaff4492..c82fe4006d 100644 --- a/basis/syndication/syndication.factor +++ b/basis/syndication/syndication.factor @@ -3,7 +3,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: xml.utilities kernel assocs xml.generator math.order strings sequences xml.data xml.writer - io.streams.string combinators xml xml.entities io.files io + io.streams.string combinators xml xml.entities.html io.files io http.client namespaces make xml.generator hashtables calendar.format accessors continuations urls present ; IN: syndication diff --git a/basis/xml/entities/entities.factor b/basis/xml/entities/entities.factor index 03de0f78d1..a3812c7723 100644 --- a/basis/xml/entities/entities.factor +++ b/basis/xml/entities/entities.factor @@ -1,6 +1,7 @@ ! Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. -USING: namespaces make kernel assocs sequences fry ; +USING: namespaces make kernel assocs sequences fry values +io.files io.encodings.binary ; IN: xml.entities : entities-out @@ -36,265 +37,7 @@ IN: xml.entities { "quot" CHAR: " } } ; -: html-entities - #! generated from: - #! http://www.w3.org/TR/REC-html40/sgml/entities.html - H{ - { "nbsp" 160 } - { "iexcl" 161 } - { "cent" 162 } - { "pound" 163 } - { "curren" 164 } - { "yen" 165 } - { "brvbar" 166 } - { "sect" 167 } - { "uml" 168 } - { "copy" 169 } - { "ordf" 170 } - { "laquo" 171 } - { "not" 172 } - { "shy" 173 } - { "reg" 174 } - { "macr" 175 } - { "deg" 176 } - { "plusmn" 177 } - { "sup2" 178 } - { "sup3" 179 } - { "acute" 180 } - { "micro" 181 } - { "para" 182 } - { "middot" 183 } - { "cedil" 184 } - { "sup1" 185 } - { "ordm" 186 } - { "raquo" 187 } - { "frac14" 188 } - { "frac12" 189 } - { "frac34" 190 } - { "iquest" 191 } - { "Agrave" 192 } - { "Aacute" 193 } - { "Acirc" 194 } - { "Atilde" 195 } - { "Auml" 196 } - { "Aring" 197 } - { "AElig" 198 } - { "Ccedil" 199 } - { "Egrave" 200 } - { "Eacute" 201 } - { "Ecirc" 202 } - { "Euml" 203 } - { "Igrave" 204 } - { "Iacute" 205 } - { "Icirc" 206 } - { "Iuml" 207 } - { "ETH" 208 } - { "Ntilde" 209 } - { "Ograve" 210 } - { "Oacute" 211 } - { "Ocirc" 212 } - { "Otilde" 213 } - { "Ouml" 214 } - { "times" 215 } - { "Oslash" 216 } - { "Ugrave" 217 } - { "Uacute" 218 } - { "Ucirc" 219 } - { "Uuml" 220 } - { "Yacute" 221 } - { "THORN" 222 } - { "szlig" 223 } - { "agrave" 224 } - { "aacute" 225 } - { "acirc" 226 } - { "atilde" 227 } - { "auml" 228 } - { "aring" 229 } - { "aelig" 230 } - { "ccedil" 231 } - { "egrave" 232 } - { "eacute" 233 } - { "ecirc" 234 } - { "euml" 235 } - { "igrave" 236 } - { "iacute" 237 } - { "icirc" 238 } - { "iuml" 239 } - { "eth" 240 } - { "ntilde" 241 } - { "ograve" 242 } - { "oacute" 243 } - { "ocirc" 244 } - { "otilde" 245 } - { "ouml" 246 } - { "divide" 247 } - { "oslash" 248 } - { "ugrave" 249 } - { "uacute" 250 } - { "ucirc" 251 } - { "uuml" 252 } - { "yacute" 253 } - { "thorn" 254 } - { "yuml" 255 } - { "fnof" 402 } - { "Alpha" 913 } - { "Beta" 914 } - { "Gamma" 915 } - { "Delta" 916 } - { "Epsilon" 917 } - { "Zeta" 918 } - { "Eta" 919 } - { "Theta" 920 } - { "Iota" 921 } - { "Kappa" 922 } - { "Lambda" 923 } - { "Mu" 924 } - { "Nu" 925 } - { "Xi" 926 } - { "Omicron" 927 } - { "Pi" 928 } - { "Rho" 929 } - { "Sigma" 931 } - { "Tau" 932 } - { "Upsilon" 933 } - { "Phi" 934 } - { "Chi" 935 } - { "Psi" 936 } - { "Omega" 937 } - { "alpha" 945 } - { "beta" 946 } - { "gamma" 947 } - { "delta" 948 } - { "epsilon" 949 } - { "zeta" 950 } - { "eta" 951 } - { "theta" 952 } - { "iota" 953 } - { "kappa" 954 } - { "lambda" 955 } - { "mu" 956 } - { "nu" 957 } - { "xi" 958 } - { "omicron" 959 } - { "pi" 960 } - { "rho" 961 } - { "sigmaf" 962 } - { "sigma" 963 } - { "tau" 964 } - { "upsilon" 965 } - { "phi" 966 } - { "chi" 967 } - { "psi" 968 } - { "omega" 969 } - { "thetasym" 977 } - { "upsih" 978 } - { "piv" 982 } - { "bull" 8226 } - { "hellip" 8230 } - { "prime" 8242 } - { "Prime" 8243 } - { "oline" 8254 } - { "frasl" 8260 } - { "weierp" 8472 } - { "image" 8465 } - { "real" 8476 } - { "trade" 8482 } - { "alefsym" 8501 } - { "larr" 8592 } - { "uarr" 8593 } - { "rarr" 8594 } - { "darr" 8595 } - { "harr" 8596 } - { "crarr" 8629 } - { "lArr" 8656 } - { "uArr" 8657 } - { "rArr" 8658 } - { "dArr" 8659 } - { "hArr" 8660 } - { "forall" 8704 } - { "part" 8706 } - { "exist" 8707 } - { "empty" 8709 } - { "nabla" 8711 } - { "isin" 8712 } - { "notin" 8713 } - { "ni" 8715 } - { "prod" 8719 } - { "sum" 8721 } - { "minus" 8722 } - { "lowast" 8727 } - { "radic" 8730 } - { "prop" 8733 } - { "infin" 8734 } - { "ang" 8736 } - { "and" 8743 } - { "or" 8744 } - { "cap" 8745 } - { "cup" 8746 } - { "int" 8747 } - { "there4" 8756 } - { "sim" 8764 } - { "cong" 8773 } - { "asymp" 8776 } - { "ne" 8800 } - { "equiv" 8801 } - { "le" 8804 } - { "ge" 8805 } - { "sub" 8834 } - { "sup" 8835 } - { "nsub" 8836 } - { "sube" 8838 } - { "supe" 8839 } - { "oplus" 8853 } - { "otimes" 8855 } - { "perp" 8869 } - { "sdot" 8901 } - { "lceil" 8968 } - { "rceil" 8969 } - { "lfloor" 8970 } - { "rfloor" 8971 } - { "lang" 9001 } - { "rang" 9002 } - { "loz" 9674 } - { "spades" 9824 } - { "clubs" 9827 } - { "hearts" 9829 } - { "diams" 9830 } - { "OElig" 338 } - { "oelig" 339 } - { "Scaron" 352 } - { "scaron" 353 } - { "Yuml" 376 } - { "circ" 710 } - { "tilde" 732 } - { "ensp" 8194 } - { "emsp" 8195 } - { "thinsp" 8201 } - { "zwnj" 8204 } - { "zwj" 8205 } - { "lrm" 8206 } - { "rlm" 8207 } - { "ndash" 8211 } - { "mdash" 8212 } - { "lsquo" 8216 } - { "rsquo" 8217 } - { "sbquo" 8218 } - { "ldquo" 8220 } - { "rdquo" 8221 } - { "bdquo" 8222 } - { "dagger" 8224 } - { "Dagger" 8225 } - { "permil" 8240 } - { "lsaquo" 8249 } - { "rsaquo" 8250 } - { "euro" 8364 } - } ; - SYMBOL: extra-entities -f extra-entities set-global : with-entities ( entities quot -- ) [ swap extra-entities set call ] with-scope ; inline - -: with-html-entities ( quot -- ) - html-entities swap with-entities ; inline diff --git a/basis/xml/entities/html/authors.txt b/basis/xml/entities/html/authors.txt new file mode 100644 index 0000000000..29e79639ae --- /dev/null +++ b/basis/xml/entities/html/authors.txt @@ -0,0 +1 @@ +Daniel Ehrenberg \ No newline at end of file diff --git a/basis/xml/entities/html/html-tests.factor b/basis/xml/entities/html/html-tests.factor new file mode 100644 index 0000000000..68b10bebe7 --- /dev/null +++ b/basis/xml/entities/html/html-tests.factor @@ -0,0 +1,4 @@ +! Copyright (C) 2009 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test xml.entities.html ; +IN: xml.entities.html.tests diff --git a/basis/xml/entities/html/html.factor b/basis/xml/entities/html/html.factor new file mode 100644 index 0000000000..6f2732f1d9 --- /dev/null +++ b/basis/xml/entities/html/html.factor @@ -0,0 +1,22 @@ +! Copyright (C) 2009 Daniel Ehrenberg. +! See http://factorcode.org/license.txt for BSD license. +USING: assocs io.encodings.binary io.files kernel namespaces sequences +values xml xml.entities ; +IN: xml.entities.html + +VALUE: html-entities + +: read-entities-file ( file -- table ) + f swap binary + [ 2drop extra-entities get ] sax ; + +: get-html ( -- table ) + { "lat1" "special" "symbol" } [ + "resource:basis/xml/entities/html/xhtml-" + swap ".ent" 3append read-entities-file + ] map first3 assoc-union assoc-union ; + +get-html to: html-entities + +: with-html-entities ( quot -- ) + html-entities swap with-entities ; inline diff --git a/basis/xml/entities/html/xhtml-lat1.ent b/basis/xml/entities/html/xhtml-lat1.ent new file mode 100644 index 0000000000..ffee223eb1 --- /dev/null +++ b/basis/xml/entities/html/xhtml-lat1.ent @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/basis/xml/entities/html/xhtml-special.ent b/basis/xml/entities/html/xhtml-special.ent new file mode 100644 index 0000000000..ca358b2fec --- /dev/null +++ b/basis/xml/entities/html/xhtml-special.ent @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/basis/xml/entities/html/xhtml-symbol.ent b/basis/xml/entities/html/xhtml-symbol.ent new file mode 100644 index 0000000000..63c2abfa6f --- /dev/null +++ b/basis/xml/entities/html/xhtml-symbol.ent @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/basis/xml/tests/test.factor b/basis/xml/tests/test.factor index a565df6a9d..7a826756b6 100644 --- a/basis/xml/tests/test.factor +++ b/basis/xml/tests/test.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. IN: xml.tests USING: kernel xml tools.test io namespaces make sequences -xml.errors xml.entities parser strings xml.data io.files +xml.errors xml.entities.html parser strings xml.data io.files xml.writer xml.utilities state-parser continuations assocs sequences.deep accessors io.streams.string ; @@ -62,3 +62,6 @@ SYMBOL: xml-file [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test [ t ] [ "" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test +[ "foo" ] [ "&bar;" string>xml children>string ] unit-test +[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test +[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test \ No newline at end of file diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor index 70b7150ec1..a2ae9c4d58 100644 --- a/basis/xml/tokenize/tokenize.factor +++ b/basis/xml/tokenize/tokenize.factor @@ -4,7 +4,7 @@ USING: accessors arrays ascii assocs combinators combinators.short-circuit fry io.encodings io.encodings.iana io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make math math.parser namespaces sequences sets splitting state-parser -strings xml.char-classes xml.data xml.entities xml.errors ; +strings xml.char-classes xml.data xml.entities xml.errors hashtables ; IN: xml.tokenize ! XML namespace processing: ns = namespace @@ -74,20 +74,17 @@ SYMBOL: ns-stack ! -- Parsing strings -: (parse-entity) ( string -- ) +: parse-named-entity ( string -- ) dup entities at [ , ] [ - prolog-data get standalone>> - [ no-entity ] [ - dup extra-entities get at - [ , ] [ no-entity ] ?if - ] if + dup extra-entities get at + [ dup number? [ , ] [ % ] if ] [ no-entity ] ?if ! Make less hackish ] ?if ; : parse-entity ( -- ) next CHAR: ; take-char next "#" ?head [ "x" ?head 16 10 ? base> , - ] [ (parse-entity) ] if ; + ] [ parse-named-entity ] if ; : (parse-char) ( ch -- ) get-char { @@ -100,10 +97,6 @@ SYMBOL: ns-stack : parse-char ( ch -- string ) [ (parse-char) ] "" make ; -: parse-quot ( ch -- string ) - parse-char get-char - [ unclosed-quote ] unless ; - : parse-text ( -- string ) CHAR: < parse-char ; @@ -114,14 +107,18 @@ SYMBOL: ns-stack get-char CHAR: / = dup [ next ] when parse-name swap ; -: parse-attr-value ( -- seq ) - get-char dup "'\"" member? - [ next parse-quot ] [ quoteless-attr ] if ; +: (parse-quote) ( ch -- string ) + parse-char get-char + [ unclosed-quote ] unless ; + +: parse-quote ( -- seq ) + pass-blank get-char dup "'\"" member? + [ next (parse-quote) ] [ quoteless-attr ] if ; : parse-attr ( -- ) - [ parse-name ] with-scope - pass-blank CHAR: = expect pass-blank - [ parse-attr-value ] with-scope + parse-name + pass-blank CHAR: = expect + parse-quote 2array , ; : (middle-tag) ( -- ) @@ -157,7 +154,7 @@ SYMBOL: ns-stack : only-blanks ( str -- ) [ blank? ] all? [ bad-doctype-decl ] unless ; -: take-system-literal ( -- str ) +: take-system-literal ( -- str ) ! replace with parse-quote? pass-blank get-char next { { CHAR: ' [ "'" take-string ] } { CHAR: " [ "\"" take-string ] } @@ -211,15 +208,18 @@ DEFER: direct : take-entity-def ( -- entity-name entity-def ) " " take-string pass-blank get-char { - { CHAR: ' [ take-system-literal ] } - { CHAR: " [ take-system-literal ] } + { CHAR: ' [ parse-quote ] } + { CHAR: " [ parse-quote ] } [ drop take-external-id ] } case ; +: associate-entity ( entity-name entity-def -- ) + swap extra-entities [ ?set-at ] change ; + : take-entity-decl ( -- entity-decl ) pass-blank get-char { { CHAR: % [ next pass-blank take-entity-def ] } - [ drop take-entity-def ] + [ drop take-entity-def 2dup associate-entity ] } case ">" take-string only-blanks ; @@ -257,14 +257,22 @@ DEFER: direct : good-version ( version -- version ) dup { "1.0" "1.1" } member? [ bad-version ] unless ; -: prolog-attrs ( alist -- prolog ) - [ T{ name f "" "version" f } swap at - [ good-version ] [ versionless-prolog ] if* ] keep - [ T{ name f "" "encoding" f } swap at - "UTF-8" or ] keep +: prolog-version ( alist -- version ) + T{ name f "" "version" f } swap at + [ good-version ] [ versionless-prolog ] if* ; + +: prolog-encoding ( alist -- encoding ) + T{ name f "" "encoding" f } swap at "UTF-8" or ; + +: prolog-standalone ( alist -- version ) T{ name f "" "standalone" f } swap at - [ yes/no>bool ] [ f ] if* - ; + [ yes/no>bool ] [ f ] if* ; + +: prolog-attrs ( alist -- prolog ) + [ prolog-version ] + [ prolog-encoding ] + [ prolog-standalone ] + tri ; SYMBOL: string-input? : decode-input-if ( encoding -- ) @@ -288,7 +296,7 @@ SYMBOL: string-input? : make-tag ( -- tag ) { { [ get-char dup CHAR: ! = ] [ drop next direct ] } - { [ CHAR: ? = ] [ next instruct ] } + { [ CHAR: ? = ] [ next instruct ] } [ start-tag [ dup add-ns pop-ns ] [ middle-tag end-tag ] if @@ -331,19 +339,17 @@ SYMBOL: string-input? "\u0000bb\u0000bf" expect utf8 decode-input CHAR: < expect make-tag ; +: decode-expecting ( encoding string -- tag ) + [ decode-input-if next ] [ expect-string ] bi* make-tag ; + : start-utf16be ( -- tag ) - utf16be decode-input-if - next CHAR: < expect make-tag ; + utf16be "<" decode-expecting ; : skip-utf16le-bom ( -- tag ) - utf16le decode-input-if - next HEX: FE expect - CHAR: < expect make-tag ; + utf16le "\u0000fe<" decode-expecting ; : skip-utf16be-bom ( -- tag ) - utf16be decode-input-if - next HEX: FF expect - CHAR: < expect make-tag ; + utf16be "\u0000ff<" decode-expecting ; : start-document ( -- tag ) get-char { @@ -353,8 +359,6 @@ SYMBOL: string-input? { HEX: FF [ skip-utf16le-bom ] } { HEX: FE [ skip-utf16be-bom ] } { f [ "" ] } - [ dup blank? - [ drop pass-blank utf8 decode-input-if CHAR: < expect make-tag ] - [ 1string ] if ! Replace with proper error? - ] + [ drop utf8 decode-input-if f ] + ! Same problem as with , in the case of XML chunks? } case ; diff --git a/basis/xml/xml-docs.factor b/basis/xml/xml-docs.factor index 18bd4d7328..e87c32d375 100644 --- a/basis/xml/xml-docs.factor +++ b/basis/xml/xml-docs.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: help.markup help.syntax kernel xml.data xml.errors xml.writer state-parser xml.tokenize xml.utilities xml.entities -strings sequences io ; +strings sequences io xml.entities.html ; IN: xml HELP: string>xml diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor index e29bb82eaf..328a058a58 100644 --- a/basis/xml/xml.factor +++ b/basis/xml/xml.factor @@ -133,12 +133,12 @@ TUPLE: pull-xml scope ; : sax ( stream quot: ( xml-elem -- ) -- ) swap [ reset-prolog init-ns-stack - start-document call-under + start-document [ call-under ] when* sax-loop ] state-parse ; inline recursive : (read-xml) ( -- ) - start-document process + start-document [ process ] when* [ process ] sax-loop ; inline : (read-xml-chunk) ( stream -- prolog seq )