From a14854520da6b9c41ee0f0aeb9235fa9d894129a Mon Sep 17 00:00:00 2001 From: Chris Double Date: Fri, 21 Mar 2008 03:05:21 +1300 Subject: [PATCH 01/15] Compile pegs down to words --- extra/peg/parsers/parsers.factor | 6 +- extra/peg/peg.factor | 124 +++++++++++++++++++------------ 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 3ccb1e7d10..407729004f 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -16,11 +16,11 @@ TUPLE: just-parser p1 ; ] ; -M: just-parser compile ( parser -- quot ) - just-parser-p1 compile just-pattern append ; +M: just-parser (compile) ( parser -- quot ) + just-parser-p1 compiled-parser just-pattern curry ; : just ( parser -- parser ) - just-parser construct-boa ; + just-parser construct-boa init-parser ; : 1token ( ch -- parser ) 1string token ; diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index b3200ec5eb..9d6b18398e 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -8,16 +8,42 @@ IN: peg TUPLE: parse-result remaining ast ; -GENERIC: compile ( parser -- quot ) - -: parse ( state parser -- result ) - compile call ; - SYMBOL: ignore : ( remaining ast -- parse-result ) parse-result construct-boa ; +TUPLE: parser ; +C: parser +M: parser equal? eq? ; + +: init-parser ( parser -- parser ) + #! Set the delegate for the parser + over set-delegate ; + +SYMBOL: compiled-parsers + +GENERIC: (compile) ( parser -- quot ) + +: compiled-parser ( parser -- word ) + #! Look to see if the given parser has been compied. + #! If not, compile it to a temporary word, cache it, + #! and return it. Otherwise return the existing one. + dup compiled-parsers get at [ + nip + ] [ + dup (compile) define-temp + [ swap compiled-parsers get set-at ] keep + ] if* ; + +: compile ( parser -- word ) + H{ } clone compiled-parsers [ + [ compiled-parser ] with-compilation-unit + ] with-variable ; + +: parse ( state parser -- result ) + compile call ; + ] % - seq-parser-parsers [ compile \ ?quot seq-pattern match-replace % ] each + seq-parser-parsers [ compiled-parser \ ?quot seq-pattern match-replace % ] each ] [ ] make ; TUPLE: choice-parser parsers ; @@ -110,14 +136,14 @@ TUPLE: choice-parser parsers ; dup [ ] [ - drop dup ?quot call + drop dup ?quot ] if ] ; -M: choice-parser compile ( parser -- quot ) +M: choice-parser (compile) ( parser -- quot ) [ f , - choice-parser-parsers [ compile \ ?quot choice-pattern match-replace % ] each + choice-parser-parsers [ compiled-parser \ ?quot choice-pattern match-replace % ] each \ nip , ] [ ] make ; @@ -134,20 +160,20 @@ TUPLE: repeat0-parser p1 ; : repeat0-pattern ( -- quot ) [ - ?quot swap (repeat0) + [ ?quot ] swap (repeat0) ] ; -M: repeat0-parser compile ( parser -- quot ) +M: repeat0-parser (compile) ( parser -- quot ) [ [ V{ } clone ] % - repeat0-parser-p1 compile \ ?quot repeat0-pattern match-replace % + repeat0-parser-p1 compiled-parser \ ?quot repeat0-pattern match-replace % ] [ ] make ; TUPLE: repeat1-parser p1 ; : repeat1-pattern ( -- quot ) [ - ?quot swap (repeat0) [ + [ ?quot ] swap (repeat0) [ dup parse-result-ast empty? [ drop f ] when @@ -156,49 +182,49 @@ TUPLE: repeat1-parser p1 ; ] if* ] ; -M: repeat1-parser compile ( parser -- quot ) +M: repeat1-parser (compile) ( parser -- quot ) [ [ V{ } clone ] % - repeat1-parser-p1 compile \ ?quot repeat1-pattern match-replace % + repeat1-parser-p1 compiled-parser \ ?quot repeat1-pattern match-replace % ] [ ] make ; TUPLE: optional-parser p1 ; : optional-pattern ( -- quot ) [ - dup ?quot call swap f or + dup ?quot swap f or ] ; -M: optional-parser compile ( parser -- quot ) - optional-parser-p1 compile \ ?quot optional-pattern match-replace ; +M: optional-parser (compile) ( parser -- quot ) + optional-parser-p1 compiled-parser \ ?quot optional-pattern match-replace ; TUPLE: ensure-parser p1 ; : ensure-pattern ( -- quot ) [ - dup ?quot call [ + dup ?quot [ ignore ] [ drop f ] if ] ; -M: ensure-parser compile ( parser -- quot ) - ensure-parser-p1 compile \ ?quot ensure-pattern match-replace ; +M: ensure-parser (compile) ( parser -- quot ) + ensure-parser-p1 compiled-parser \ ?quot ensure-pattern match-replace ; TUPLE: ensure-not-parser p1 ; : ensure-not-pattern ( -- quot ) [ - dup ?quot call [ + dup ?quot [ drop f ] [ ignore ] if ] ; -M: ensure-not-parser compile ( parser -- quot ) - ensure-not-parser-p1 compile \ ?quot ensure-not-pattern match-replace ; +M: ensure-not-parser (compile) ( parser -- quot ) + ensure-not-parser-p1 compiled-parser \ ?quot ensure-not-pattern match-replace ; TUPLE: action-parser p1 quot ; @@ -206,14 +232,14 @@ MATCH-VARS: ?action ; : action-pattern ( -- quot ) [ - ?quot call dup [ + ?quot dup [ dup parse-result-ast ?action call swap [ set-parse-result-ast ] keep ] when ] ; -M: action-parser compile ( parser -- quot ) - { action-parser-p1 action-parser-quot } get-slots [ compile ] dip +M: action-parser (compile) ( parser -- quot ) + { action-parser-p1 action-parser-quot } get-slots [ compiled-parser ] dip 2array { ?quot ?action } action-pattern match-replace ; : left-trim-slice ( string -- string ) @@ -225,31 +251,31 @@ M: action-parser compile ( parser -- quot ) TUPLE: sp-parser p1 ; -M: sp-parser compile ( parser -- quot ) +M: sp-parser (compile) ( parser -- quot ) [ - \ left-trim-slice , sp-parser-p1 compile % + \ left-trim-slice , sp-parser-p1 compiled-parser , ] [ ] make ; TUPLE: delay-parser quot ; -M: delay-parser compile ( parser -- quot ) +M: delay-parser (compile) ( parser -- quot ) [ - delay-parser-quot % \ compile , \ call , + delay-parser-quot % \ (compile) , \ call , ] [ ] make ; PRIVATE> : token ( string -- parser ) - token-parser construct-boa ; + token-parser construct-boa init-parser ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa ; + satisfy-parser construct-boa init-parser ; : range ( min max -- parser ) - range-parser construct-boa ; + range-parser construct-boa init-parser ; : seq ( seq -- parser ) - seq-parser construct-boa ; + seq-parser construct-boa init-parser ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -264,7 +290,7 @@ PRIVATE> { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa ; + choice-parser construct-boa init-parser ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -279,31 +305,31 @@ PRIVATE> { } make choice ; inline : repeat0 ( parser -- parser ) - repeat0-parser construct-boa ; + repeat0-parser construct-boa init-parser ; : repeat1 ( parser -- parser ) - repeat1-parser construct-boa ; + repeat1-parser construct-boa init-parser ; : optional ( parser -- parser ) - optional-parser construct-boa ; + optional-parser construct-boa init-parser ; : ensure ( parser -- parser ) - ensure-parser construct-boa ; + ensure-parser construct-boa init-parser ; : ensure-not ( parser -- parser ) - ensure-not-parser construct-boa ; + ensure-not-parser construct-boa init-parser ; : action ( parser quot -- parser ) - action-parser construct-boa ; + action-parser construct-boa init-parser ; : sp ( parser -- parser ) - sp-parser construct-boa ; + sp-parser construct-boa init-parser ; : hide ( parser -- parser ) [ drop ignore ] action ; : delay ( quot -- parser ) - delay-parser construct-boa ; + delay-parser construct-boa init-parser ; : PEG: (:) [ From d1e0aa6e806e730d1972274e262a2f5b8ddd3563 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Sat, 22 Mar 2008 00:58:53 +1300 Subject: [PATCH 02/15] Get peg subvocabs working again --- extra/peg/ebnf/ebnf-tests.factor | 2 +- extra/peg/ebnf/ebnf.factor | 2 +- extra/peg/peg.factor | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/extra/peg/ebnf/ebnf-tests.factor b/extra/peg/ebnf/ebnf-tests.factor index 54639431a4..c9b9f5d977 100644 --- a/extra/peg/ebnf/ebnf-tests.factor +++ b/extra/peg/ebnf/ebnf-tests.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2007 Chris Double. ! See http://factorcode.org/license.txt for BSD license. ! -USING: kernel tools.test peg peg.ebnf ; +USING: kernel tools.test peg peg.ebnf words ; IN: peg.ebnf.tests { T{ ebnf-non-terminal f "abc" } } [ diff --git a/extra/peg/ebnf/ebnf.factor b/extra/peg/ebnf/ebnf.factor index ab7baa547e..db478e571f 100644 --- a/extra/peg/ebnf/ebnf.factor +++ b/extra/peg/ebnf/ebnf.factor @@ -278,7 +278,7 @@ M: ebnf-non-terminal (transform) ( ast -- parser ) : ebnf>quot ( string -- hashtable quot ) 'ebnf' parse check-parse-result - parse-result-ast transform dup main swap at compile ; + parse-result-ast transform dup main swap at compile 1quotation ; : [EBNF "EBNF]" parse-multiline-string ebnf>quot nip parsed ; parsing diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 9d6b18398e..47dc0a3454 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words ; + words quotations ; IN: peg TUPLE: parse-result remaining ast ; @@ -42,7 +42,7 @@ GENERIC: (compile) ( parser -- quot ) ] with-variable ; : parse ( state parser -- result ) - compile call ; + compile execute ; @@ -334,7 +334,7 @@ PRIVATE> : PEG: (:) [ [ - call compile + call compile 1quotation [ dup [ parse-result-ast ] [ "Parse failed" throw ] if ] append define ] with-compilation-unit From 943b02ab2f1893012ff68af1bef4214f03c4d349 Mon Sep 17 00:00:00 2001 From: Chris Double Date: Sat, 22 Mar 2008 01:59:16 +1300 Subject: [PATCH 03/15] Fix performance regression in pegs delay parser is improved to use a memoized quotation so the construction and compilation of the parser at runtime only occurs once. Changed compile so it would use equality rather than identity for memoization purposes. --- extra/peg/parsers/parsers.factor | 2 +- extra/peg/peg.factor | 50 +++++++++++++++----------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/extra/peg/parsers/parsers.factor b/extra/peg/parsers/parsers.factor index 407729004f..4bba60bb09 100755 --- a/extra/peg/parsers/parsers.factor +++ b/extra/peg/parsers/parsers.factor @@ -20,7 +20,7 @@ M: just-parser (compile) ( parser -- quot ) just-parser-p1 compiled-parser just-pattern curry ; : just ( parser -- parser ) - just-parser construct-boa init-parser ; + just-parser construct-boa ; : 1token ( ch -- parser ) 1string token ; diff --git a/extra/peg/peg.factor b/extra/peg/peg.factor index 47dc0a3454..1707193e70 100755 --- a/extra/peg/peg.factor +++ b/extra/peg/peg.factor @@ -3,7 +3,7 @@ USING: kernel sequences strings namespaces math assocs shuffle vectors arrays combinators.lib math.parser match unicode.categories sequences.lib compiler.units parser - words quotations ; + words quotations effects memoize ; IN: peg TUPLE: parse-result remaining ast ; @@ -13,20 +13,12 @@ SYMBOL: ignore : ( remaining ast -- parse-result ) parse-result construct-boa ; -TUPLE: parser ; -C: parser -M: parser equal? eq? ; - -: init-parser ( parser -- parser ) - #! Set the delegate for the parser - over set-delegate ; - SYMBOL: compiled-parsers GENERIC: (compile) ( parser -- quot ) : compiled-parser ( parser -- word ) - #! Look to see if the given parser has been compied. + #! Look to see if the given parser has been compiled. #! If not, compile it to a temporary word, cache it, #! and return it. Otherwise return the existing one. dup compiled-parsers get at [ @@ -36,7 +28,7 @@ GENERIC: (compile) ( parser -- quot ) [ swap compiled-parsers get set-at ] keep ] if* ; -: compile ( parser -- word ) +MEMO: compile ( parser -- word ) H{ } clone compiled-parsers [ [ compiled-parser ] with-compilation-unit ] with-variable ; @@ -47,6 +39,7 @@ GENERIC: (compile) ( parser -- quot ) memoize-quot + [ % \ execute , ] [ ] make ; PRIVATE> : token ( string -- parser ) - token-parser construct-boa init-parser ; + token-parser construct-boa ; : satisfy ( quot -- parser ) - satisfy-parser construct-boa init-parser ; + satisfy-parser construct-boa ; : range ( min max -- parser ) - range-parser construct-boa init-parser ; + range-parser construct-boa ; : seq ( seq -- parser ) - seq-parser construct-boa init-parser ; + seq-parser construct-boa ; : 2seq ( parser1 parser2 -- parser ) 2array seq ; @@ -290,7 +288,7 @@ PRIVATE> { } make seq ; inline : choice ( seq -- parser ) - choice-parser construct-boa init-parser ; + choice-parser construct-boa ; : 2choice ( parser1 parser2 -- parser ) 2array choice ; @@ -305,31 +303,31 @@ PRIVATE> { } make choice ; inline : repeat0 ( parser -- parser ) - repeat0-parser construct-boa init-parser ; + repeat0-parser construct-boa ; : repeat1 ( parser -- parser ) - repeat1-parser construct-boa init-parser ; + repeat1-parser construct-boa ; : optional ( parser -- parser ) - optional-parser construct-boa init-parser ; + optional-parser construct-boa ; : ensure ( parser -- parser ) - ensure-parser construct-boa init-parser ; + ensure-parser construct-boa ; : ensure-not ( parser -- parser ) - ensure-not-parser construct-boa init-parser ; + ensure-not-parser construct-boa ; : action ( parser quot -- parser ) - action-parser construct-boa init-parser ; + action-parser construct-boa ; : sp ( parser -- parser ) - sp-parser construct-boa init-parser ; + sp-parser construct-boa ; : hide ( parser -- parser ) [ drop ignore ] action ; : delay ( quot -- parser ) - delay-parser construct-boa init-parser ; + delay-parser construct-boa ; : PEG: (:) [ From 3586b5a35d8b043fb46389064ccd691766c9cb30 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 21 Mar 2008 12:30:13 -0400 Subject: [PATCH 04/15] More 8-bit encodings --- extra/io/encodings/8-bit/8-bit-tests.factor | 9 + extra/io/encodings/8-bit/8-bit.factor | 89 +++++ extra/io/encodings/8-bit/8859-1.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-10.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-11.TXT | 297 ++++++++++++++ extra/io/encodings/8-bit/8859-13.TXT | 299 ++++++++++++++ extra/io/encodings/8-bit/8859-14.TXT | 301 ++++++++++++++ extra/io/encodings/8-bit/8859-15.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-16.TXT | 299 ++++++++++++++ extra/io/encodings/8-bit/8859-2.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-3.TXT | 296 ++++++++++++++ extra/io/encodings/8-bit/8859-4.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-5.TXT | 303 ++++++++++++++ extra/io/encodings/8-bit/8859-6.TXT | 260 ++++++++++++ extra/io/encodings/8-bit/8859-7.TXT | 308 +++++++++++++++ extra/io/encodings/8-bit/8859-8.TXT | 270 +++++++++++++ extra/io/encodings/8-bit/8859-9.TXT | 307 +++++++++++++++ extra/io/encodings/8-bit/CP037.TXT | 275 +++++++++++++ extra/io/encodings/8-bit/CP1252.TXT | 274 +++++++++++++ extra/io/encodings/8-bit/GSM0338.TXT | 239 +++++++++++ extra/io/encodings/8-bit/KOI8-R.TXT | 302 ++++++++++++++ extra/io/encodings/8-bit/ROMAN.TXT | 370 ++++++++++++++++++ .../encodings/{latin1 => 8-bit}/authors.txt | 0 extra/io/encodings/8-bit/summary.txt | 1 + extra/io/encodings/{latin1 => 8-bit}/tags.txt | 0 extra/io/encodings/latin1/latin1-docs.factor | 5 - extra/io/encodings/latin1/latin1-tests.factor | 9 - extra/io/encodings/latin1/latin1.factor | 12 - extra/io/encodings/latin1/summary.txt | 1 - 29 files changed, 6014 insertions(+), 27 deletions(-) create mode 100644 extra/io/encodings/8-bit/8-bit-tests.factor create mode 100644 extra/io/encodings/8-bit/8-bit.factor create mode 100644 extra/io/encodings/8-bit/8859-1.TXT create mode 100644 extra/io/encodings/8-bit/8859-10.TXT create mode 100644 extra/io/encodings/8-bit/8859-11.TXT create mode 100644 extra/io/encodings/8-bit/8859-13.TXT create mode 100644 extra/io/encodings/8-bit/8859-14.TXT create mode 100644 extra/io/encodings/8-bit/8859-15.TXT create mode 100644 extra/io/encodings/8-bit/8859-16.TXT create mode 100644 extra/io/encodings/8-bit/8859-2.TXT create mode 100644 extra/io/encodings/8-bit/8859-3.TXT create mode 100644 extra/io/encodings/8-bit/8859-4.TXT create mode 100644 extra/io/encodings/8-bit/8859-5.TXT create mode 100644 extra/io/encodings/8-bit/8859-6.TXT create mode 100644 extra/io/encodings/8-bit/8859-7.TXT create mode 100644 extra/io/encodings/8-bit/8859-8.TXT create mode 100644 extra/io/encodings/8-bit/8859-9.TXT create mode 100644 extra/io/encodings/8-bit/CP037.TXT create mode 100644 extra/io/encodings/8-bit/CP1252.TXT create mode 100644 extra/io/encodings/8-bit/GSM0338.TXT create mode 100644 extra/io/encodings/8-bit/KOI8-R.TXT create mode 100644 extra/io/encodings/8-bit/ROMAN.TXT rename extra/io/encodings/{latin1 => 8-bit}/authors.txt (100%) create mode 100644 extra/io/encodings/8-bit/summary.txt rename extra/io/encodings/{latin1 => 8-bit}/tags.txt (100%) delete mode 100644 extra/io/encodings/latin1/latin1-docs.factor delete mode 100644 extra/io/encodings/latin1/latin1-tests.factor delete mode 100755 extra/io/encodings/latin1/latin1.factor delete mode 100644 extra/io/encodings/latin1/summary.txt diff --git a/extra/io/encodings/8-bit/8-bit-tests.factor b/extra/io/encodings/8-bit/8-bit-tests.factor new file mode 100644 index 0000000000..316e496219 --- /dev/null +++ b/extra/io/encodings/8-bit/8-bit-tests.factor @@ -0,0 +1,9 @@ +USING: io.encodings.string io.encodings.8-bit tools.test strings arrays ; +IN: io.encodings.8-bit.tests + +[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" iso-8859-1 encode ] unit-test +[ { 256 } >string iso-8859-1 encode ] must-fail +[ B{ 255 } ] [ { 255 } iso-8859-1 encode ] unit-test + +[ "bar" ] [ "bar" iso-8859-1 decode ] unit-test +[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } iso-8859-1 decode >array ] unit-test diff --git a/extra/io/encodings/8-bit/8-bit.factor b/extra/io/encodings/8-bit/8-bit.factor new file mode 100644 index 0000000000..ff0e6ec8bf --- /dev/null +++ b/extra/io/encodings/8-bit/8-bit.factor @@ -0,0 +1,89 @@ +! Copyright (C) 2008 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. +USING: math.parser arrays io.encodings sequences kernel +assocs hashtables io.encodings.ascii combinators.cleave +generic parser tuples words io io.files splitting namespaces +classes quotations ; +IN: io.encodings.8-bit + + ] map ] map ; + +: byte>ch ( assoc -- array ) + 256 replacement-char + [ [ swapd set-nth ] curry assoc-each ] keep ; + +: ch>byte ( assoc -- newassoc ) + [ swap ] assoc-map >hashtable ; + +: parse-file ( file-name -- byte>ch ch>byte ) + full-path ascii file-lines process-contents + [ byte>ch ] [ ch>byte ] bi ; + +: empty-tuple-class ( string -- class ) + in get create + dup { f } "slots" set-word-prop + dup predicate-word drop + dup { } define-tuple-class ; + +: data-quot ( class word data -- quot ) + >r [ word-name ] 2apply "/" swap 3append + "/data" append in get create dup 1quotation swap r> + 1quotation define ; + +: method-with-data ( class data word quot -- ) + >r swap >r 2dup r> data-quot r> + compose >r create-method r> define ; + +: encode-8-bit ( char stream encoding assoc -- ) + nip swapd at* [ encode-error ] unless swap stream-write1 ; + +: define-encode-char ( class assoc -- ) + \ encode-char [ encode-8-bit ] method-with-data ; + +: decode-8-bit ( stream encoding array -- char/f ) + nip swap stream-read1 [ swap nth ] [ drop f ] if* ; + +: define-decode-char ( class array -- ) + \ decode-char [ decode-8-bit ] method-with-data ; + +: 8-bit-methods ( class byte>ch ch>byte -- ) + >r over r> define-encode-char define-decode-char ; + +: define-8-bit-encoding ( tuple-name file-name -- ) + >r empty-tuple-class r> parse-file 8-bit-methods ; + +PRIVATE> + +! << mappings [ define-8-bit-encoding ] assoc-each >> diff --git a/extra/io/encodings/8-bit/8859-1.TXT b/extra/io/encodings/8-bit/8859-1.TXT new file mode 100644 index 0000000000..473ecabc17 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-1.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-1:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-1:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-1 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/extra/io/encodings/8-bit/8859-10.TXT b/extra/io/encodings/8-bit/8859-10.TXT new file mode 100644 index 0000000000..374a42b1a5 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-10.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-10:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.1 +# Table format: Format A +# Date: 1999 October 11 +# Authors: Ken Whistler +# +# Copyright (c) 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-10:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-10 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-10 order. +# +# Version history +# 1.0 version new. +# 1.1 corrected mistake in mapping of 0xA4 +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xA3 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xA4 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xA5 0x0128 # LATIN CAPITAL LETTER I WITH TILDE +0xA6 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xA7 0x00A7 # SECTION SIGN +0xA8 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xA9 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xAA 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAB 0x0166 # LATIN CAPITAL LETTER T WITH STROKE +0xAC 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xAF 0x014A # LATIN CAPITAL LETTER ENG +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xB2 0x0113 # LATIN SMALL LETTER E WITH MACRON +0xB3 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xB4 0x012B # LATIN SMALL LETTER I WITH MACRON +0xB5 0x0129 # LATIN SMALL LETTER I WITH TILDE +0xB6 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x013C # LATIN SMALL LETTER L WITH CEDILLA +0xB9 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xBA 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBB 0x0167 # LATIN SMALL LETTER T WITH STROKE +0xBC 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBD 0x2015 # HORIZONTAL BAR +0xBE 0x016B # LATIN SMALL LETTER U WITH MACRON +0xBF 0x014B # LATIN SMALL LETTER ENG +0xC0 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic) +0xD1 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD2 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x0168 # LATIN CAPITAL LETTER U WITH TILDE +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic) +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic) +0xF1 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF2 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x0169 # LATIN SMALL LETTER U WITH TILDE +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic) +0xFF 0x0138 # LATIN SMALL LETTER KRA diff --git a/extra/io/encodings/8-bit/8859-11.TXT b/extra/io/encodings/8-bit/8859-11.TXT new file mode 100644 index 0000000000..192bd9d7cf --- /dev/null +++ b/extra/io/encodings/8-bit/8859-11.TXT @@ -0,0 +1,297 @@ +# +# Name: ISO/IEC 8859-11:2001 to Unicode +# Unicode version: 3.2 +# Table version: 1.0 +# Table format: Format A +# Date: 2002 October 7 +# Authors: Ken Whistler +# +# Copyright (c) 2002 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-11:2001 characters map into Unicode. +# +# ISO/IEC 8859-11:2001 is equivalent to TIS 620-2533 (1990) with +# the addition of 0xA0 NO-BREAK SPACE. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-11 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-11 order. +# +# Version history: +# 2002 October 7 Created +# +# Updated versions of this file may be found in: +# +# +# For any comments or problems, please use the Unicode +# web contact form at: +# http://www.unicode.org/unicode/reporting.html +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0E01 # THAI CHARACTER KO KAI +0xA2 0x0E02 # THAI CHARACTER KHO KHAI +0xA3 0x0E03 # THAI CHARACTER KHO KHUAT +0xA4 0x0E04 # THAI CHARACTER KHO KHWAI +0xA5 0x0E05 # THAI CHARACTER KHO KHON +0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG +0xA7 0x0E07 # THAI CHARACTER NGO NGU +0xA8 0x0E08 # THAI CHARACTER CHO CHAN +0xA9 0x0E09 # THAI CHARACTER CHO CHING +0xAA 0x0E0A # THAI CHARACTER CHO CHANG +0xAB 0x0E0B # THAI CHARACTER SO SO +0xAC 0x0E0C # THAI CHARACTER CHO CHOE +0xAD 0x0E0D # THAI CHARACTER YO YING +0xAE 0x0E0E # THAI CHARACTER DO CHADA +0xAF 0x0E0F # THAI CHARACTER TO PATAK +0xB0 0x0E10 # THAI CHARACTER THO THAN +0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO +0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO +0xB3 0x0E13 # THAI CHARACTER NO NEN +0xB4 0x0E14 # THAI CHARACTER DO DEK +0xB5 0x0E15 # THAI CHARACTER TO TAO +0xB6 0x0E16 # THAI CHARACTER THO THUNG +0xB7 0x0E17 # THAI CHARACTER THO THAHAN +0xB8 0x0E18 # THAI CHARACTER THO THONG +0xB9 0x0E19 # THAI CHARACTER NO NU +0xBA 0x0E1A # THAI CHARACTER BO BAIMAI +0xBB 0x0E1B # THAI CHARACTER PO PLA +0xBC 0x0E1C # THAI CHARACTER PHO PHUNG +0xBD 0x0E1D # THAI CHARACTER FO FA +0xBE 0x0E1E # THAI CHARACTER PHO PHAN +0xBF 0x0E1F # THAI CHARACTER FO FAN +0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO +0xC1 0x0E21 # THAI CHARACTER MO MA +0xC2 0x0E22 # THAI CHARACTER YO YAK +0xC3 0x0E23 # THAI CHARACTER RO RUA +0xC4 0x0E24 # THAI CHARACTER RU +0xC5 0x0E25 # THAI CHARACTER LO LING +0xC6 0x0E26 # THAI CHARACTER LU +0xC7 0x0E27 # THAI CHARACTER WO WAEN +0xC8 0x0E28 # THAI CHARACTER SO SALA +0xC9 0x0E29 # THAI CHARACTER SO RUSI +0xCA 0x0E2A # THAI CHARACTER SO SUA +0xCB 0x0E2B # THAI CHARACTER HO HIP +0xCC 0x0E2C # THAI CHARACTER LO CHULA +0xCD 0x0E2D # THAI CHARACTER O ANG +0xCE 0x0E2E # THAI CHARACTER HO NOKHUK +0xCF 0x0E2F # THAI CHARACTER PAIYANNOI +0xD0 0x0E30 # THAI CHARACTER SARA A +0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT +0xD2 0x0E32 # THAI CHARACTER SARA AA +0xD3 0x0E33 # THAI CHARACTER SARA AM +0xD4 0x0E34 # THAI CHARACTER SARA I +0xD5 0x0E35 # THAI CHARACTER SARA II +0xD6 0x0E36 # THAI CHARACTER SARA UE +0xD7 0x0E37 # THAI CHARACTER SARA UEE +0xD8 0x0E38 # THAI CHARACTER SARA U +0xD9 0x0E39 # THAI CHARACTER SARA UU +0xDA 0x0E3A # THAI CHARACTER PHINTHU +0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT +0xE0 0x0E40 # THAI CHARACTER SARA E +0xE1 0x0E41 # THAI CHARACTER SARA AE +0xE2 0x0E42 # THAI CHARACTER SARA O +0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN +0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI +0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO +0xE6 0x0E46 # THAI CHARACTER MAIYAMOK +0xE7 0x0E47 # THAI CHARACTER MAITAIKHU +0xE8 0x0E48 # THAI CHARACTER MAI EK +0xE9 0x0E49 # THAI CHARACTER MAI THO +0xEA 0x0E4A # THAI CHARACTER MAI TRI +0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA +0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT +0xED 0x0E4D # THAI CHARACTER NIKHAHIT +0xEE 0x0E4E # THAI CHARACTER YAMAKKAN +0xEF 0x0E4F # THAI CHARACTER FONGMAN +0xF0 0x0E50 # THAI DIGIT ZERO +0xF1 0x0E51 # THAI DIGIT ONE +0xF2 0x0E52 # THAI DIGIT TWO +0xF3 0x0E53 # THAI DIGIT THREE +0xF4 0x0E54 # THAI DIGIT FOUR +0xF5 0x0E55 # THAI DIGIT FIVE +0xF6 0x0E56 # THAI DIGIT SIX +0xF7 0x0E57 # THAI DIGIT SEVEN +0xF8 0x0E58 # THAI DIGIT EIGHT +0xF9 0x0E59 # THAI DIGIT NINE +0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU +0xFB 0x0E5B # THAI CHARACTER KHOMUT diff --git a/extra/io/encodings/8-bit/8859-13.TXT b/extra/io/encodings/8-bit/8859-13.TXT new file mode 100644 index 0000000000..cd11b53fd7 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-13.TXT @@ -0,0 +1,299 @@ +# +# Name: ISO/IEC 8859-13:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-13:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-13 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-13 order. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x201D # RIGHT DOUBLE QUOTATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00C6 # LATIN CAPITAL LETTER AE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x201C # LEFT DOUBLE QUOTATION MARK +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x0157 # LATIN SMALL LETTER R WITH CEDILLA +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00E6 # LATIN SMALL LETTER AE +0xC0 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xC1 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC2 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC3 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xC7 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xCB 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCC 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xCD 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xCE 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xCF 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xD0 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xD9 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xDA 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xDB 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xDE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German) +0xE0 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xE1 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE2 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE3 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xE7 0x0113 # LATIN SMALL LETTER E WITH MACRON +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xEB 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xEC 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xED 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xEE 0x012B # LATIN SMALL LETTER I WITH MACRON +0xEF 0x013C # LATIN SMALL LETTER L WITH CEDILLA +0xF0 0x0161 # LATIN SMALL LETTER S WITH CARON +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xF9 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xFA 0x015B # LATIN SMALL LETTER S WITH ACUTE +0xFB 0x016B # LATIN SMALL LETTER U WITH MACRON +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xFE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xFF 0x2019 # RIGHT SINGLE QUOTATION MARK diff --git a/extra/io/encodings/8-bit/8859-14.TXT b/extra/io/encodings/8-bit/8859-14.TXT new file mode 100644 index 0000000000..73e98555ea --- /dev/null +++ b/extra/io/encodings/8-bit/8859-14.TXT @@ -0,0 +1,301 @@ +# +# Name: ISO/IEC 8859-14:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn +# Ken Whistler +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-14:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-14 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-14 order. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x1E02 # LATIN CAPITAL LETTER B WITH DOT ABOVE +0xA2 0x1E03 # LATIN SMALL LETTER B WITH DOT ABOVE +0xA3 0x00A3 # POUND SIGN +0xA4 0x010A # LATIN CAPITAL LETTER C WITH DOT ABOVE +0xA5 0x010B # LATIN SMALL LETTER C WITH DOT ABOVE +0xA6 0x1E0A # LATIN CAPITAL LETTER D WITH DOT ABOVE +0xA7 0x00A7 # SECTION SIGN +0xA8 0x1E80 # LATIN CAPITAL LETTER W WITH GRAVE +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x1E82 # LATIN CAPITAL LETTER W WITH ACUTE +0xAB 0x1E0B # LATIN SMALL LETTER D WITH DOT ABOVE +0xAC 0x1EF2 # LATIN CAPITAL LETTER Y WITH GRAVE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xB0 0x1E1E # LATIN CAPITAL LETTER F WITH DOT ABOVE +0xB1 0x1E1F # LATIN SMALL LETTER F WITH DOT ABOVE +0xB2 0x0120 # LATIN CAPITAL LETTER G WITH DOT ABOVE +0xB3 0x0121 # LATIN SMALL LETTER G WITH DOT ABOVE +0xB4 0x1E40 # LATIN CAPITAL LETTER M WITH DOT ABOVE +0xB5 0x1E41 # LATIN SMALL LETTER M WITH DOT ABOVE +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x1E56 # LATIN CAPITAL LETTER P WITH DOT ABOVE +0xB8 0x1E81 # LATIN SMALL LETTER W WITH GRAVE +0xB9 0x1E57 # LATIN SMALL LETTER P WITH DOT ABOVE +0xBA 0x1E83 # LATIN SMALL LETTER W WITH ACUTE +0xBB 0x1E60 # LATIN CAPITAL LETTER S WITH DOT ABOVE +0xBC 0x1EF3 # LATIN SMALL LETTER Y WITH GRAVE +0xBD 0x1E84 # LATIN CAPITAL LETTER W WITH DIAERESIS +0xBE 0x1E85 # LATIN SMALL LETTER W WITH DIAERESIS +0xBF 0x1E61 # LATIN SMALL LETTER S WITH DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x0174 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x1E6A # LATIN CAPITAL LETTER T WITH DOT ABOVE +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x0176 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x0175 # LATIN SMALL LETTER W WITH CIRCUMFLEX +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x1E6B # LATIN SMALL LETTER T WITH DOT ABOVE +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x0177 # LATIN SMALL LETTER Y WITH CIRCUMFLEX +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + diff --git a/extra/io/encodings/8-bit/8859-15.TXT b/extra/io/encodings/8-bit/8859-15.TXT new file mode 100644 index 0000000000..ab2f32fcea --- /dev/null +++ b/extra/io/encodings/8-bit/8859-15.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-15:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Markus Kuhn +# Ken Whistler +# +# Copyright (c) 1998 - 1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-15:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-15 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-15 order. +# +# Version history +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x20AC # EURO SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 # LATIN CAPITAL LETTER ETH +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE # LATIN CAPITAL LETTER THORN +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 # LATIN SMALL LETTER ETH +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE # LATIN SMALL LETTER THORN +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + diff --git a/extra/io/encodings/8-bit/8859-16.TXT b/extra/io/encodings/8-bit/8859-16.TXT new file mode 100644 index 0000000000..c0dcf0dac6 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-16.TXT @@ -0,0 +1,299 @@ +# +# Name: ISO/IEC 8859-16:2001 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 2001 July 26 +# Authors: Markus Kuhn +# +# Copyright (c) 1999-2001 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-16:2001 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-16 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-16 order. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xA3 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xA4 0x20AC # EURO SIGN +0xA5 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xA6 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xA7 0x00A7 # SECTION SIGN +0xA8 0x0161 # LATIN SMALL LETTER S WITH CARON +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x0218 # LATIN CAPITAL LETTER S WITH COMMA BELOW +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xB3 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xB4 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xB5 0x201D # RIGHT DOUBLE QUOTATION MARK +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x017E # LATIN SMALL LETTER Z WITH CARON +0xB9 0x010D # LATIN SMALL LETTER C WITH CARON +0xBA 0x0219 # LATIN SMALL LETTER S WITH COMMA BELOW +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x0152 # LATIN CAPITAL LIGATURE OE +0xBD 0x0153 # LATIN SMALL LIGATURE OE +0xBE 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xBF 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x0102 # LATIN CAPITAL LETTER A WITH BREVE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xD8 0x0170 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xDE 0x021A # LATIN CAPITAL LETTER T WITH COMMA BELOW +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x0103 # LATIN SMALL LETTER A WITH BREVE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x015B # LATIN SMALL LETTER S WITH ACUTE +0xF8 0x0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xFE 0x021B # LATIN SMALL LETTER T WITH COMMA BELOW +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/extra/io/encodings/8-bit/8859-2.TXT b/extra/io/encodings/8-bit/8859-2.TXT new file mode 100644 index 0000000000..e45df25eb8 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-2.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO 8859-2:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-2:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-2 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-2 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x02D8 # BREVE +0xA3 0x0141 # LATIN CAPITAL LETTER L WITH STROKE +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x013D # LATIN CAPITAL LETTER L WITH CARON +0xA6 0x015A # LATIN CAPITAL LETTER S WITH ACUTE +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAA 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xAB 0x0164 # LATIN CAPITAL LETTER T WITH CARON +0xAC 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xB2 0x02DB # OGONEK +0xB3 0x0142 # LATIN SMALL LETTER L WITH STROKE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x013E # LATIN SMALL LETTER L WITH CARON +0xB6 0x015B # LATIN SMALL LETTER S WITH ACUTE +0xB7 0x02C7 # CARON +0xB8 0x00B8 # CEDILLA +0xB9 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBA 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xBB 0x0165 # LATIN SMALL LETTER T WITH CARON +0xBC 0x017A # LATIN SMALL LETTER Z WITH ACUTE +0xBD 0x02DD # DOUBLE ACUTE ACCENT +0xBE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBF 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 0x0154 # LATIN CAPITAL LETTER R WITH ACUTE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x0102 # LATIN CAPITAL LETTER A WITH BREVE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x0139 # LATIN CAPITAL LETTER L WITH ACUTE +0xC6 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x011A # LATIN CAPITAL LETTER E WITH CARON +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x010E # LATIN CAPITAL LETTER D WITH CARON +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE +0xD2 0x0147 # LATIN CAPITAL LETTER N WITH CARON +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x0158 # LATIN CAPITAL LETTER R WITH CARON +0xD9 0x016E # LATIN CAPITAL LETTER U WITH RING ABOVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x0170 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x0162 # LATIN CAPITAL LETTER T WITH CEDILLA +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x0155 # LATIN SMALL LETTER R WITH ACUTE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x0103 # LATIN SMALL LETTER A WITH BREVE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x013A # LATIN SMALL LETTER L WITH ACUTE +0xE6 0x0107 # LATIN SMALL LETTER C WITH ACUTE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x011B # LATIN SMALL LETTER E WITH CARON +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x010F # LATIN SMALL LETTER D WITH CARON +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0144 # LATIN SMALL LETTER N WITH ACUTE +0xF2 0x0148 # LATIN SMALL LETTER N WITH CARON +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x0159 # LATIN SMALL LETTER R WITH CARON +0xF9 0x016F # LATIN SMALL LETTER U WITH RING ABOVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x0163 # LATIN SMALL LETTER T WITH CEDILLA +0xFF 0x02D9 # DOT ABOVE diff --git a/extra/io/encodings/8-bit/8859-3.TXT b/extra/io/encodings/8-bit/8859-3.TXT new file mode 100644 index 0000000000..9b6ac69dd8 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-3.TXT @@ -0,0 +1,296 @@ +# +# Name: ISO/IEC 8859-3:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-3:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-3 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-3 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0126 # LATIN CAPITAL LETTER H WITH STROKE +0xA2 0x02D8 # BREVE +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA6 0x0124 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0xAA 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xAB 0x011E # LATIN CAPITAL LETTER G WITH BREVE +0xAC 0x0134 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0xAD 0x00AD # SOFT HYPHEN +0xAF 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0127 # LATIN SMALL LETTER H WITH STROKE +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x0125 # LATIN SMALL LETTER H WITH CIRCUMFLEX +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x0131 # LATIN SMALL LETTER DOTLESS I +0xBA 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xBB 0x011F # LATIN SMALL LETTER G WITH BREVE +0xBC 0x0135 # LATIN SMALL LETTER J WITH CIRCUMFLEX +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBF 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x010A # LATIN CAPITAL LETTER C WITH DOT ABOVE +0xC6 0x0108 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x0120 # LATIN CAPITAL LETTER G WITH DOT ABOVE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x011C # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x016C # LATIN CAPITAL LETTER U WITH BREVE +0xDE 0x015C # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x010B # LATIN SMALL LETTER C WITH DOT ABOVE +0xE6 0x0109 # LATIN SMALL LETTER C WITH CIRCUMFLEX +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x0121 # LATIN SMALL LETTER G WITH DOT ABOVE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x011D # LATIN SMALL LETTER G WITH CIRCUMFLEX +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x016D # LATIN SMALL LETTER U WITH BREVE +0xFE 0x015D # LATIN SMALL LETTER S WITH CIRCUMFLEX +0xFF 0x02D9 # DOT ABOVE diff --git a/extra/io/encodings/8-bit/8859-4.TXT b/extra/io/encodings/8-bit/8859-4.TXT new file mode 100644 index 0000000000..662e698ab2 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-4.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO/IEC 8859-4:1998 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-4:1998 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-4 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-4 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK +0xA2 0x0138 # LATIN SMALL LETTER KRA +0xA3 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x0128 # LATIN CAPITAL LETTER I WITH TILDE +0xA6 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x0160 # LATIN CAPITAL LETTER S WITH CARON +0xAA 0x0112 # LATIN CAPITAL LETTER E WITH MACRON +0xAB 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA +0xAC 0x0166 # LATIN CAPITAL LETTER T WITH STROKE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x017D # LATIN CAPITAL LETTER Z WITH CARON +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x0105 # LATIN SMALL LETTER A WITH OGONEK +0xB2 0x02DB # OGONEK +0xB3 0x0157 # LATIN SMALL LETTER R WITH CEDILLA +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x0129 # LATIN SMALL LETTER I WITH TILDE +0xB6 0x013C # LATIN SMALL LETTER L WITH CEDILLA +0xB7 0x02C7 # CARON +0xB8 0x00B8 # CEDILLA +0xB9 0x0161 # LATIN SMALL LETTER S WITH CARON +0xBA 0x0113 # LATIN SMALL LETTER E WITH MACRON +0xBB 0x0123 # LATIN SMALL LETTER G WITH CEDILLA +0xBC 0x0167 # LATIN SMALL LETTER T WITH STROKE +0xBD 0x014A # LATIN CAPITAL LETTER ENG +0xBE 0x017E # LATIN SMALL LETTER Z WITH CARON +0xBF 0x014B # LATIN SMALL LETTER ENG +0xC0 0x0100 # LATIN CAPITAL LETTER A WITH MACRON +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x012E # LATIN CAPITAL LETTER I WITH OGONEK +0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x012A # LATIN CAPITAL LETTER I WITH MACRON +0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE +0xD1 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA +0xD2 0x014C # LATIN CAPITAL LETTER O WITH MACRON +0xD3 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0168 # LATIN CAPITAL LETTER U WITH TILDE +0xDE 0x016A # LATIN CAPITAL LETTER U WITH MACRON +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x0101 # LATIN SMALL LETTER A WITH MACRON +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x012F # LATIN SMALL LETTER I WITH OGONEK +0xE8 0x010D # LATIN SMALL LETTER C WITH CARON +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x0119 # LATIN SMALL LETTER E WITH OGONEK +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x012B # LATIN SMALL LETTER I WITH MACRON +0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE +0xF1 0x0146 # LATIN SMALL LETTER N WITH CEDILLA +0xF2 0x014D # LATIN SMALL LETTER O WITH MACRON +0xF3 0x0137 # LATIN SMALL LETTER K WITH CEDILLA +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x0173 # LATIN SMALL LETTER U WITH OGONEK +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0169 # LATIN SMALL LETTER U WITH TILDE +0xFE 0x016B # LATIN SMALL LETTER U WITH MACRON +0xFF 0x02D9 # DOT ABOVE diff --git a/extra/io/encodings/8-bit/8859-5.TXT b/extra/io/encodings/8-bit/8859-5.TXT new file mode 100644 index 0000000000..a7ed1ce2ab --- /dev/null +++ b/extra/io/encodings/8-bit/8859-5.TXT @@ -0,0 +1,303 @@ +# +# Name: ISO 8859-5:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-5:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-5 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-5 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0401 # CYRILLIC CAPITAL LETTER IO +0xA2 0x0402 # CYRILLIC CAPITAL LETTER DJE +0xA3 0x0403 # CYRILLIC CAPITAL LETTER GJE +0xA4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xA5 0x0405 # CYRILLIC CAPITAL LETTER DZE +0xA6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0xA7 0x0407 # CYRILLIC CAPITAL LETTER YI +0xA8 0x0408 # CYRILLIC CAPITAL LETTER JE +0xA9 0x0409 # CYRILLIC CAPITAL LETTER LJE +0xAA 0x040A # CYRILLIC CAPITAL LETTER NJE +0xAB 0x040B # CYRILLIC CAPITAL LETTER TSHE +0xAC 0x040C # CYRILLIC CAPITAL LETTER KJE +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x040E # CYRILLIC CAPITAL LETTER SHORT U +0xAF 0x040F # CYRILLIC CAPITAL LETTER DZHE +0xB0 0x0410 # CYRILLIC CAPITAL LETTER A +0xB1 0x0411 # CYRILLIC CAPITAL LETTER BE +0xB2 0x0412 # CYRILLIC CAPITAL LETTER VE +0xB3 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xB4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xB5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xB6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xB7 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xB8 0x0418 # CYRILLIC CAPITAL LETTER I +0xB9 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xBA 0x041A # CYRILLIC CAPITAL LETTER KA +0xBB 0x041B # CYRILLIC CAPITAL LETTER EL +0xBC 0x041C # CYRILLIC CAPITAL LETTER EM +0xBD 0x041D # CYRILLIC CAPITAL LETTER EN +0xBE 0x041E # CYRILLIC CAPITAL LETTER O +0xBF 0x041F # CYRILLIC CAPITAL LETTER PE +0xC0 0x0420 # CYRILLIC CAPITAL LETTER ER +0xC1 0x0421 # CYRILLIC CAPITAL LETTER ES +0xC2 0x0422 # CYRILLIC CAPITAL LETTER TE +0xC3 0x0423 # CYRILLIC CAPITAL LETTER U +0xC4 0x0424 # CYRILLIC CAPITAL LETTER EF +0xC5 0x0425 # CYRILLIC CAPITAL LETTER HA +0xC6 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xC7 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xC8 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xC9 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xCA 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN +0xCB 0x042B # CYRILLIC CAPITAL LETTER YERU +0xCC 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xCD 0x042D # CYRILLIC CAPITAL LETTER E +0xCE 0x042E # CYRILLIC CAPITAL LETTER YU +0xCF 0x042F # CYRILLIC CAPITAL LETTER YA +0xD0 0x0430 # CYRILLIC SMALL LETTER A +0xD1 0x0431 # CYRILLIC SMALL LETTER BE +0xD2 0x0432 # CYRILLIC SMALL LETTER VE +0xD3 0x0433 # CYRILLIC SMALL LETTER GHE +0xD4 0x0434 # CYRILLIC SMALL LETTER DE +0xD5 0x0435 # CYRILLIC SMALL LETTER IE +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0437 # CYRILLIC SMALL LETTER ZE +0xD8 0x0438 # CYRILLIC SMALL LETTER I +0xD9 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xDA 0x043A # CYRILLIC SMALL LETTER KA +0xDB 0x043B # CYRILLIC SMALL LETTER EL +0xDC 0x043C # CYRILLIC SMALL LETTER EM +0xDD 0x043D # CYRILLIC SMALL LETTER EN +0xDE 0x043E # CYRILLIC SMALL LETTER O +0xDF 0x043F # CYRILLIC SMALL LETTER PE +0xE0 0x0440 # CYRILLIC SMALL LETTER ER +0xE1 0x0441 # CYRILLIC SMALL LETTER ES +0xE2 0x0442 # CYRILLIC SMALL LETTER TE +0xE3 0x0443 # CYRILLIC SMALL LETTER U +0xE4 0x0444 # CYRILLIC SMALL LETTER EF +0xE5 0x0445 # CYRILLIC SMALL LETTER HA +0xE6 0x0446 # CYRILLIC SMALL LETTER TSE +0xE7 0x0447 # CYRILLIC SMALL LETTER CHE +0xE8 0x0448 # CYRILLIC SMALL LETTER SHA +0xE9 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xEA 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xEB 0x044B # CYRILLIC SMALL LETTER YERU +0xEC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xED 0x044D # CYRILLIC SMALL LETTER E +0xEE 0x044E # CYRILLIC SMALL LETTER YU +0xEF 0x044F # CYRILLIC SMALL LETTER YA +0xF0 0x2116 # NUMERO SIGN +0xF1 0x0451 # CYRILLIC SMALL LETTER IO +0xF2 0x0452 # CYRILLIC SMALL LETTER DJE +0xF3 0x0453 # CYRILLIC SMALL LETTER GJE +0xF4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xF5 0x0455 # CYRILLIC SMALL LETTER DZE +0xF6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0xF7 0x0457 # CYRILLIC SMALL LETTER YI +0xF8 0x0458 # CYRILLIC SMALL LETTER JE +0xF9 0x0459 # CYRILLIC SMALL LETTER LJE +0xFA 0x045A # CYRILLIC SMALL LETTER NJE +0xFB 0x045B # CYRILLIC SMALL LETTER TSHE +0xFC 0x045C # CYRILLIC SMALL LETTER KJE +0xFD 0x00A7 # SECTION SIGN +0xFE 0x045E # CYRILLIC SMALL LETTER SHORT U +0xFF 0x045F # CYRILLIC SMALL LETTER DZHE diff --git a/extra/io/encodings/8-bit/8859-6.TXT b/extra/io/encodings/8-bit/8859-6.TXT new file mode 100644 index 0000000000..69ac7f5894 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-6.TXT @@ -0,0 +1,260 @@ +# +# Name: ISO 8859-6:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-6:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-6 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-6 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# 0x30..0x39 remapped to the ASCII digits (U+0030..U+0039) instead +# of the Arabic digits (U+0660..U+0669). +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA4 0x00A4 # CURRENCY SIGN +0xAC 0x060C # ARABIC COMMA +0xAD 0x00AD # SOFT HYPHEN +0xBB 0x061B # ARABIC SEMICOLON +0xBF 0x061F # ARABIC QUESTION MARK +0xC1 0x0621 # ARABIC LETTER HAMZA +0xC2 0x0622 # ARABIC LETTER ALEF WITH MADDA ABOVE +0xC3 0x0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE +0xC4 0x0624 # ARABIC LETTER WAW WITH HAMZA ABOVE +0xC5 0x0625 # ARABIC LETTER ALEF WITH HAMZA BELOW +0xC6 0x0626 # ARABIC LETTER YEH WITH HAMZA ABOVE +0xC7 0x0627 # ARABIC LETTER ALEF +0xC8 0x0628 # ARABIC LETTER BEH +0xC9 0x0629 # ARABIC LETTER TEH MARBUTA +0xCA 0x062A # ARABIC LETTER TEH +0xCB 0x062B # ARABIC LETTER THEH +0xCC 0x062C # ARABIC LETTER JEEM +0xCD 0x062D # ARABIC LETTER HAH +0xCE 0x062E # ARABIC LETTER KHAH +0xCF 0x062F # ARABIC LETTER DAL +0xD0 0x0630 # ARABIC LETTER THAL +0xD1 0x0631 # ARABIC LETTER REH +0xD2 0x0632 # ARABIC LETTER ZAIN +0xD3 0x0633 # ARABIC LETTER SEEN +0xD4 0x0634 # ARABIC LETTER SHEEN +0xD5 0x0635 # ARABIC LETTER SAD +0xD6 0x0636 # ARABIC LETTER DAD +0xD7 0x0637 # ARABIC LETTER TAH +0xD8 0x0638 # ARABIC LETTER ZAH +0xD9 0x0639 # ARABIC LETTER AIN +0xDA 0x063A # ARABIC LETTER GHAIN +0xE0 0x0640 # ARABIC TATWEEL +0xE1 0x0641 # ARABIC LETTER FEH +0xE2 0x0642 # ARABIC LETTER QAF +0xE3 0x0643 # ARABIC LETTER KAF +0xE4 0x0644 # ARABIC LETTER LAM +0xE5 0x0645 # ARABIC LETTER MEEM +0xE6 0x0646 # ARABIC LETTER NOON +0xE7 0x0647 # ARABIC LETTER HEH +0xE8 0x0648 # ARABIC LETTER WAW +0xE9 0x0649 # ARABIC LETTER ALEF MAKSURA +0xEA 0x064A # ARABIC LETTER YEH +0xEB 0x064B # ARABIC FATHATAN +0xEC 0x064C # ARABIC DAMMATAN +0xED 0x064D # ARABIC KASRATAN +0xEE 0x064E # ARABIC FATHA +0xEF 0x064F # ARABIC DAMMA +0xF0 0x0650 # ARABIC KASRA +0xF1 0x0651 # ARABIC SHADDA +0xF2 0x0652 # ARABIC SUKUN diff --git a/extra/io/encodings/8-bit/8859-7.TXT b/extra/io/encodings/8-bit/8859-7.TXT new file mode 100644 index 0000000000..bc46b74719 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-7.TXT @@ -0,0 +1,308 @@ +# +# Name: ISO 8859-7:2003 to Unicode +# Unicode version: 4.0 +# Table version: 2.0 +# Table format: Format A +# Date: 2003-Nov-12 +# Authors: Ken Whistler +# +# Copyright (c) 1991-2003 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO 8859-7:2003 characters map into Unicode. +# +# ISO 8859-7:1987 is equivalent to ISO-IR-126, ELOT 928, +# and ECMA 118. ISO 8859-7:2003 adds two currency signs +# and one other character not in the earlier standard. +# +# Format: Three tab-separated columns +# Column #1 is the ISO 8859-7 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO 8859-7 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# Remap 0xA1 to U+2018 (instead of 0x02BD) to match text of 8859-7 +# Remap 0xA2 to U+2019 (instead of 0x02BC) to match text of 8859-7 +# +# 2.0 version updates 1.0 version by adding mappings for the +# three newly added characters 0xA4, 0xA5, 0xAA. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact the Unicode Consortium at: +# +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x2018 # LEFT SINGLE QUOTATION MARK +0xA2 0x2019 # RIGHT SINGLE QUOTATION MARK +0xA3 0x00A3 # POUND SIGN +0xA4 0x20AC # EURO SIGN +0xA5 0x20AF # DRACHMA SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x037A # GREEK YPOGEGRAMMENI +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAF 0x2015 # HORIZONTAL BAR +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x0384 # GREEK TONOS +0xB5 0x0385 # GREEK DIALYTIKA TONOS +0xB6 0x0386 # GREEK CAPITAL LETTER ALPHA WITH TONOS +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x0388 # GREEK CAPITAL LETTER EPSILON WITH TONOS +0xB9 0x0389 # GREEK CAPITAL LETTER ETA WITH TONOS +0xBA 0x038A # GREEK CAPITAL LETTER IOTA WITH TONOS +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x038C # GREEK CAPITAL LETTER OMICRON WITH TONOS +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x038E # GREEK CAPITAL LETTER UPSILON WITH TONOS +0xBF 0x038F # GREEK CAPITAL LETTER OMEGA WITH TONOS +0xC0 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0xC1 0x0391 # GREEK CAPITAL LETTER ALPHA +0xC2 0x0392 # GREEK CAPITAL LETTER BETA +0xC3 0x0393 # GREEK CAPITAL LETTER GAMMA +0xC4 0x0394 # GREEK CAPITAL LETTER DELTA +0xC5 0x0395 # GREEK CAPITAL LETTER EPSILON +0xC6 0x0396 # GREEK CAPITAL LETTER ZETA +0xC7 0x0397 # GREEK CAPITAL LETTER ETA +0xC8 0x0398 # GREEK CAPITAL LETTER THETA +0xC9 0x0399 # GREEK CAPITAL LETTER IOTA +0xCA 0x039A # GREEK CAPITAL LETTER KAPPA +0xCB 0x039B # GREEK CAPITAL LETTER LAMDA +0xCC 0x039C # GREEK CAPITAL LETTER MU +0xCD 0x039D # GREEK CAPITAL LETTER NU +0xCE 0x039E # GREEK CAPITAL LETTER XI +0xCF 0x039F # GREEK CAPITAL LETTER OMICRON +0xD0 0x03A0 # GREEK CAPITAL LETTER PI +0xD1 0x03A1 # GREEK CAPITAL LETTER RHO +0xD3 0x03A3 # GREEK CAPITAL LETTER SIGMA +0xD4 0x03A4 # GREEK CAPITAL LETTER TAU +0xD5 0x03A5 # GREEK CAPITAL LETTER UPSILON +0xD6 0x03A6 # GREEK CAPITAL LETTER PHI +0xD7 0x03A7 # GREEK CAPITAL LETTER CHI +0xD8 0x03A8 # GREEK CAPITAL LETTER PSI +0xD9 0x03A9 # GREEK CAPITAL LETTER OMEGA +0xDA 0x03AA # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +0xDB 0x03AB # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +0xDC 0x03AC # GREEK SMALL LETTER ALPHA WITH TONOS +0xDD 0x03AD # GREEK SMALL LETTER EPSILON WITH TONOS +0xDE 0x03AE # GREEK SMALL LETTER ETA WITH TONOS +0xDF 0x03AF # GREEK SMALL LETTER IOTA WITH TONOS +0xE0 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +0xE1 0x03B1 # GREEK SMALL LETTER ALPHA +0xE2 0x03B2 # GREEK SMALL LETTER BETA +0xE3 0x03B3 # GREEK SMALL LETTER GAMMA +0xE4 0x03B4 # GREEK SMALL LETTER DELTA +0xE5 0x03B5 # GREEK SMALL LETTER EPSILON +0xE6 0x03B6 # GREEK SMALL LETTER ZETA +0xE7 0x03B7 # GREEK SMALL LETTER ETA +0xE8 0x03B8 # GREEK SMALL LETTER THETA +0xE9 0x03B9 # GREEK SMALL LETTER IOTA +0xEA 0x03BA # GREEK SMALL LETTER KAPPA +0xEB 0x03BB # GREEK SMALL LETTER LAMDA +0xEC 0x03BC # GREEK SMALL LETTER MU +0xED 0x03BD # GREEK SMALL LETTER NU +0xEE 0x03BE # GREEK SMALL LETTER XI +0xEF 0x03BF # GREEK SMALL LETTER OMICRON +0xF0 0x03C0 # GREEK SMALL LETTER PI +0xF1 0x03C1 # GREEK SMALL LETTER RHO +0xF2 0x03C2 # GREEK SMALL LETTER FINAL SIGMA +0xF3 0x03C3 # GREEK SMALL LETTER SIGMA +0xF4 0x03C4 # GREEK SMALL LETTER TAU +0xF5 0x03C5 # GREEK SMALL LETTER UPSILON +0xF6 0x03C6 # GREEK SMALL LETTER PHI +0xF7 0x03C7 # GREEK SMALL LETTER CHI +0xF8 0x03C8 # GREEK SMALL LETTER PSI +0xF9 0x03C9 # GREEK SMALL LETTER OMEGA +0xFA 0x03CA # GREEK SMALL LETTER IOTA WITH DIALYTIKA +0xFB 0x03CB # GREEK SMALL LETTER UPSILON WITH DIALYTIKA +0xFC 0x03CC # GREEK SMALL LETTER OMICRON WITH TONOS +0xFD 0x03CD # GREEK SMALL LETTER UPSILON WITH TONOS +0xFE 0x03CE # GREEK SMALL LETTER OMEGA WITH TONOS diff --git a/extra/io/encodings/8-bit/8859-8.TXT b/extra/io/encodings/8-bit/8859-8.TXT new file mode 100644 index 0000000000..bc8da4c7fd --- /dev/null +++ b/extra/io/encodings/8-bit/8859-8.TXT @@ -0,0 +1,270 @@ +# +# Name: ISO/IEC 8859-8:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.1 +# Table format: Format A +# Date: 2000-Jan-03 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-8:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-8 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-8 order. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# 1.1 version updates to the published 8859-8:1999, correcting +# the mapping of 0xAF and adding mappings for LRM and RLM. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00D7 # MULTIPLICATION SIGN +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00F7 # DIVISION SIGN +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xDF 0x2017 # DOUBLE LOW LINE +0xE0 0x05D0 # HEBREW LETTER ALEF +0xE1 0x05D1 # HEBREW LETTER BET +0xE2 0x05D2 # HEBREW LETTER GIMEL +0xE3 0x05D3 # HEBREW LETTER DALET +0xE4 0x05D4 # HEBREW LETTER HE +0xE5 0x05D5 # HEBREW LETTER VAV +0xE6 0x05D6 # HEBREW LETTER ZAYIN +0xE7 0x05D7 # HEBREW LETTER HET +0xE8 0x05D8 # HEBREW LETTER TET +0xE9 0x05D9 # HEBREW LETTER YOD +0xEA 0x05DA # HEBREW LETTER FINAL KAF +0xEB 0x05DB # HEBREW LETTER KAF +0xEC 0x05DC # HEBREW LETTER LAMED +0xED 0x05DD # HEBREW LETTER FINAL MEM +0xEE 0x05DE # HEBREW LETTER MEM +0xEF 0x05DF # HEBREW LETTER FINAL NUN +0xF0 0x05E0 # HEBREW LETTER NUN +0xF1 0x05E1 # HEBREW LETTER SAMEKH +0xF2 0x05E2 # HEBREW LETTER AYIN +0xF3 0x05E3 # HEBREW LETTER FINAL PE +0xF4 0x05E4 # HEBREW LETTER PE +0xF5 0x05E5 # HEBREW LETTER FINAL TSADI +0xF6 0x05E6 # HEBREW LETTER TSADI +0xF7 0x05E7 # HEBREW LETTER QOF +0xF8 0x05E8 # HEBREW LETTER RESH +0xF9 0x05E9 # HEBREW LETTER SHIN +0xFA 0x05EA # HEBREW LETTER TAV +0xFD 0x200E # LEFT-TO-RIGHT MARK +0xFE 0x200F # RIGHT-TO-LEFT MARK + diff --git a/extra/io/encodings/8-bit/8859-9.TXT b/extra/io/encodings/8-bit/8859-9.TXT new file mode 100644 index 0000000000..22901f1077 --- /dev/null +++ b/extra/io/encodings/8-bit/8859-9.TXT @@ -0,0 +1,307 @@ +# +# Name: ISO/IEC 8859-9:1999 to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 1999 July 27 +# Authors: Ken Whistler +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on magnetic media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ISO/IEC 8859-9:1999 characters map into Unicode. +# +# Format: Three tab-separated columns +# Column #1 is the ISO/IEC 8859-9 code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ISO/IEC 8859-9 order. +# +# ISO/IEC 8859-9 is also equivalent to ISO-IR-148. +# +# Version history +# 1.0 version updates 0.1 version by adding mappings for all +# control characters. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x00A1 # INVERTED EXCLAMATION MARK +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A4 # CURRENCY SIGN +0xA5 0x00A5 # YEN SIGN +0xA6 0x00A6 # BROKEN BAR +0xA7 0x00A7 # SECTION SIGN +0xA8 0x00A8 # DIAERESIS +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x00AA # FEMININE ORDINAL INDICATOR +0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC # NOT SIGN +0xAD 0x00AD # SOFT HYPHEN +0xAE 0x00AE # REGISTERED SIGN +0xAF 0x00AF # MACRON +0xB0 0x00B0 # DEGREE SIGN +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x00B2 # SUPERSCRIPT TWO +0xB3 0x00B3 # SUPERSCRIPT THREE +0xB4 0x00B4 # ACUTE ACCENT +0xB5 0x00B5 # MICRO SIGN +0xB6 0x00B6 # PILCROW SIGN +0xB7 0x00B7 # MIDDLE DOT +0xB8 0x00B8 # CEDILLA +0xB9 0x00B9 # SUPERSCRIPT ONE +0xBA 0x00BA # MASCULINE ORDINAL INDICATOR +0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC # VULGAR FRACTION ONE QUARTER +0xBD 0x00BD # VULGAR FRACTION ONE HALF +0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF # INVERTED QUESTION MARK +0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 # LATIN CAPITAL LETTER AE +0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x011E # LATIN CAPITAL LETTER G WITH BREVE +0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 # MULTIPLICATION SIGN +0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE +0xDE 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA +0xDF 0x00DF # LATIN SMALL LETTER SHARP S +0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 # LATIN SMALL LETTER AE +0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x011F # LATIN SMALL LETTER G WITH BREVE +0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 # DIVISION SIGN +0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x0131 # LATIN SMALL LETTER DOTLESS I +0xFE 0x015F # LATIN SMALL LETTER S WITH CEDILLA +0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS + + diff --git a/extra/io/encodings/8-bit/CP037.TXT b/extra/io/encodings/8-bit/CP037.TXT new file mode 100644 index 0000000000..48fde2ae69 --- /dev/null +++ b/extra/io/encodings/8-bit/CP037.TXT @@ -0,0 +1,275 @@ +# +# Name: cp037_IBMUSCanada to Unicode table +# Unicode version: 2.0 +# Table version: 2.00 +# Table format: Format A +# Date: 04/24/96 +# Contact: Shawn.Steele@microsoft.com +# +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp037_IBMUSCanada code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp037_IBMUSCanada order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x009C #CONTROL +0x05 0x0009 #HORIZONTAL TABULATION +0x06 0x0086 #CONTROL +0x07 0x007F #DELETE +0x08 0x0097 #CONTROL +0x09 0x008D #CONTROL +0x0A 0x008E #CONTROL +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x009D #CONTROL +0x15 0x0085 #CONTROL +0x16 0x0008 #BACKSPACE +0x17 0x0087 #CONTROL +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x0092 #CONTROL +0x1B 0x008F #CONTROL +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0080 #CONTROL +0x21 0x0081 #CONTROL +0x22 0x0082 #CONTROL +0x23 0x0083 #CONTROL +0x24 0x0084 #CONTROL +0x25 0x000A #LINE FEED +0x26 0x0017 #END OF TRANSMISSION BLOCK +0x27 0x001B #ESCAPE +0x28 0x0088 #CONTROL +0x29 0x0089 #CONTROL +0x2A 0x008A #CONTROL +0x2B 0x008B #CONTROL +0x2C 0x008C #CONTROL +0x2D 0x0005 #ENQUIRY +0x2E 0x0006 #ACKNOWLEDGE +0x2F 0x0007 #BELL +0x30 0x0090 #CONTROL +0x31 0x0091 #CONTROL +0x32 0x0016 #SYNCHRONOUS IDLE +0x33 0x0093 #CONTROL +0x34 0x0094 #CONTROL +0x35 0x0095 #CONTROL +0x36 0x0096 #CONTROL +0x37 0x0004 #END OF TRANSMISSION +0x38 0x0098 #CONTROL +0x39 0x0099 #CONTROL +0x3A 0x009A #CONTROL +0x3B 0x009B #CONTROL +0x3C 0x0014 #DEVICE CONTROL FOUR +0x3D 0x0015 #NEGATIVE ACKNOWLEDGE +0x3E 0x009E #CONTROL +0x3F 0x001A #SUBSTITUTE +0x40 0x0020 #SPACE +0x41 0x00A0 #NO-BREAK SPACE +0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x43 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE +0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE +0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE +0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA +0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE +0x4A 0x00A2 #CENT SIGN +0x4B 0x002E #FULL STOP +0x4C 0x003C #LESS-THAN SIGN +0x4D 0x0028 #LEFT PARENTHESIS +0x4E 0x002B #PLUS SIGN +0x4F 0x007C #VERTICAL LINE +0x50 0x0026 #AMPERSAND +0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE +0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS +0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE +0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE +0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS +0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE +0x59 0x00DF #LATIN SMALL LETTER SHARP S (GERMAN) +0x5A 0x0021 #EXCLAMATION MARK +0x5B 0x0024 #DOLLAR SIGN +0x5C 0x002A #ASTERISK +0x5D 0x0029 #RIGHT PARENTHESIS +0x5E 0x003B #SEMICOLON +0x5F 0x00AC #NOT SIGN +0x60 0x002D #HYPHEN-MINUS +0x61 0x002F #SOLIDUS +0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x63 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE +0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE +0x6A 0x00A6 #BROKEN BAR +0x6B 0x002C #COMMA +0x6C 0x0025 #PERCENT SIGN +0x6D 0x005F #LOW LINE +0x6E 0x003E #GREATER-THAN SIGN +0x6F 0x003F #QUESTION MARK +0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE +0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE +0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE +0x79 0x0060 #GRAVE ACCENT +0x7A 0x003A #COLON +0x7B 0x0023 #NUMBER SIGN +0x7C 0x0040 #COMMERCIAL AT +0x7D 0x0027 #APOSTROPHE +0x7E 0x003D #EQUALS SIGN +0x7F 0x0022 #QUOTATION MARK +0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE +0x81 0x0061 #LATIN SMALL LETTER A +0x82 0x0062 #LATIN SMALL LETTER B +0x83 0x0063 #LATIN SMALL LETTER C +0x84 0x0064 #LATIN SMALL LETTER D +0x85 0x0065 #LATIN SMALL LETTER E +0x86 0x0066 #LATIN SMALL LETTER F +0x87 0x0067 #LATIN SMALL LETTER G +0x88 0x0068 #LATIN SMALL LETTER H +0x89 0x0069 #LATIN SMALL LETTER I +0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8C 0x00F0 #LATIN SMALL LETTER ETH (ICELANDIC) +0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE +0x8E 0x00FE #LATIN SMALL LETTER THORN (ICELANDIC) +0x8F 0x00B1 #PLUS-MINUS SIGN +0x90 0x00B0 #DEGREE SIGN +0x91 0x006A #LATIN SMALL LETTER J +0x92 0x006B #LATIN SMALL LETTER K +0x93 0x006C #LATIN SMALL LETTER L +0x94 0x006D #LATIN SMALL LETTER M +0x95 0x006E #LATIN SMALL LETTER N +0x96 0x006F #LATIN SMALL LETTER O +0x97 0x0070 #LATIN SMALL LETTER P +0x98 0x0071 #LATIN SMALL LETTER Q +0x99 0x0072 #LATIN SMALL LETTER R +0x9A 0x00AA #FEMININE ORDINAL INDICATOR +0x9B 0x00BA #MASCULINE ORDINAL INDICATOR +0x9C 0x00E6 #LATIN SMALL LIGATURE AE +0x9D 0x00B8 #CEDILLA +0x9E 0x00C6 #LATIN CAPITAL LIGATURE AE +0x9F 0x00A4 #CURRENCY SIGN +0xA0 0x00B5 #MICRO SIGN +0xA1 0x007E #TILDE +0xA2 0x0073 #LATIN SMALL LETTER S +0xA3 0x0074 #LATIN SMALL LETTER T +0xA4 0x0075 #LATIN SMALL LETTER U +0xA5 0x0076 #LATIN SMALL LETTER V +0xA6 0x0077 #LATIN SMALL LETTER W +0xA7 0x0078 #LATIN SMALL LETTER X +0xA8 0x0079 #LATIN SMALL LETTER Y +0xA9 0x007A #LATIN SMALL LETTER Z +0xAA 0x00A1 #INVERTED EXCLAMATION MARK +0xAB 0x00BF #INVERTED QUESTION MARK +0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (ICELANDIC) +0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xAE 0x00DE #LATIN CAPITAL LETTER THORN (ICELANDIC) +0xAF 0x00AE #REGISTERED SIGN +0xB0 0x005E #CIRCUMFLEX ACCENT +0xB1 0x00A3 #POUND SIGN +0xB2 0x00A5 #YEN SIGN +0xB3 0x00B7 #MIDDLE DOT +0xB4 0x00A9 #COPYRIGHT SIGN +0xB5 0x00A7 #SECTION SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00BC #VULGAR FRACTION ONE QUARTER +0xB8 0x00BD #VULGAR FRACTION ONE HALF +0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS +0xBA 0x005B #LEFT SQUARE BRACKET +0xBB 0x005D #RIGHT SQUARE BRACKET +0xBC 0x00AF #MACRON +0xBD 0x00A8 #DIAERESIS +0xBE 0x00B4 #ACUTE ACCENT +0xBF 0x00D7 #MULTIPLICATION SIGN +0xC0 0x007B #LEFT CURLY BRACKET +0xC1 0x0041 #LATIN CAPITAL LETTER A +0xC2 0x0042 #LATIN CAPITAL LETTER B +0xC3 0x0043 #LATIN CAPITAL LETTER C +0xC4 0x0044 #LATIN CAPITAL LETTER D +0xC5 0x0045 #LATIN CAPITAL LETTER E +0xC6 0x0046 #LATIN CAPITAL LETTER F +0xC7 0x0047 #LATIN CAPITAL LETTER G +0xC8 0x0048 #LATIN CAPITAL LETTER H +0xC9 0x0049 #LATIN CAPITAL LETTER I +0xCA 0x00AD #SOFT HYPHEN +0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xCC 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE +0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE +0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE +0xD0 0x007D #RIGHT CURLY BRACKET +0xD1 0x004A #LATIN CAPITAL LETTER J +0xD2 0x004B #LATIN CAPITAL LETTER K +0xD3 0x004C #LATIN CAPITAL LETTER L +0xD4 0x004D #LATIN CAPITAL LETTER M +0xD5 0x004E #LATIN CAPITAL LETTER N +0xD6 0x004F #LATIN CAPITAL LETTER O +0xD7 0x0050 #LATIN CAPITAL LETTER P +0xD8 0x0051 #LATIN CAPITAL LETTER Q +0xD9 0x0052 #LATIN CAPITAL LETTER R +0xDA 0x00B9 #SUPERSCRIPT ONE +0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xDC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE +0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE +0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS +0xE0 0x005C #REVERSE SOLIDUS +0xE1 0x00F7 #DIVISION SIGN +0xE2 0x0053 #LATIN CAPITAL LETTER S +0xE3 0x0054 #LATIN CAPITAL LETTER T +0xE4 0x0055 #LATIN CAPITAL LETTER U +0xE5 0x0056 #LATIN CAPITAL LETTER V +0xE6 0x0057 #LATIN CAPITAL LETTER W +0xE7 0x0058 #LATIN CAPITAL LETTER X +0xE8 0x0059 #LATIN CAPITAL LETTER Y +0xE9 0x005A #LATIN CAPITAL LETTER Z +0xEA 0x00B2 #SUPERSCRIPT TWO +0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xEC 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xF0 0x0030 #DIGIT ZERO +0xF1 0x0031 #DIGIT ONE +0xF2 0x0032 #DIGIT TWO +0xF3 0x0033 #DIGIT THREE +0xF4 0x0034 #DIGIT FOUR +0xF5 0x0035 #DIGIT FIVE +0xF6 0x0036 #DIGIT SIX +0xF7 0x0037 #DIGIT SEVEN +0xF8 0x0038 #DIGIT EIGHT +0xF9 0x0039 #DIGIT NINE +0xFA 0x00B3 #SUPERSCRIPT THREE +0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xFC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xFF 0x009F #CONTROL + + \ No newline at end of file diff --git a/extra/io/encodings/8-bit/CP1252.TXT b/extra/io/encodings/8-bit/CP1252.TXT new file mode 100644 index 0000000000..8ff4b204b7 --- /dev/null +++ b/extra/io/encodings/8-bit/CP1252.TXT @@ -0,0 +1,274 @@ +# +# Name: cp1252 to Unicode table +# Unicode version: 2.0 +# Table version: 2.01 +# Table format: Format A +# Date: 04/15/98 +# +# Contact: Shawn.Steele@microsoft.com +# +# General notes: none +# +# Format: Three tab-separated columns +# Column #1 is the cp1252 code (in hex) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 is the Unicode name (follows a comment sign, '#') +# +# The entries are in cp1252 order +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x0004 #END OF TRANSMISSION +0x05 0x0005 #ENQUIRY +0x06 0x0006 #ACKNOWLEDGE +0x07 0x0007 #BELL +0x08 0x0008 #BACKSPACE +0x09 0x0009 #HORIZONTAL TABULATION +0x0A 0x000A #LINE FEED +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x0014 #DEVICE CONTROL FOUR +0x15 0x0015 #NEGATIVE ACKNOWLEDGE +0x16 0x0016 #SYNCHRONOUS IDLE +0x17 0x0017 #END OF TRANSMISSION BLOCK +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x001A #SUBSTITUTE +0x1B 0x001B #ESCAPE +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0020 #SPACE +0x21 0x0021 #EXCLAMATION MARK +0x22 0x0022 #QUOTATION MARK +0x23 0x0023 #NUMBER SIGN +0x24 0x0024 #DOLLAR SIGN +0x25 0x0025 #PERCENT SIGN +0x26 0x0026 #AMPERSAND +0x27 0x0027 #APOSTROPHE +0x28 0x0028 #LEFT PARENTHESIS +0x29 0x0029 #RIGHT PARENTHESIS +0x2A 0x002A #ASTERISK +0x2B 0x002B #PLUS SIGN +0x2C 0x002C #COMMA +0x2D 0x002D #HYPHEN-MINUS +0x2E 0x002E #FULL STOP +0x2F 0x002F #SOLIDUS +0x30 0x0030 #DIGIT ZERO +0x31 0x0031 #DIGIT ONE +0x32 0x0032 #DIGIT TWO +0x33 0x0033 #DIGIT THREE +0x34 0x0034 #DIGIT FOUR +0x35 0x0035 #DIGIT FIVE +0x36 0x0036 #DIGIT SIX +0x37 0x0037 #DIGIT SEVEN +0x38 0x0038 #DIGIT EIGHT +0x39 0x0039 #DIGIT NINE +0x3A 0x003A #COLON +0x3B 0x003B #SEMICOLON +0x3C 0x003C #LESS-THAN SIGN +0x3D 0x003D #EQUALS SIGN +0x3E 0x003E #GREATER-THAN SIGN +0x3F 0x003F #QUESTION MARK +0x40 0x0040 #COMMERCIAL AT +0x41 0x0041 #LATIN CAPITAL LETTER A +0x42 0x0042 #LATIN CAPITAL LETTER B +0x43 0x0043 #LATIN CAPITAL LETTER C +0x44 0x0044 #LATIN CAPITAL LETTER D +0x45 0x0045 #LATIN CAPITAL LETTER E +0x46 0x0046 #LATIN CAPITAL LETTER F +0x47 0x0047 #LATIN CAPITAL LETTER G +0x48 0x0048 #LATIN CAPITAL LETTER H +0x49 0x0049 #LATIN CAPITAL LETTER I +0x4A 0x004A #LATIN CAPITAL LETTER J +0x4B 0x004B #LATIN CAPITAL LETTER K +0x4C 0x004C #LATIN CAPITAL LETTER L +0x4D 0x004D #LATIN CAPITAL LETTER M +0x4E 0x004E #LATIN CAPITAL LETTER N +0x4F 0x004F #LATIN CAPITAL LETTER O +0x50 0x0050 #LATIN CAPITAL LETTER P +0x51 0x0051 #LATIN CAPITAL LETTER Q +0x52 0x0052 #LATIN CAPITAL LETTER R +0x53 0x0053 #LATIN CAPITAL LETTER S +0x54 0x0054 #LATIN CAPITAL LETTER T +0x55 0x0055 #LATIN CAPITAL LETTER U +0x56 0x0056 #LATIN CAPITAL LETTER V +0x57 0x0057 #LATIN CAPITAL LETTER W +0x58 0x0058 #LATIN CAPITAL LETTER X +0x59 0x0059 #LATIN CAPITAL LETTER Y +0x5A 0x005A #LATIN CAPITAL LETTER Z +0x5B 0x005B #LEFT SQUARE BRACKET +0x5C 0x005C #REVERSE SOLIDUS +0x5D 0x005D #RIGHT SQUARE BRACKET +0x5E 0x005E #CIRCUMFLEX ACCENT +0x5F 0x005F #LOW LINE +0x60 0x0060 #GRAVE ACCENT +0x61 0x0061 #LATIN SMALL LETTER A +0x62 0x0062 #LATIN SMALL LETTER B +0x63 0x0063 #LATIN SMALL LETTER C +0x64 0x0064 #LATIN SMALL LETTER D +0x65 0x0065 #LATIN SMALL LETTER E +0x66 0x0066 #LATIN SMALL LETTER F +0x67 0x0067 #LATIN SMALL LETTER G +0x68 0x0068 #LATIN SMALL LETTER H +0x69 0x0069 #LATIN SMALL LETTER I +0x6A 0x006A #LATIN SMALL LETTER J +0x6B 0x006B #LATIN SMALL LETTER K +0x6C 0x006C #LATIN SMALL LETTER L +0x6D 0x006D #LATIN SMALL LETTER M +0x6E 0x006E #LATIN SMALL LETTER N +0x6F 0x006F #LATIN SMALL LETTER O +0x70 0x0070 #LATIN SMALL LETTER P +0x71 0x0071 #LATIN SMALL LETTER Q +0x72 0x0072 #LATIN SMALL LETTER R +0x73 0x0073 #LATIN SMALL LETTER S +0x74 0x0074 #LATIN SMALL LETTER T +0x75 0x0075 #LATIN SMALL LETTER U +0x76 0x0076 #LATIN SMALL LETTER V +0x77 0x0077 #LATIN SMALL LETTER W +0x78 0x0078 #LATIN SMALL LETTER X +0x79 0x0079 #LATIN SMALL LETTER Y +0x7A 0x007A #LATIN SMALL LETTER Z +0x7B 0x007B #LEFT CURLY BRACKET +0x7C 0x007C #VERTICAL LINE +0x7D 0x007D #RIGHT CURLY BRACKET +0x7E 0x007E #TILDE +0x7F 0x007F #DELETE +0x80 0x20AC #EURO SIGN +0x81 #UNDEFINED +0x82 0x201A #SINGLE LOW-9 QUOTATION MARK +0x83 0x0192 #LATIN SMALL LETTER F WITH HOOK +0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK +0x85 0x2026 #HORIZONTAL ELLIPSIS +0x86 0x2020 #DAGGER +0x87 0x2021 #DOUBLE DAGGER +0x88 0x02C6 #MODIFIER LETTER CIRCUMFLEX ACCENT +0x89 0x2030 #PER MILLE SIGN +0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON +0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0x8C 0x0152 #LATIN CAPITAL LIGATURE OE +0x8D #UNDEFINED +0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON +0x8F #UNDEFINED +0x90 #UNDEFINED +0x91 0x2018 #LEFT SINGLE QUOTATION MARK +0x92 0x2019 #RIGHT SINGLE QUOTATION MARK +0x93 0x201C #LEFT DOUBLE QUOTATION MARK +0x94 0x201D #RIGHT DOUBLE QUOTATION MARK +0x95 0x2022 #BULLET +0x96 0x2013 #EN DASH +0x97 0x2014 #EM DASH +0x98 0x02DC #SMALL TILDE +0x99 0x2122 #TRADE MARK SIGN +0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON +0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0x9C 0x0153 #LATIN SMALL LIGATURE OE +0x9D #UNDEFINED +0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON +0x9F 0x0178 #LATIN CAPITAL LETTER Y WITH DIAERESIS +0xA0 0x00A0 #NO-BREAK SPACE +0xA1 0x00A1 #INVERTED EXCLAMATION MARK +0xA2 0x00A2 #CENT SIGN +0xA3 0x00A3 #POUND SIGN +0xA4 0x00A4 #CURRENCY SIGN +0xA5 0x00A5 #YEN SIGN +0xA6 0x00A6 #BROKEN BAR +0xA7 0x00A7 #SECTION SIGN +0xA8 0x00A8 #DIAERESIS +0xA9 0x00A9 #COPYRIGHT SIGN +0xAA 0x00AA #FEMININE ORDINAL INDICATOR +0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xAC 0x00AC #NOT SIGN +0xAD 0x00AD #SOFT HYPHEN +0xAE 0x00AE #REGISTERED SIGN +0xAF 0x00AF #MACRON +0xB0 0x00B0 #DEGREE SIGN +0xB1 0x00B1 #PLUS-MINUS SIGN +0xB2 0x00B2 #SUPERSCRIPT TWO +0xB3 0x00B3 #SUPERSCRIPT THREE +0xB4 0x00B4 #ACUTE ACCENT +0xB5 0x00B5 #MICRO SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00B7 #MIDDLE DOT +0xB8 0x00B8 #CEDILLA +0xB9 0x00B9 #SUPERSCRIPT ONE +0xBA 0x00BA #MASCULINE ORDINAL INDICATOR +0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xBC 0x00BC #VULGAR FRACTION ONE QUARTER +0xBD 0x00BD #VULGAR FRACTION ONE HALF +0xBE 0x00BE #VULGAR FRACTION THREE QUARTERS +0xBF 0x00BF #INVERTED QUESTION MARK +0xC0 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xC3 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE +0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0xC5 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0xC6 0x00C6 #LATIN CAPITAL LETTER AE +0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0xC8 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0xCA 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0xCC 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE +0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE +0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xCF 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0xD0 0x00D0 #LATIN CAPITAL LETTER ETH +0xD1 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE +0xD2 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xD5 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xD7 0x00D7 #MULTIPLICATION SIGN +0xD8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE +0xD9 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xDB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xDE 0x00DE #LATIN CAPITAL LETTER THORN +0xDF 0x00DF #LATIN SMALL LETTER SHARP S +0xE0 0x00E0 #LATIN SMALL LETTER A WITH GRAVE +0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE +0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0xE3 0x00E3 #LATIN SMALL LETTER A WITH TILDE +0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xE5 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0xE6 0x00E6 #LATIN SMALL LETTER AE +0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA +0xE8 0x00E8 #LATIN SMALL LETTER E WITH GRAVE +0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE +0xEA 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS +0xEC 0x00EC #LATIN SMALL LETTER I WITH GRAVE +0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE +0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0xEF 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS +0xF0 0x00F0 #LATIN SMALL LETTER ETH +0xF1 0x00F1 #LATIN SMALL LETTER N WITH TILDE +0xF2 0x00F2 #LATIN SMALL LETTER O WITH GRAVE +0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE +0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xF5 0x00F5 #LATIN SMALL LETTER O WITH TILDE +0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xF7 0x00F7 #DIVISION SIGN +0xF8 0x00F8 #LATIN SMALL LETTER O WITH STROKE +0xF9 0x00F9 #LATIN SMALL LETTER U WITH GRAVE +0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE +0xFB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE +0xFE 0x00FE #LATIN SMALL LETTER THORN +0xFF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS diff --git a/extra/io/encodings/8-bit/GSM0338.TXT b/extra/io/encodings/8-bit/GSM0338.TXT new file mode 100644 index 0000000000..ae804d635a --- /dev/null +++ b/extra/io/encodings/8-bit/GSM0338.TXT @@ -0,0 +1,239 @@ +# +# Name: GSM 03.38 to Unicode +# Unicode version: 3.0 +# Table version: 1.1 +# Table format: Format A +# Date: 2000 May 30 +# Authors: Ken Whistler +# Kent Karlsson +# Markus Kuhn +# +# Copyright (c) 2000 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# ETSI GSM 03.38 7-bit default alphabet characters map into Unicode. +# This mapping is based on ETSI TS 100 900 V7.2.0 (1999-07), with +# a correction of 0x09 to *small* c-cedilla, instead of *capital* +# C-cedilla. +# +# Format: Three tab-separated columns +# Column #1 is the ETSI GSM 03.38 7-bit default alphabet +# code (in hex as 0xXX, or 0xXXXX for double-byte +# sequences) +# Column #2 is the Unicode scalar value (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in ETSI GSM 03.38 7-bit default alphabet code order. +# +# Note that ETSI GSM 03.38 also allows for the use of UCS-2 (UTF-16 +# restricted to the BMP) in GSM/SMS messages. +# +# Note also that there are commented Greek mappings for some +# capital Latin characters. This follows from the clear intent +# of the ETSI GSM 03.38 to have glyph coverage for the uppercase +# Greek alphabet by reusing Latin letters that have the same +# form as an uppercase Greek letter. Conversion implementations +# should be aware of this fact. +# +# The ETSI GSM 03.38 specification shows an uppercase C-cedilla +# glyph at 0x09. This may be the result of limited display +# capabilities for handling characters with descenders. However, the +# language coverage intent is clearly for the lowercase c-cedilla, as shown +# in the mapping below. The mapping for uppercase C-cedilla is shown +# in a commented line in the mapping table. +# +# The ESC character 0x1B is +# mapped to the no-break space character, unless it is part of a +# valid ESC sequence, to facilitate round-trip compatibility in +# the presence of unknown ESC sequences. +# +# 0x00 is NULL (when followed only by 0x00 up to the +# end of (fixed byte length) message, possibly also up to +# FORM FEED. But 0x00 is also the code for COMMERCIAL AT +# when some other character (CARRIAGE RETURN if nothing else) +# comes after the 0x00. +# +# Version history +# 1.0 version: first creation +# 1.1 version: fixed problem with the wrong line being a comment, +# added text regarding 0x00's interpretation, +# added second mapping for C-cedilla, +# added mapping of 0x1B escape to NBSP for display. +# +# Updated versions of this file may be found in: +# +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0040 # COMMERCIAL AT +#0x00 0x0000 # NULL (see note above) +0x01 0x00A3 # POUND SIGN +0x02 0x0024 # DOLLAR SIGN +0x03 0x00A5 # YEN SIGN +0x04 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0x05 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0x06 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0x07 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0x08 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0x09 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +#0x09 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA (see note above) +0x0A 0x000A # LINE FEED +0x0B 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0x0C 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x0F 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x10 0x0394 # GREEK CAPITAL LETTER DELTA +0x11 0x005F # LOW LINE +0x12 0x03A6 # GREEK CAPITAL LETTER PHI +0x13 0x0393 # GREEK CAPITAL LETTER GAMMA +0x14 0x039B # GREEK CAPITAL LETTER LAMDA +0x15 0x03A9 # GREEK CAPITAL LETTER OMEGA +0x16 0x03A0 # GREEK CAPITAL LETTER PI +0x17 0x03A8 # GREEK CAPITAL LETTER PSI +0x18 0x03A3 # GREEK CAPITAL LETTER SIGMA +0x19 0x0398 # GREEK CAPITAL LETTER THETA +0x1A 0x039E # GREEK CAPITAL LETTER XI +0x1B 0x00A0 # ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above) +0x1B0A 0x000C # FORM FEED +0x1B14 0x005E # CIRCUMFLEX ACCENT +0x1B28 0x007B # LEFT CURLY BRACKET +0x1B29 0x007D # RIGHT CURLY BRACKET +0x1B2F 0x005C # REVERSE SOLIDUS +0x1B3C 0x005B # LEFT SQUARE BRACKET +0x1B3D 0x007E # TILDE +0x1B3E 0x005D # RIGHT SQUARE BRACKET +0x1B40 0x007C # VERTICAL LINE +0x1B65 0x20AC # EURO SIGN +0x1C 0x00C6 # LATIN CAPITAL LETTER AE +0x1D 0x00E6 # LATIN SMALL LETTER AE +0x1E 0x00DF # LATIN SMALL LETTER SHARP S (German) +0x1F 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x00A4 # CURRENCY SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x00A1 # INVERTED EXCLAMATION MARK +0x41 0x0041 # LATIN CAPITAL LETTER A +#0x41 0x0391 # GREEK CAPITAL LETTER ALPHA +0x42 0x0042 # LATIN CAPITAL LETTER B +#0x42 0x0392 # GREEK CAPITAL LETTER BETA +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +#0x45 0x0395 # GREEK CAPITAL LETTER EPSILON +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +#0x48 0x0397 # GREEK CAPITAL LETTER ETA +0x49 0x0049 # LATIN CAPITAL LETTER I +#0x49 0x0399 # GREEK CAPITAL LETTER IOTA +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +#0x4B 0x039A # GREEK CAPITAL LETTER KAPPA +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +#0x4D 0x039C # GREEK CAPITAL LETTER MU +0x4E 0x004E # LATIN CAPITAL LETTER N +#0x4E 0x039D # GREEK CAPITAL LETTER NU +0x4F 0x004F # LATIN CAPITAL LETTER O +#0x4F 0x039F # GREEK CAPITAL LETTER OMICRON +0x50 0x0050 # LATIN CAPITAL LETTER P +#0x50 0x03A1 # GREEK CAPITAL LETTER RHO +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +#0x54 0x03A4 # GREEK CAPITAL LETTER TAU +0x55 0x0055 # LATIN CAPITAL LETTER U +#0x55 0x03A5 # GREEK CAPITAL LETTER UPSILON +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +#0x58 0x03A7 # GREEK CAPITAL LETTER CHI +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +#0x5A 0x0396 # GREEK CAPITAL LETTER ZETA +0x5B 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x5C 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x5D 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x5E 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x5F 0x00A7 # SECTION SIGN +0x60 0x00BF # INVERTED QUESTION MARK +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x7C 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x7D 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0x7E 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0x7F 0x00E0 # LATIN SMALL LETTER A WITH GRAVE diff --git a/extra/io/encodings/8-bit/KOI8-R.TXT b/extra/io/encodings/8-bit/KOI8-R.TXT new file mode 100644 index 0000000000..510561005c --- /dev/null +++ b/extra/io/encodings/8-bit/KOI8-R.TXT @@ -0,0 +1,302 @@ +# +# Name: KOI8-R (RFC1489) to Unicode +# Unicode version: 3.0 +# Table version: 1.0 +# Table format: Format A +# Date: 18 August 1999 +# Authors: Helmut Richter +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# General notes: +# +# This table contains the data the Unicode Consortium has on how +# KOI8-R characters map into Unicode. The underlying document is the +# mapping described in RFC 1489. No statements are made as to whether +# this mapping is the same as the mapping defined as "Code Page 878" +# with some vendors. +# +# Format: Three tab-separated columns +# Column #1 is the KOI8-R code (in hex as 0xXX) +# Column #2 is the Unicode (in hex as 0xXXXX) +# Column #3 the Unicode name (follows a comment sign, '#') +# +# The entries are in KOI8-R order. +# +# Version history +# 1.0 version: created. +# +# Any comments or problems, contact +# Please note that is an archival address; +# notices will be checked, but do not expect an immediate response. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 # CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/extra/io/encodings/8-bit/ROMAN.TXT b/extra/io/encodings/8-bit/ROMAN.TXT new file mode 100644 index 0000000000..5b3b8b4005 --- /dev/null +++ b/extra/io/encodings/8-bit/ROMAN.TXT @@ -0,0 +1,370 @@ +#======================================================================= +# File name: ROMAN.TXT +# +# Contents: Map (external version) from Mac OS Roman +# character set to Unicode 2.1 and later. +# +# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights +# reserved. +# +# Contact: charsets@apple.com +# +# Changes: +# +# c02 2005-Apr-05 Update header comments. Matches internal xml +# and Text Encoding Converter 2.0. +# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal +# utom. +# b03 1999-Sep-22 Update contact e-mail address. Matches +# internal utom, ufrm, and Text +# Encoding Converter version 1.5. +# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change +# mapping of 0xDB from CURRENCY SIGN to +# EURO SIGN. Matches internal utom, +# ufrm. +# n08 1998-Feb-05 Minor update to header comments +# n06 1997-Dec-14 Add warning about future changes to 0xDB +# from CURRENCY SIGN to EURO SIGN. Clarify +# some header information +# n04 1997-Dec-01 Update to match internal utom, ufrm: +# Change standard mapping for 0xBD from U+2126 +# to its canonical decomposition, U+03A9. +# n03 1995-Apr-15 First version (after fixing some typos). +# Matches internal ufrm. +# +# Standard header: +# ---------------- +# +# Apple, the Apple logo, and Macintosh are trademarks of Apple +# Computer, Inc., registered in the United States and other countries. +# Unicode is a trademark of Unicode Inc. For the sake of brevity, +# throughout this document, "Macintosh" can be used to refer to +# Macintosh computers and "Unicode" can be used to refer to the +# Unicode standard. +# +# Apple Computer, Inc. ("Apple") makes no warranty or representation, +# either express or implied, with respect to this document and the +# included data, its quality, accuracy, or fitness for a particular +# purpose. In no event will Apple be liable for direct, indirect, +# special, incidental, or consequential damages resulting from any +# defect or inaccuracy in this document or the included data. +# +# These mapping tables and character lists are subject to change. +# The latest tables should be available from the following: +# +# +# +# For general information about Mac OS encodings and these mapping +# tables, see the file "README.TXT". +# +# Format: +# ------- +# +# Three tab-separated columns; +# '#' begins a comment which continues to the end of the line. +# Column #1 is the Mac OS Roman code (in hex as 0xNN) +# Column #2 is the corresponding Unicode (in hex as 0xNNNN) +# Column #3 is a comment containing the Unicode name +# +# The entries are in Mac OS Roman code order. +# +# One of these mappings requires the use of a corporate character. +# See the file "CORPCHAR.TXT" and notes below. +# +# Control character mappings are not shown in this table, following +# the conventions of the standard UTC mapping tables. However, the +# Mac OS Roman character set uses the standard control characters at +# 0x00-0x1F and 0x7F. +# +# Notes on Mac OS Roman: +# ---------------------- +# +# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa +# environments, it is only supported directly in programming +# interfaces for QuickDraw Text, the Script Manager, and related +# Text Utilities. For other purposes it is supported via transcoding +# to and from Unicode. +# +# This character set is used for at least the following Mac OS +# localizations: U.S., British, Canadian French, French, Swiss +# French, German, Swiss German, Italian, Swiss Italian, Dutch, +# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan, +# Portuguese, Brazilian, and the default International system. +# +# Variants of Mac OS Roman are used for Croatian, Icelandic, +# Turkish, Romanian, and other encodings. Separate mapping tables +# are available for these encodings. +# +# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was +# mapped to U+00A4. In Mac OS 8.5 and later versions, code point +# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard +# Apple fonts are updated for Mac OS 8.5 to reflect this. There is +# a "currency sign" variant of the Mac OS Roman encoding that still +# maps 0xDB to U+00A4; this can be used for older fonts. +# +# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago, +# New York, Geneva, and Monaco did not implement the full Mac OS +# Roman character set; they only supported character codes up to +# 0xD8. The TrueType versions of these fonts have always implemented +# the full character set, as with the bitmap and TrueType versions +# of the other standard Roman fonts. +# +# In all Mac OS encodings, fonts such as Chicago which are used +# as "system" fonts (for menus, dialogs, etc.) have four glyphs +# at code points 0x11-0x14 for transient use by the Menu Manager. +# These glyphs are not intended as characters for use in normal +# text, and the associated code points are not generally +# interpreted as associated with these glyphs; they are usually +# interpreted (if at all) as the control codes DC1-DC4. +# +# Unicode mapping issues and notes: +# --------------------------------- +# +# The following corporate zone Unicode character is used in this +# mapping: +# +# 0xF8FF Apple logo +# +# NOTE: The graphic image associated with the Apple logo character +# is not authorized for use without permission of Apple, and +# unauthorized use might constitute trademark infringement. +# +# Details of mapping changes in each version: +# ------------------------------------------- +# +# Changes from version n08 to version b02: +# +# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from +# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC). +# +# Changes from version n03 to version n04: +# +# - Change mapping of 0xBD from U+2126 to its canonical +# decomposition, U+03A9. +# +################## + +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +# +0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS +0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE +0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA +0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE +0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE +0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS +0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS +0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE +0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE +0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX +0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS +0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE +0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE +0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA +0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE +0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE +0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX +0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS +0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE +0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE +0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX +0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS +0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE +0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE +0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE +0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX +0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS +0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE +0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE +0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE +0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX +0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS +0xA0 0x2020 # DAGGER +0xA1 0x00B0 # DEGREE SIGN +0xA2 0x00A2 # CENT SIGN +0xA3 0x00A3 # POUND SIGN +0xA4 0x00A7 # SECTION SIGN +0xA5 0x2022 # BULLET +0xA6 0x00B6 # PILCROW SIGN +0xA7 0x00DF # LATIN SMALL LETTER SHARP S +0xA8 0x00AE # REGISTERED SIGN +0xA9 0x00A9 # COPYRIGHT SIGN +0xAA 0x2122 # TRADE MARK SIGN +0xAB 0x00B4 # ACUTE ACCENT +0xAC 0x00A8 # DIAERESIS +0xAD 0x2260 # NOT EQUAL TO +0xAE 0x00C6 # LATIN CAPITAL LETTER AE +0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE +0xB0 0x221E # INFINITY +0xB1 0x00B1 # PLUS-MINUS SIGN +0xB2 0x2264 # LESS-THAN OR EQUAL TO +0xB3 0x2265 # GREATER-THAN OR EQUAL TO +0xB4 0x00A5 # YEN SIGN +0xB5 0x00B5 # MICRO SIGN +0xB6 0x2202 # PARTIAL DIFFERENTIAL +0xB7 0x2211 # N-ARY SUMMATION +0xB8 0x220F # N-ARY PRODUCT +0xB9 0x03C0 # GREEK SMALL LETTER PI +0xBA 0x222B # INTEGRAL +0xBB 0x00AA # FEMININE ORDINAL INDICATOR +0xBC 0x00BA # MASCULINE ORDINAL INDICATOR +0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA +0xBE 0x00E6 # LATIN SMALL LETTER AE +0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE +0xC0 0x00BF # INVERTED QUESTION MARK +0xC1 0x00A1 # INVERTED EXCLAMATION MARK +0xC2 0x00AC # NOT SIGN +0xC3 0x221A # SQUARE ROOT +0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK +0xC5 0x2248 # ALMOST EQUAL TO +0xC6 0x2206 # INCREMENT +0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0xC9 0x2026 # HORIZONTAL ELLIPSIS +0xCA 0x00A0 # NO-BREAK SPACE +0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE +0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE +0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE +0xCE 0x0152 # LATIN CAPITAL LIGATURE OE +0xCF 0x0153 # LATIN SMALL LIGATURE OE +0xD0 0x2013 # EN DASH +0xD1 0x2014 # EM DASH +0xD2 0x201C # LEFT DOUBLE QUOTATION MARK +0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK +0xD4 0x2018 # LEFT SINGLE QUOTATION MARK +0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK +0xD6 0x00F7 # DIVISION SIGN +0xD7 0x25CA # LOZENGE +0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS +0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS +0xDA 0x2044 # FRACTION SLASH +0xDB 0x20AC # EURO SIGN +0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +0xDE 0xFB01 # LATIN SMALL LIGATURE FI +0xDF 0xFB02 # LATIN SMALL LIGATURE FL +0xE0 0x2021 # DOUBLE DAGGER +0xE1 0x00B7 # MIDDLE DOT +0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK +0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK +0xE4 0x2030 # PER MILLE SIGN +0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE +0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS +0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE +0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE +0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS +0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE +0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xF0 0xF8FF # Apple logo +0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE +0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE +0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE +0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I +0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT +0xF7 0x02DC # SMALL TILDE +0xF8 0x00AF # MACRON +0xF9 0x02D8 # BREVE +0xFA 0x02D9 # DOT ABOVE +0xFB 0x02DA # RING ABOVE +0xFC 0x00B8 # CEDILLA +0xFD 0x02DD # DOUBLE ACUTE ACCENT +0xFE 0x02DB # OGONEK +0xFF 0x02C7 # CARON diff --git a/extra/io/encodings/latin1/authors.txt b/extra/io/encodings/8-bit/authors.txt similarity index 100% rename from extra/io/encodings/latin1/authors.txt rename to extra/io/encodings/8-bit/authors.txt diff --git a/extra/io/encodings/8-bit/summary.txt b/extra/io/encodings/8-bit/summary.txt new file mode 100644 index 0000000000..7fe8064015 --- /dev/null +++ b/extra/io/encodings/8-bit/summary.txt @@ -0,0 +1 @@ +Definitions of 8-bit encodings like ISO 8859 and Windows 1252 diff --git a/extra/io/encodings/latin1/tags.txt b/extra/io/encodings/8-bit/tags.txt similarity index 100% rename from extra/io/encodings/latin1/tags.txt rename to extra/io/encodings/8-bit/tags.txt diff --git a/extra/io/encodings/latin1/latin1-docs.factor b/extra/io/encodings/latin1/latin1-docs.factor deleted file mode 100644 index 5872b2bcfd..0000000000 --- a/extra/io/encodings/latin1/latin1-docs.factor +++ /dev/null @@ -1,5 +0,0 @@ -USING: help.syntax help.markup ; -IN: io.encodings.latin1 - -HELP: latin1 -{ $class-description "This class is used for Latin 1 (ISO 8859-1) encoding and decoding" } ; diff --git a/extra/io/encodings/latin1/latin1-tests.factor b/extra/io/encodings/latin1/latin1-tests.factor deleted file mode 100644 index a89bfe0e6f..0000000000 --- a/extra/io/encodings/latin1/latin1-tests.factor +++ /dev/null @@ -1,9 +0,0 @@ -USING: io.encodings.string io.encodings.latin1 tools.test strings arrays ; -IN: io.encodings.latin1.tests - -[ B{ CHAR: f CHAR: o CHAR: o } ] [ "foo" latin1 encode ] unit-test -[ { 256 } >string latin1 encode ] must-fail -[ B{ 255 } ] [ { 255 } latin1 encode ] unit-test - -[ "bar" ] [ "bar" latin1 decode ] unit-test -[ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } latin1 decode >array ] unit-test diff --git a/extra/io/encodings/latin1/latin1.factor b/extra/io/encodings/latin1/latin1.factor deleted file mode 100755 index 2b82318885..0000000000 --- a/extra/io/encodings/latin1/latin1.factor +++ /dev/null @@ -1,12 +0,0 @@ -! Copyright (C) 2008 Daniel Ehrenberg. -! See http://factorcode.org/license.txt for BSD license. -USING: io io.encodings kernel io.encodings.ascii.private ; -IN: io.encodings.latin1 - -TUPLE: latin1 ; - -M: latin1 encode-char - 256 encode-if< ; - -M: latin1 decode-char - drop stream-read1 ; diff --git a/extra/io/encodings/latin1/summary.txt b/extra/io/encodings/latin1/summary.txt deleted file mode 100644 index d40d628767..0000000000 --- a/extra/io/encodings/latin1/summary.txt +++ /dev/null @@ -1 +0,0 @@ -ISO 8859-1 encoding/decoding From 7adef0c61321a004bf93d0fb3c1241b75a4a44c1 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 21 Mar 2008 14:01:50 -0400 Subject: [PATCH 05/15] Completing 8-bit changes --- extra/io/encodings/8-bit/8-bit-tests.factor | 1 + extra/io/encodings/8-bit/8-bit.factor | 18 +- extra/io/encodings/8-bit/GSM0338.TXT | 239 -------------------- 3 files changed, 12 insertions(+), 246 deletions(-) delete mode 100644 extra/io/encodings/8-bit/GSM0338.TXT diff --git a/extra/io/encodings/8-bit/8-bit-tests.factor b/extra/io/encodings/8-bit/8-bit-tests.factor index 316e496219..5dbe28cb14 100644 --- a/extra/io/encodings/8-bit/8-bit-tests.factor +++ b/extra/io/encodings/8-bit/8-bit-tests.factor @@ -7,3 +7,4 @@ IN: io.encodings.8-bit.tests [ "bar" ] [ "bar" iso-8859-1 decode ] unit-test [ { CHAR: b 233 CHAR: r } ] [ { CHAR: b 233 CHAR: r } iso-8859-1 decode >array ] unit-test +[ { HEX: fffd HEX: 20AC } ] [ { HEX: 81 HEX: 80 } windows-1252 decode >array ] unit-test diff --git a/extra/io/encodings/8-bit/8-bit.factor b/extra/io/encodings/8-bit/8-bit.factor index ff0e6ec8bf..2cc6b2e57c 100644 --- a/extra/io/encodings/8-bit/8-bit.factor +++ b/extra/io/encodings/8-bit/8-bit.factor @@ -3,7 +3,7 @@ USING: math.parser arrays io.encodings sequences kernel assocs hashtables io.encodings.ascii combinators.cleave generic parser tuples words io io.files splitting namespaces -classes quotations ; +classes quotations math compiler.units ; IN: io.encodings.8-bit ] map ] map ; + [ "\t " split 2 head [ 2 tail-if hex> ] map ] map ; : byte>ch ( assoc -- array ) 256 replacement-char @@ -73,7 +75,9 @@ IN: io.encodings.8-bit \ encode-char [ encode-8-bit ] method-with-data ; : decode-8-bit ( stream encoding array -- char/f ) - nip swap stream-read1 [ swap nth ] [ drop f ] if* ; + nip swap stream-read1 + [ swap nth [ replacement-char ] unless* ] + [ drop f ] if* ; : define-decode-char ( class array -- ) \ decode-char [ decode-8-bit ] method-with-data ; @@ -86,4 +90,4 @@ IN: io.encodings.8-bit PRIVATE> -! << mappings [ define-8-bit-encoding ] assoc-each >> +[ mappings [ define-8-bit-encoding ] assoc-each ] with-compilation-unit diff --git a/extra/io/encodings/8-bit/GSM0338.TXT b/extra/io/encodings/8-bit/GSM0338.TXT deleted file mode 100644 index ae804d635a..0000000000 --- a/extra/io/encodings/8-bit/GSM0338.TXT +++ /dev/null @@ -1,239 +0,0 @@ -# -# Name: GSM 03.38 to Unicode -# Unicode version: 3.0 -# Table version: 1.1 -# Table format: Format A -# Date: 2000 May 30 -# Authors: Ken Whistler -# Kent Karlsson -# Markus Kuhn -# -# Copyright (c) 2000 Unicode, Inc. All Rights reserved. -# -# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). -# No claims are made as to fitness for any particular purpose. No -# warranties of any kind are expressed or implied. The recipient -# agrees to determine applicability of information provided. If this -# file has been provided on optical media by Unicode, Inc., the sole -# remedy for any claim will be exchange of defective media within 90 -# days of receipt. -# -# Unicode, Inc. hereby grants the right to freely use the information -# supplied in this file in the creation of products supporting the -# Unicode Standard, and to make copies of this file in any form for -# internal or external distribution as long as this notice remains -# attached. -# -# General notes: -# -# This table contains the data the Unicode Consortium has on how -# ETSI GSM 03.38 7-bit default alphabet characters map into Unicode. -# This mapping is based on ETSI TS 100 900 V7.2.0 (1999-07), with -# a correction of 0x09 to *small* c-cedilla, instead of *capital* -# C-cedilla. -# -# Format: Three tab-separated columns -# Column #1 is the ETSI GSM 03.38 7-bit default alphabet -# code (in hex as 0xXX, or 0xXXXX for double-byte -# sequences) -# Column #2 is the Unicode scalar value (in hex as 0xXXXX) -# Column #3 the Unicode name (follows a comment sign, '#') -# -# The entries are in ETSI GSM 03.38 7-bit default alphabet code order. -# -# Note that ETSI GSM 03.38 also allows for the use of UCS-2 (UTF-16 -# restricted to the BMP) in GSM/SMS messages. -# -# Note also that there are commented Greek mappings for some -# capital Latin characters. This follows from the clear intent -# of the ETSI GSM 03.38 to have glyph coverage for the uppercase -# Greek alphabet by reusing Latin letters that have the same -# form as an uppercase Greek letter. Conversion implementations -# should be aware of this fact. -# -# The ETSI GSM 03.38 specification shows an uppercase C-cedilla -# glyph at 0x09. This may be the result of limited display -# capabilities for handling characters with descenders. However, the -# language coverage intent is clearly for the lowercase c-cedilla, as shown -# in the mapping below. The mapping for uppercase C-cedilla is shown -# in a commented line in the mapping table. -# -# The ESC character 0x1B is -# mapped to the no-break space character, unless it is part of a -# valid ESC sequence, to facilitate round-trip compatibility in -# the presence of unknown ESC sequences. -# -# 0x00 is NULL (when followed only by 0x00 up to the -# end of (fixed byte length) message, possibly also up to -# FORM FEED. But 0x00 is also the code for COMMERCIAL AT -# when some other character (CARRIAGE RETURN if nothing else) -# comes after the 0x00. -# -# Version history -# 1.0 version: first creation -# 1.1 version: fixed problem with the wrong line being a comment, -# added text regarding 0x00's interpretation, -# added second mapping for C-cedilla, -# added mapping of 0x1B escape to NBSP for display. -# -# Updated versions of this file may be found in: -# -# -# Any comments or problems, contact -# Please note that is an archival address; -# notices will be checked, but do not expect an immediate response. -# -0x00 0x0040 # COMMERCIAL AT -#0x00 0x0000 # NULL (see note above) -0x01 0x00A3 # POUND SIGN -0x02 0x0024 # DOLLAR SIGN -0x03 0x00A5 # YEN SIGN -0x04 0x00E8 # LATIN SMALL LETTER E WITH GRAVE -0x05 0x00E9 # LATIN SMALL LETTER E WITH ACUTE -0x06 0x00F9 # LATIN SMALL LETTER U WITH GRAVE -0x07 0x00EC # LATIN SMALL LETTER I WITH GRAVE -0x08 0x00F2 # LATIN SMALL LETTER O WITH GRAVE -0x09 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA -#0x09 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA (see note above) -0x0A 0x000A # LINE FEED -0x0B 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE -0x0C 0x00F8 # LATIN SMALL LETTER O WITH STROKE -0x0D 0x000D # CARRIAGE RETURN -0x0E 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE -0x0F 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE -0x10 0x0394 # GREEK CAPITAL LETTER DELTA -0x11 0x005F # LOW LINE -0x12 0x03A6 # GREEK CAPITAL LETTER PHI -0x13 0x0393 # GREEK CAPITAL LETTER GAMMA -0x14 0x039B # GREEK CAPITAL LETTER LAMDA -0x15 0x03A9 # GREEK CAPITAL LETTER OMEGA -0x16 0x03A0 # GREEK CAPITAL LETTER PI -0x17 0x03A8 # GREEK CAPITAL LETTER PSI -0x18 0x03A3 # GREEK CAPITAL LETTER SIGMA -0x19 0x0398 # GREEK CAPITAL LETTER THETA -0x1A 0x039E # GREEK CAPITAL LETTER XI -0x1B 0x00A0 # ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above) -0x1B0A 0x000C # FORM FEED -0x1B14 0x005E # CIRCUMFLEX ACCENT -0x1B28 0x007B # LEFT CURLY BRACKET -0x1B29 0x007D # RIGHT CURLY BRACKET -0x1B2F 0x005C # REVERSE SOLIDUS -0x1B3C 0x005B # LEFT SQUARE BRACKET -0x1B3D 0x007E # TILDE -0x1B3E 0x005D # RIGHT SQUARE BRACKET -0x1B40 0x007C # VERTICAL LINE -0x1B65 0x20AC # EURO SIGN -0x1C 0x00C6 # LATIN CAPITAL LETTER AE -0x1D 0x00E6 # LATIN SMALL LETTER AE -0x1E 0x00DF # LATIN SMALL LETTER SHARP S (German) -0x1F 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE -0x20 0x0020 # SPACE -0x21 0x0021 # EXCLAMATION MARK -0x22 0x0022 # QUOTATION MARK -0x23 0x0023 # NUMBER SIGN -0x24 0x00A4 # CURRENCY SIGN -0x25 0x0025 # PERCENT SIGN -0x26 0x0026 # AMPERSAND -0x27 0x0027 # APOSTROPHE -0x28 0x0028 # LEFT PARENTHESIS -0x29 0x0029 # RIGHT PARENTHESIS -0x2A 0x002A # ASTERISK -0x2B 0x002B # PLUS SIGN -0x2C 0x002C # COMMA -0x2D 0x002D # HYPHEN-MINUS -0x2E 0x002E # FULL STOP -0x2F 0x002F # SOLIDUS -0x30 0x0030 # DIGIT ZERO -0x31 0x0031 # DIGIT ONE -0x32 0x0032 # DIGIT TWO -0x33 0x0033 # DIGIT THREE -0x34 0x0034 # DIGIT FOUR -0x35 0x0035 # DIGIT FIVE -0x36 0x0036 # DIGIT SIX -0x37 0x0037 # DIGIT SEVEN -0x38 0x0038 # DIGIT EIGHT -0x39 0x0039 # DIGIT NINE -0x3A 0x003A # COLON -0x3B 0x003B # SEMICOLON -0x3C 0x003C # LESS-THAN SIGN -0x3D 0x003D # EQUALS SIGN -0x3E 0x003E # GREATER-THAN SIGN -0x3F 0x003F # QUESTION MARK -0x40 0x00A1 # INVERTED EXCLAMATION MARK -0x41 0x0041 # LATIN CAPITAL LETTER A -#0x41 0x0391 # GREEK CAPITAL LETTER ALPHA -0x42 0x0042 # LATIN CAPITAL LETTER B -#0x42 0x0392 # GREEK CAPITAL LETTER BETA -0x43 0x0043 # LATIN CAPITAL LETTER C -0x44 0x0044 # LATIN CAPITAL LETTER D -0x45 0x0045 # LATIN CAPITAL LETTER E -#0x45 0x0395 # GREEK CAPITAL LETTER EPSILON -0x46 0x0046 # LATIN CAPITAL LETTER F -0x47 0x0047 # LATIN CAPITAL LETTER G -0x48 0x0048 # LATIN CAPITAL LETTER H -#0x48 0x0397 # GREEK CAPITAL LETTER ETA -0x49 0x0049 # LATIN CAPITAL LETTER I -#0x49 0x0399 # GREEK CAPITAL LETTER IOTA -0x4A 0x004A # LATIN CAPITAL LETTER J -0x4B 0x004B # LATIN CAPITAL LETTER K -#0x4B 0x039A # GREEK CAPITAL LETTER KAPPA -0x4C 0x004C # LATIN CAPITAL LETTER L -0x4D 0x004D # LATIN CAPITAL LETTER M -#0x4D 0x039C # GREEK CAPITAL LETTER MU -0x4E 0x004E # LATIN CAPITAL LETTER N -#0x4E 0x039D # GREEK CAPITAL LETTER NU -0x4F 0x004F # LATIN CAPITAL LETTER O -#0x4F 0x039F # GREEK CAPITAL LETTER OMICRON -0x50 0x0050 # LATIN CAPITAL LETTER P -#0x50 0x03A1 # GREEK CAPITAL LETTER RHO -0x51 0x0051 # LATIN CAPITAL LETTER Q -0x52 0x0052 # LATIN CAPITAL LETTER R -0x53 0x0053 # LATIN CAPITAL LETTER S -0x54 0x0054 # LATIN CAPITAL LETTER T -#0x54 0x03A4 # GREEK CAPITAL LETTER TAU -0x55 0x0055 # LATIN CAPITAL LETTER U -#0x55 0x03A5 # GREEK CAPITAL LETTER UPSILON -0x56 0x0056 # LATIN CAPITAL LETTER V -0x57 0x0057 # LATIN CAPITAL LETTER W -0x58 0x0058 # LATIN CAPITAL LETTER X -#0x58 0x03A7 # GREEK CAPITAL LETTER CHI -0x59 0x0059 # LATIN CAPITAL LETTER Y -0x5A 0x005A # LATIN CAPITAL LETTER Z -#0x5A 0x0396 # GREEK CAPITAL LETTER ZETA -0x5B 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS -0x5C 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS -0x5D 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE -0x5E 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS -0x5F 0x00A7 # SECTION SIGN -0x60 0x00BF # INVERTED QUESTION MARK -0x61 0x0061 # LATIN SMALL LETTER A -0x62 0x0062 # LATIN SMALL LETTER B -0x63 0x0063 # LATIN SMALL LETTER C -0x64 0x0064 # LATIN SMALL LETTER D -0x65 0x0065 # LATIN SMALL LETTER E -0x66 0x0066 # LATIN SMALL LETTER F -0x67 0x0067 # LATIN SMALL LETTER G -0x68 0x0068 # LATIN SMALL LETTER H -0x69 0x0069 # LATIN SMALL LETTER I -0x6A 0x006A # LATIN SMALL LETTER J -0x6B 0x006B # LATIN SMALL LETTER K -0x6C 0x006C # LATIN SMALL LETTER L -0x6D 0x006D # LATIN SMALL LETTER M -0x6E 0x006E # LATIN SMALL LETTER N -0x6F 0x006F # LATIN SMALL LETTER O -0x70 0x0070 # LATIN SMALL LETTER P -0x71 0x0071 # LATIN SMALL LETTER Q -0x72 0x0072 # LATIN SMALL LETTER R -0x73 0x0073 # LATIN SMALL LETTER S -0x74 0x0074 # LATIN SMALL LETTER T -0x75 0x0075 # LATIN SMALL LETTER U -0x76 0x0076 # LATIN SMALL LETTER V -0x77 0x0077 # LATIN SMALL LETTER W -0x78 0x0078 # LATIN SMALL LETTER X -0x79 0x0079 # LATIN SMALL LETTER Y -0x7A 0x007A # LATIN SMALL LETTER Z -0x7B 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS -0x7C 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS -0x7D 0x00F1 # LATIN SMALL LETTER N WITH TILDE -0x7E 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS -0x7F 0x00E0 # LATIN SMALL LETTER A WITH GRAVE From 88baf7c3b7a7c38bb699e8ff72cb09fc6ce17031 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 21 Mar 2008 14:07:17 -0400 Subject: [PATCH 06/15] latin1 -> iso-8859-1 --- core/io/io-tests.factor | 4 ++-- .../benchmark/reverse-complement/reverse-complement.factor | 6 +++--- extra/http/client/client.factor | 6 +++--- extra/http/server/server.factor | 4 ++-- extra/io/unix/launcher/launcher.factor | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/io/io-tests.factor b/core/io/io-tests.factor index 22c942d2d9..6200bd5235 100755 --- a/core/io/io-tests.factor +++ b/core/io/io-tests.factor @@ -1,5 +1,5 @@ USING: arrays io io.files kernel math parser strings system -tools.test words namespaces io.encodings.latin1 +tools.test words namespaces io.encodings.8-bit io.encodings.binary ; IN: io.tests @@ -9,7 +9,7 @@ IN: io.tests ] unit-test : ( resource -- stream ) - resource-path latin1 ; + resource-path iso-8859-1 ; [ "This is a line.\rThis is another line.\r" diff --git a/extra/benchmark/reverse-complement/reverse-complement.factor b/extra/benchmark/reverse-complement/reverse-complement.factor index 9c782e65e6..d83b720187 100755 --- a/extra/benchmark/reverse-complement/reverse-complement.factor +++ b/extra/benchmark/reverse-complement/reverse-complement.factor @@ -1,6 +1,6 @@ USING: io io.files io.streams.duplex kernel sequences sequences.private strings vectors words memoize splitting -hints unicode.case continuations io.encodings.latin1 ; +hints unicode.case continuations io.encodings.ascii ; IN: benchmark.reverse-complement MEMO: trans-map ( -- str ) @@ -32,8 +32,8 @@ HINTS: do-line vector string ; readln [ do-line (reverse-complement) ] [ show-seq ] if* ; : reverse-complement ( infile outfile -- ) - latin1 [ - swap latin1 [ + ascii [ + swap ascii [ swap [ 500000 (reverse-complement) ] with-stream diff --git a/extra/http/client/client.factor b/extra/http/client/client.factor index fc85cce3ad..233b61ea74 100755 --- a/extra/http/client/client.factor +++ b/extra/http/client/client.factor @@ -3,7 +3,7 @@ USING: assocs http kernel math math.parser namespaces sequences io io.sockets io.streams.string io.files io.timeouts strings splitting calendar continuations accessors vectors -io.encodings.latin1 io.encodings.binary fry ; +io.encodings.8-bit io.encodings.binary fry ; IN: http.client DEFER: http-request @@ -52,7 +52,7 @@ PRIVATE> : http-request ( request -- response stream ) dup request [ - dup request-addr latin1 + dup request-addr iso-8859-1 1 minutes over set-timeout [ write-request flush @@ -82,7 +82,7 @@ PRIVATE> : download-to ( url file -- ) #! Downloads the contents of a URL to a file. swap http-get-stream swap check-response - [ swap latin1 stream-copy ] with-disposal ; + [ swap iso-8859-1 stream-copy ] with-disposal ; : download ( url -- ) dup download-name download-to ; diff --git a/extra/http/server/server.factor b/extra/http/server/server.factor index 6b3ae52730..3df21adf26 100755 --- a/extra/http/server/server.factor +++ b/extra/http/server/server.factor @@ -4,7 +4,7 @@ USING: assocs kernel namespaces io io.timeouts strings splitting threads http sequences prettyprint io.server logging calendar html.elements accessors math.parser combinators.lib tools.vocabs debugger html continuations random combinators -destructors io.encodings.latin1 fry combinators.cleave ; +destructors io.encodings.8-bit fry combinators.cleave ; IN: http.server GENERIC: call-responder ( path responder -- response ) @@ -217,7 +217,7 @@ SYMBOL: exit-continuation : httpd ( port -- ) internet-server "http.server" - latin1 [ handle-client ] with-server ; + iso-8859-1 [ handle-client ] with-server ; : httpd-main ( -- ) 8888 httpd ; diff --git a/extra/io/unix/launcher/launcher.factor b/extra/io/unix/launcher/launcher.factor index a1e42fddf2..8ed1c957af 100755 --- a/extra/io/unix/launcher/launcher.factor +++ b/extra/io/unix/launcher/launcher.factor @@ -4,7 +4,7 @@ USING: io io.backend io.launcher io.nonblocking io.unix.backend io.unix.files io.nonblocking sequences kernel namespaces math system alien.c-types debugger continuations arrays assocs combinators unix.process strings threads unix -io.unix.launcher.parser io.encodings.latin1 accessors ; +io.unix.launcher.parser accessors ; IN: io.unix.launcher ! Search unix first From fae69bd0920a1a53c411ff02b720585b9183c2c8 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 21 Mar 2008 16:57:13 -0400 Subject: [PATCH 07/15] Final fix for 8-bit encodings --- core/io/encodings/encodings-docs.factor | 9 +++++---- extra/io/encodings/8-bit/CP037.TXT | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/io/encodings/encodings-docs.factor b/core/io/encodings/encodings-docs.factor index 548d2cd7fc..5d1068d496 100644 --- a/core/io/encodings/encodings-docs.factor +++ b/core/io/encodings/encodings-docs.factor @@ -37,10 +37,11 @@ HELP: ( stream-in stream-out encoding -- duplex ) ARTICLE: "encodings-descriptors" "Encoding descriptors" "An encoding descriptor is something which can be used for input or output streams to encode or decode files. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:" -$nl { $vocab-link "io.encodings.utf8" } -$nl { $vocab-link "io.encodings.ascii" } -$nl { $vocab-link "io.encodings.binary" } -$nl { $vocab-link "io.encodings.utf16" } ; +{ $vocab-subsection "io.encodings.utf8" } +{ $vocab-subsection "io.encodings.ascii" } +{ $vocab-subsection "io.encodings.8-bit" } +{ $vocab-subsection "io.encodings.binary" } +{ $vocab-subsection "io.encodings.utf16" } ; ARTICLE: "encodings-protocol" "Encoding protocol" "An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again." diff --git a/extra/io/encodings/8-bit/CP037.TXT b/extra/io/encodings/8-bit/CP037.TXT index 48fde2ae69..43186f7bf9 100644 --- a/extra/io/encodings/8-bit/CP037.TXT +++ b/extra/io/encodings/8-bit/CP037.TXT @@ -271,5 +271,3 @@ 0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE 0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE 0xFF 0x009F #CONTROL - - \ No newline at end of file From 8d7ccf2596bfca71ed9d50849a70e4cc371d7f0a Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 21 Mar 2008 17:48:01 -0500 Subject: [PATCH 08/15] Add unit test for ifte --- extra/combinators/lib/lib-tests.factor | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/extra/combinators/lib/lib-tests.factor b/extra/combinators/lib/lib-tests.factor index 0a08948346..ed481f72e6 100755 --- a/extra/combinators/lib/lib-tests.factor +++ b/extra/combinators/lib/lib-tests.factor @@ -46,3 +46,8 @@ IN: combinators.lib.tests [ dup array? ] [ dup vector? ] [ dup float? ] } || nip ] unit-test + + +{ 1 1 } [ + [ even? ] [ drop 1 ] [ drop 2 ] ifte +] must-infer-as From 86efc8467c1959725813b46edd6c7bc6e7ca6c89 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 21 Mar 2008 21:47:16 -0400 Subject: [PATCH 09/15] Strict wrapper for encodings --- extra/io/encodings/strict/authors.txt | 1 + extra/io/encodings/strict/strict-tests.factor | 6 ++++++ extra/io/encodings/strict/strict.factor | 18 ++++++++++++++++++ extra/io/encodings/strict/summary.txt | 1 + extra/io/encodings/strict/tags.txt | 1 + 5 files changed, 27 insertions(+) create mode 100644 extra/io/encodings/strict/authors.txt create mode 100644 extra/io/encodings/strict/strict-tests.factor create mode 100644 extra/io/encodings/strict/strict.factor create mode 100644 extra/io/encodings/strict/summary.txt create mode 100644 extra/io/encodings/strict/tags.txt diff --git a/extra/io/encodings/strict/authors.txt b/extra/io/encodings/strict/authors.txt new file mode 100644 index 0000000000..f990dd0ed2 --- /dev/null +++ b/extra/io/encodings/strict/authors.txt @@ -0,0 +1 @@ +Daniel Ehrenberg diff --git a/extra/io/encodings/strict/strict-tests.factor b/extra/io/encodings/strict/strict-tests.factor new file mode 100644 index 0000000000..aebb58cc30 --- /dev/null +++ b/extra/io/encodings/strict/strict-tests.factor @@ -0,0 +1,6 @@ +USING: io.encodings.strict io.encodings.ascii tools.test +arrays io.encodings.string ; +IN: io.encodings.strict.test + +[ { HEX: fffd } ] [ { 128 } ascii decode >array ] unit-test +[ { 128 } ascii strict decode ] must-fail diff --git a/extra/io/encodings/strict/strict.factor b/extra/io/encodings/strict/strict.factor new file mode 100644 index 0000000000..89c10d89cc --- /dev/null +++ b/extra/io/encodings/strict/strict.factor @@ -0,0 +1,18 @@ +! Copyright (C) 2008 Daniel Ehrenberg +! See http://factorcode.org/license.txt for BSD license. +USING: io.encodings kernel accessors inspector ; +IN: io.encodings.strict + +TUPLE: strict code ; +C: strict strict + +TUPLE: decode-error ; +: decode-error ( -- * ) \ decode-error construct-empty throw ; +M: decode-error summary + drop "Error in decoding input stream" ; + +M: strict + code>> [ strict ] change-code ; + +M: strict decode-char + code>> decode-char dup replacement-char = [ decode-error ] when ; diff --git a/extra/io/encodings/strict/summary.txt b/extra/io/encodings/strict/summary.txt new file mode 100644 index 0000000000..9fd0fe3bf1 --- /dev/null +++ b/extra/io/encodings/strict/summary.txt @@ -0,0 +1 @@ +Strict wrapper for encodings diff --git a/extra/io/encodings/strict/tags.txt b/extra/io/encodings/strict/tags.txt new file mode 100644 index 0000000000..8e27be7d61 --- /dev/null +++ b/extra/io/encodings/strict/tags.txt @@ -0,0 +1 @@ +text From 99b9ab367bc330969fa055e504da1f2652ac4e70 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 24 Mar 2008 18:02:39 -0500 Subject: [PATCH 10/15] Move priority code to io.launcher --- extra/io/launcher/launcher-docs.factor | 11 +++++++++++ extra/io/launcher/launcher.factor | 9 ++++++++- extra/io/process/process.factor | 17 ----------------- extra/io/unix/launcher/launcher.factor | 19 +++++++++++++++++-- extra/io/unix/process/process.factor | 19 ------------------- extra/io/windows/process/process.factor | 8 -------- extra/unix/process/process.factor | 5 ++++- 7 files changed, 40 insertions(+), 48 deletions(-) delete mode 100644 extra/io/process/process.factor delete mode 100644 extra/io/unix/process/process.factor delete mode 100644 extra/io/windows/process/process.factor diff --git a/extra/io/launcher/launcher-docs.factor b/extra/io/launcher/launcher-docs.factor index 7fdd22c8a5..640801234b 100755 --- a/extra/io/launcher/launcher-docs.factor +++ b/extra/io/launcher/launcher-docs.factor @@ -33,6 +33,17 @@ $nl { "a file stream or a socket - the stream is connected to the given Factor stream, which cannot be used again from within Factor and must be closed after the process has been started" } } ; +ARTICLE: "io.launcher.priority" "Setting process priority" +"The priority of the child process can be set by storing one of the below symbols in the " { $snippet "priority" } " slot of a " { $link process } " tuple:" +{ $list + { $link +lowest-priority+ } + { $link +low-priority+ } + { $link +normal-priority+ } + { $link +high-priority+ } + { $link +highest-priority+ } +} +"The default value is " { $link f } ", which denotes that the child process should inherit the current process priority." ; + HELP: +closed+ { $description "Possible value for the " { $snippet "stdin" } ", " { $snippet "stdout" } ", and " { $snippet "stderr" } " slots of a " { $link process } "." } ; diff --git a/extra/io/launcher/launcher.factor b/extra/io/launcher/launcher.factor index 9c7d64934e..ac8dc15661 100755 --- a/extra/io/launcher/launcher.factor +++ b/extra/io/launcher/launcher.factor @@ -6,7 +6,6 @@ init threads continuations math io.encodings io.streams.duplex io.nonblocking accessors ; IN: io.launcher - TUPLE: process command @@ -19,6 +18,8 @@ stdin stdout stderr +priority + timeout handle status @@ -32,6 +33,12 @@ SYMBOL: +prepend-environment+ SYMBOL: +replace-environment+ SYMBOL: +append-environment+ +SYMBOL: +lowest-priority+ +SYMBOL: +low-priority+ +SYMBOL: +normal-priority+ +SYMBOL: +high-priority+ +SYMBOL: +highest-priority+ + : ( -- process ) process construct-empty H{ } clone >>environment diff --git a/extra/io/process/process.factor b/extra/io/process/process.factor deleted file mode 100644 index 8a7c5b1a11..0000000000 --- a/extra/io/process/process.factor +++ /dev/null @@ -1,17 +0,0 @@ -USING: io.backend kernel ; -IN: io.priority - -SYMBOL: +lowest-priority+ -SYMBOL: +low-priority+ -SYMBOL: +normal-priority+ -SYMBOL: +high-priority+ -SYMBOL: +highest-priority+ - -HOOK: current-priority io-backend ( -- symbol ) -HOOK: set-current-priority io-backend ( symbol -- ) -HOOK: priority-values ( -- assoc ) - -: lookup-priority ( symbol -- n ) - priority-values at ; - -HOOK: get-process-list io-backend ( -- assoc ) diff --git a/extra/io/unix/launcher/launcher.factor b/extra/io/unix/launcher/launcher.factor index 8ed1c957af..e16ecde6fa 100755 --- a/extra/io/unix/launcher/launcher.factor +++ b/extra/io/unix/launcher/launcher.factor @@ -16,6 +16,17 @@ USE: unix : assoc>env ( assoc -- env ) [ "=" swap 3append ] { } assoc>map ; +: setup-priority ( process -- process ) + dup priority>> [ + H{ + { +lowest-priority+ 20 } + { +low-priority+ 10 } + { +normal-priority+ 0 } + { +high-priority+ -10 } + { +highest-priority+ -20 } + } at set-priority + ] when* ; + : redirect-fd ( oldfd fd -- ) 2dup = [ 2drop ] [ dupd dup2 io-error close ] if ; @@ -47,11 +58,15 @@ USE: unix : setup-redirection ( process -- process ) dup stdin>> ?closed read-flags 0 redirect dup stdout>> ?closed write-flags 1 redirect - dup stderr>> dup +stdout+ eq? - [ drop 1 2 dup2 io-error ] [ ?closed write-flags 2 redirect ] if ; + dup stderr>> dup +stdout+ eq? [ + drop 1 2 dup2 io-error + ] [ + ?closed write-flags 2 redirect + ] if ; : spawn-process ( process -- * ) [ + setup-priority setup-redirection dup pass-environment? [ dup get-environment set-os-envs diff --git a/extra/io/unix/process/process.factor b/extra/io/unix/process/process.factor deleted file mode 100644 index 00df6b6f52..0000000000 --- a/extra/io/unix/process/process.factor +++ /dev/null @@ -1,19 +0,0 @@ -USING: alien.syntax kernel io.process io.unix.backend -unix ; -IN: io.unix.process - -M: unix-io current-priority ( -- n ) - clear_err_no - 0 0 getpriority dup -1 = [ check-errno ] when ; - -M: unix-io set-current-priority ( n -- ) - 0 0 rot setpriority io-error ; - -M: unix-io priority-values ( -- assoc ) - { - { +lowest-priority+ 20 } - { +low-priority+ 10 } - { +normal-priority+ 0 } - { +high-priority+ -10 } - { +highest-priority+ -20 } - } ; diff --git a/extra/io/windows/process/process.factor b/extra/io/windows/process/process.factor deleted file mode 100644 index f0ca04fd8a..0000000000 --- a/extra/io/windows/process/process.factor +++ /dev/null @@ -1,8 +0,0 @@ -USING: kernel ; -IN: io.windows.process - -M: windows-io current-priority ( -- n ) - ; - -M: windows-io set-current-priority ( n -- ) - ; diff --git a/extra/unix/process/process.factor b/extra/unix/process/process.factor index 6fdc8e358b..c9612c4384 100755 --- a/extra/unix/process/process.factor +++ b/extra/unix/process/process.factor @@ -33,4 +33,7 @@ IN: unix.process fork dup io-error dup zero? -roll swap curry if ; inline : wait-for-pid ( pid -- status ) - 0 [ 0 waitpid drop ] keep *int WEXITSTATUS ; \ No newline at end of file + 0 [ 0 waitpid drop ] keep *int WEXITSTATUS ; + +: set-priority ( n -- ) + 0 0 rot setpriority io-error ; \ No newline at end of file From 09d8c8eb88b86f6cea48ab68662484d2f625fd85 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 24 Mar 2008 19:47:30 -0500 Subject: [PATCH 11/15] Launcher documentation --- extra/io/launcher/launcher-docs.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/extra/io/launcher/launcher-docs.factor b/extra/io/launcher/launcher-docs.factor index 640801234b..0f6ca3a2c9 100755 --- a/extra/io/launcher/launcher-docs.factor +++ b/extra/io/launcher/launcher-docs.factor @@ -227,6 +227,7 @@ ARTICLE: "io.launcher" "Operating system processes" { $subsection "io.launcher.detached" } { $subsection "io.launcher.environment" } { $subsection "io.launcher.redirection" } +{ $subsection "io.launcher.priority" } { $subsection "io.launcher.timeouts" } ; ABOUT: "io.launcher" From 8d7367674c42eaadb26d3883bb2fca17e52c2dfb Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 24 Mar 2008 19:52:21 -0500 Subject: [PATCH 12/15] Class algebra refactoring --- core/bootstrap/image/image.factor | 6 +- core/bootstrap/primitives.factor | 12 +- core/classes/algebra/algebra-docs.factor | 55 ++++ core/classes/algebra/algebra-tests.factor | 201 ++++++++++++++ core/classes/algebra/algebra.factor | 233 ++++++++++++++++ core/classes/classes-docs.factor | 82 +----- core/classes/classes-tests.factor | 85 +----- core/classes/classes.factor | 257 +++--------------- core/generator/registers/registers.factor | 21 +- core/generic/generic-docs.factor | 6 +- core/generic/generic-tests.factor | 4 +- core/generic/generic.factor | 4 +- core/generic/math/math.factor | 8 +- core/generic/standard/standard.factor | 2 +- core/inference/class/class.factor | 11 +- core/optimizer/control/control.factor | 4 +- core/optimizer/inlining/inlining.factor | 10 +- core/optimizer/known-words/known-words.factor | 9 +- core/optimizer/math/math.factor | 7 +- .../pattern-match/pattern-match.factor | 2 +- core/tuples/tuples-tests.factor | 11 +- extra/tools/deploy/shaker/shaker.factor | 8 +- 22 files changed, 593 insertions(+), 445 deletions(-) create mode 100755 core/classes/algebra/algebra-docs.factor create mode 100755 core/classes/algebra/algebra-tests.factor create mode 100755 core/classes/algebra/algebra.factor mode change 100644 => 100755 core/optimizer/pattern-match/pattern-match.factor diff --git a/core/bootstrap/image/image.factor b/core/bootstrap/image/image.factor index 52a2496755..6aa4b9212d 100755 --- a/core/bootstrap/image/image.factor +++ b/core/bootstrap/image/image.factor @@ -348,8 +348,10 @@ M: curry ' : emit-global ( -- ) [ { - dictionary source-files - typemap builtins class builtins set +init-caches ! Vocabulary for slot accessors "accessors" create-vocab drop @@ -93,11 +97,6 @@ call "vectors.private" } [ create-vocab drop ] each -H{ } clone source-files set -H{ } clone update-map set -H{ } clone class define-builtin-slots ; -H{ } clone typemap set -num-types get f builtins set - ! Forward definitions "object" "kernel" create t "class" set-word-prop "object" "kernel" create union-class "metaclass" set-word-prop diff --git a/core/classes/algebra/algebra-docs.factor b/core/classes/algebra/algebra-docs.factor new file mode 100755 index 0000000000..c21098916d --- /dev/null +++ b/core/classes/algebra/algebra-docs.factor @@ -0,0 +1,55 @@ +USING: help.markup help.syntax kernel classes ; +IN: classes.algebra + +ARTICLE: "class-operations" "Class operations" +"Set-theoretic operations on classes:" +{ $subsection class< } +{ $subsection class-and } +{ $subsection class-or } +{ $subsection classes-intersect? } +"Topological sort:" +{ $subsection sort-classes } +{ $subsection min-class } +"Low-level implementation detail:" +{ $subsection class-types } +{ $subsection flatten-class } +{ $subsection flatten-builtin-class } +{ $subsection class-types } +{ $subsection class-tags } ; + +HELP: flatten-builtin-class +{ $values { "class" class } { "assoc" "an assoc whose keys are classes" } } +{ $description "Outputs a set of tuple classes whose union is the smallest cover of " { $snippet "class" } " intersected with " { $link tuple } "." } ; + +HELP: flatten-class +{ $values { "class" class } { "assoc" "an assoc whose keys are classes" } } +{ $description "Outputs a set of builtin and tuple classes whose union is the smallest cover of " { $snippet "class" } "." } ; + +HELP: class-types +{ $values { "class" class } { "seq" "an increasing sequence of integers" } } +{ $description "Outputs a sequence of builtin type numbers whose instances can possibly be instances of the given class." } ; + +HELP: class< +{ $values { "class1" "a class" } { "class2" "a class" } { "?" "a boolean" } } +{ $description "Tests if all instances of " { $snippet "class1" } " are also instances of " { $snippet "class2" } "." } +{ $notes "Classes are partially ordered. This means that if " { $snippet "class1 <= class2" } " and " { $snippet "class2 <= class1" } ", then " { $snippet "class1 = class2" } ". Also, if " { $snippet "class1 <= class2" } " and " { $snippet "class2 <= class3" } ", then " { $snippet "class1 <= class3" } "." } ; + +HELP: sort-classes +{ $values { "seq" "a sequence of class" } { "newseq" "a new seqence of classes" } } +{ $description "Outputs a topological sort of a sequence of classes. Larger classes come before their subclasses." } ; + +HELP: class-or +{ $values { "class1" class } { "class2" class } { "class" class } } +{ $description "Outputs the smallest anonymous class containing both " { $snippet "class1" } " and " { $snippet "class2" } "." } ; + +HELP: class-and +{ $values { "class1" class } { "class2" class } { "class" class } } +{ $description "Outputs the largest anonymous class contained in both " { $snippet "class1" } " and " { $snippet "class2" } "." } ; + +HELP: classes-intersect? +{ $values { "class1" class } { "class2" class } { "?" "a boolean" } } +{ $description "Tests if two classes have a non-empty intersection. If the intersection is empty, no object can be an instance of both classes at once." } ; + +HELP: min-class +{ $values { "class" class } { "seq" "a sequence of class words" } { "class/f" "a class word or " { $link f } } } +{ $description "If all classes in " { $snippet "seq" } " that intersect " { $snippet "class" } " are subtypes of " { $snippet "class" } ", outputs the last such element of " { $snippet "seq" } ". If any conditions fail to hold, outputs " { $link f } "." } ; diff --git a/core/classes/algebra/algebra-tests.factor b/core/classes/algebra/algebra-tests.factor new file mode 100755 index 0000000000..24a18559fe --- /dev/null +++ b/core/classes/algebra/algebra-tests.factor @@ -0,0 +1,201 @@ +IN: classes.algebra.tests +USING: alien arrays definitions generic assocs hashtables io +kernel math namespaces parser prettyprint sequences strings +tools.test vectors words quotations classes classes.algebra +classes.private classes.union classes.mixin classes.predicate +vectors definitions source-files compiler.units growable +random inference effects ; + +: class= [ class< ] 2keep swap class< and ; + +: class-and* >r class-and r> class= ; + +: class-or* >r class-or r> class= ; + +[ t ] [ object object object class-and* ] unit-test +[ t ] [ fixnum object fixnum class-and* ] unit-test +[ t ] [ object fixnum fixnum class-and* ] unit-test +[ t ] [ fixnum fixnum fixnum class-and* ] unit-test +[ t ] [ fixnum integer fixnum class-and* ] unit-test +[ t ] [ integer fixnum fixnum class-and* ] unit-test + +[ t ] [ vector fixnum null class-and* ] unit-test +[ t ] [ number object number class-and* ] unit-test +[ t ] [ object number number class-and* ] unit-test +[ t ] [ slice reversed null class-and* ] unit-test +[ t ] [ general-t \ f null class-and* ] unit-test +[ t ] [ general-t \ f object class-or* ] unit-test + +TUPLE: first-one ; +TUPLE: second-one ; +UNION: both first-one union-class ; + +[ t ] [ both tuple classes-intersect? ] unit-test +[ t ] [ vector virtual-sequence null class-and* ] unit-test +[ f ] [ vector virtual-sequence classes-intersect? ] unit-test + +[ t ] [ number vector class-or sequence classes-intersect? ] unit-test + +[ f ] [ number vector class-and sequence classes-intersect? ] unit-test + +[ t ] [ \ fixnum \ integer class< ] unit-test +[ t ] [ \ fixnum \ fixnum class< ] unit-test +[ f ] [ \ integer \ fixnum class< ] unit-test +[ t ] [ \ integer \ object class< ] unit-test +[ f ] [ \ integer \ null class< ] unit-test +[ t ] [ \ null \ object class< ] unit-test + +[ t ] [ \ generic \ word class< ] unit-test +[ f ] [ \ word \ generic class< ] unit-test + +[ f ] [ \ reversed \ slice class< ] unit-test +[ f ] [ \ slice \ reversed class< ] unit-test + +PREDICATE: word no-docs "documentation" word-prop not ; + +UNION: no-docs-union no-docs integer ; + +[ t ] [ no-docs no-docs-union class< ] unit-test +[ f ] [ no-docs-union no-docs class< ] unit-test + +TUPLE: a ; +TUPLE: b ; +UNION: c a b ; + +[ t ] [ \ c \ tuple class< ] unit-test +[ f ] [ \ tuple \ c class< ] unit-test + +[ t ] [ \ tuple-class \ class class< ] unit-test +[ f ] [ \ class \ tuple-class class< ] unit-test + +TUPLE: delegate-clone ; + +[ t ] [ \ null \ delegate-clone class< ] unit-test +[ f ] [ \ object \ delegate-clone class< ] unit-test +[ f ] [ \ object \ delegate-clone class< ] unit-test +[ t ] [ \ delegate-clone \ tuple class< ] unit-test +[ f ] [ \ tuple \ delegate-clone class< ] unit-test + +TUPLE: a1 ; +TUPLE: b1 ; +TUPLE: c1 ; + +UNION: x1 a1 b1 ; +UNION: y1 a1 c1 ; +UNION: z1 b1 c1 ; + +[ f ] [ z1 x1 y1 class-and class< ] unit-test + +[ t ] [ x1 y1 class-and a1 class< ] unit-test + +[ f ] [ y1 z1 class-and x1 classes-intersect? ] unit-test + +[ f ] [ b1 c1 class-or a1 b1 class-or a1 c1 class-and class-and class< ] unit-test + +[ t ] [ a1 b1 class-or a1 c1 class-or class-and a1 class< ] unit-test + +[ f ] [ a1 c1 class-or b1 c1 class-or class-and a1 b1 class-or classes-intersect? ] unit-test + +[ f ] [ growable hi-tag classes-intersect? ] unit-test + +[ t ] [ + growable tuple sequence class-and class< +] unit-test + +[ t ] [ + growable assoc class-and tuple class< +] unit-test + +[ t ] [ object \ f \ f class-not class-or class< ] unit-test + +[ t ] [ fixnum class-not integer class-and bignum class= ] unit-test + +[ f ] [ integer integer class-not classes-intersect? ] unit-test + +[ t ] [ array number class-not class< ] unit-test + +[ f ] [ bignum number class-not class< ] unit-test + +[ vector ] [ vector class-not class-not ] unit-test + +[ t ] [ fixnum fixnum bignum class-or class< ] unit-test + +[ f ] [ fixnum class-not integer class-and array class< ] unit-test + +[ f ] [ fixnum class-not integer class< ] unit-test + +[ f ] [ number class-not array class< ] unit-test + +[ f ] [ fixnum class-not array class< ] unit-test + +[ t ] [ number class-not integer class-not class< ] unit-test + +[ t ] [ vector array class-not class-and vector class= ] unit-test + +[ f ] [ fixnum class-not number class-and array classes-intersect? ] unit-test + +[ f ] [ fixnum class-not integer class< ] unit-test + +[ t ] [ null class-not object class= ] unit-test + +[ t ] [ object class-not null class= ] unit-test + +[ f ] [ object class-not object class= ] unit-test + +[ f ] [ null class-not null class= ] unit-test + +! Test for hangs? +: random-class classes random ; + +: random-op + { + class-and + class-or + class-not + } random ; + +10 [ + [ ] [ + 20 [ drop random-op ] map >quotation + [ infer effect-in [ random-class ] times ] keep + call + drop + ] unit-test +] times + +: random-boolean + { t f } random ; + +: boolean>class + object null ? ; + +: random-boolean-op + { + and + or + not + xor + } random ; + +: class-xor [ class-or ] 2keep class-and class-not class-and ; + +: boolean-op>class-op + { + { and class-and } + { or class-or } + { not class-not } + { xor class-xor } + } at ; + +20 [ + [ t ] [ + 20 [ drop random-boolean-op ] [ ] map-as dup . + [ infer effect-in [ drop random-boolean ] map dup . ] keep + + [ >r [ ] each r> call ] 2keep + + >r [ boolean>class ] each r> [ boolean-op>class-op ] map call object class= + + = + ] unit-test +] times diff --git a/core/classes/algebra/algebra.factor b/core/classes/algebra/algebra.factor new file mode 100755 index 0000000000..e2206213a6 --- /dev/null +++ b/core/classes/algebra/algebra.factor @@ -0,0 +1,233 @@ +! Copyright (C) 2004, 2008 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel classes combinators accessors sequences arrays +vectors assocs namespaces words sorting layouts math hashtables +; +IN: classes.algebra + +: 2cache ( key1 key2 assoc quot -- value ) + >r >r 2array r> [ first2 ] r> compose cache ; inline + +DEFER: (class<) + +: class< ( first second -- ? ) + class<-cache get [ (class<) ] 2cache ; + +DEFER: (class-not) + +: class-not ( class -- complement ) + class-not-cache get [ (class-not) ] cache ; + +DEFER: (classes-intersect?) ( first second -- ? ) + +: classes-intersect? ( first second -- ? ) + classes-intersect-cache get [ (classes-intersect?) ] 2cache ; + +DEFER: (class-and) + +: class-and ( first second -- class ) + class-and-cache get [ (class-and) ] 2cache ; + +DEFER: (class-or) + +: class-or ( first second -- class ) + class-or-cache get [ (class-or) ] 2cache ; + +TUPLE: anonymous-union members ; + +C: anonymous-union + +TUPLE: anonymous-intersection members ; + +C: anonymous-intersection + +TUPLE: anonymous-complement class ; + +C: anonymous-complement + +: superclass< ( first second -- ? ) + >r superclass r> class< ; + +: left-union-class< ( first second -- ? ) + >r members r> [ class< ] curry all? ; + +: right-union-class< ( first second -- ? ) + members [ class< ] with contains? ; + +: left-anonymous-union< ( first second -- ? ) + >r members>> r> [ class< ] curry all? ; + +: right-anonymous-union< ( first second -- ? ) + members>> [ class< ] with contains? ; + +: left-anonymous-intersection< ( first second -- ? ) + >r members>> r> [ class< ] curry contains? ; + +: right-anonymous-intersection< ( first second -- ? ) + members>> [ class< ] with all? ; + +: anonymous-complement< ( first second -- ? ) + [ class>> ] 2apply swap class< ; + +: (class<) ( first second -- -1/0/1 ) + { + { [ 2dup eq? ] [ 2drop t ] } + { [ dup object eq? ] [ 2drop t ] } + { [ over null eq? ] [ 2drop t ] } + { [ 2dup [ anonymous-complement? ] both? ] [ anonymous-complement< ] } + { [ over anonymous-union? ] [ left-anonymous-union< ] } + { [ over anonymous-intersection? ] [ left-anonymous-intersection< ] } + { [ over anonymous-complement? ] [ 2drop f ] } + { [ over members ] [ left-union-class< ] } + { [ dup anonymous-union? ] [ right-anonymous-union< ] } + { [ dup anonymous-intersection? ] [ right-anonymous-intersection< ] } + { [ dup anonymous-complement? ] [ class>> classes-intersect? not ] } + { [ dup members ] [ right-union-class< ] } + { [ over superclass ] [ superclass< ] } + { [ t ] [ 2drop f ] } + } cond ; + +: anonymous-union-intersect? ( first second -- ? ) + members>> [ classes-intersect? ] with contains? ; + +: anonymous-intersection-intersect? ( first second -- ? ) + members>> [ classes-intersect? ] with all? ; + +: anonymous-complement-intersect? ( first second -- ? ) + class>> class< not ; + +: union-class-intersect? ( first second -- ? ) + members [ classes-intersect? ] with contains? ; + +: tuple-class-intersect? ( first second -- ? ) + { + { [ over tuple eq? ] [ 2drop t ] } + { [ over builtin-class? ] [ 2drop f ] } + { [ over tuple-class? ] [ [ class< ] 2keep swap class< or ] } + { [ t ] [ swap classes-intersect? ] } + } cond ; + +: builtin-class-intersect? ( first second -- ? ) + { + { [ 2dup eq? ] [ 2drop t ] } + { [ over builtin-class? ] [ 2drop f ] } + { [ t ] [ swap classes-intersect? ] } + } cond ; + +: (classes-intersect?) ( first second -- ? ) + { + { [ dup anonymous-union? ] [ anonymous-union-intersect? ] } + { [ dup anonymous-intersection? ] [ anonymous-intersection-intersect? ] } + { [ dup anonymous-complement? ] [ anonymous-complement-intersect? ] } + { [ dup tuple-class? ] [ tuple-class-intersect? ] } + { [ dup builtin-class? ] [ builtin-class-intersect? ] } + { [ dup superclass ] [ superclass classes-intersect? ] } + { [ dup members ] [ union-class-intersect? ] } + } cond ; + +: left-union-and ( first second -- class ) + >r members r> [ class-and ] curry map ; + +: right-union-and ( first second -- class ) + members [ class-and ] with map ; + +: left-anonymous-union-and ( first second -- class ) + >r members>> r> [ class-and ] curry map ; + +: right-anonymous-union-and ( first second -- class ) + members>> [ class-and ] with map ; + +: left-anonymous-intersection-and ( first second -- class ) + >r members>> r> add ; + +: right-anonymous-intersection-and ( first second -- class ) + members>> swap add ; + +: (class-and) ( first second -- class ) + { + { [ 2dup class< ] [ drop ] } + { [ 2dup swap class< ] [ nip ] } + { [ 2dup classes-intersect? not ] [ 2drop null ] } + { [ dup members ] [ right-union-and ] } + { [ dup anonymous-union? ] [ right-anonymous-union-and ] } + { [ dup anonymous-intersection? ] [ right-anonymous-intersection-and ] } + { [ over members ] [ left-union-and ] } + { [ over anonymous-union? ] [ left-anonymous-union-and ] } + { [ over anonymous-intersection? ] [ left-anonymous-intersection-and ] } + { [ t ] [ 2array ] } + } cond ; + +: left-anonymous-union-or ( first second -- class ) + >r members>> r> add ; + +: right-anonymous-union-or ( first second -- class ) + members>> swap add ; + +: (class-or) ( first second -- class ) + { + { [ 2dup class< ] [ nip ] } + { [ 2dup swap class< ] [ drop ] } + { [ dup anonymous-union? ] [ right-anonymous-union-or ] } + { [ over anonymous-union? ] [ left-anonymous-union-or ] } + { [ t ] [ 2array ] } + } cond ; + +: (class-not) ( class -- complement ) + { + { [ dup anonymous-complement? ] [ class>> ] } + { [ dup object eq? ] [ drop null ] } + { [ dup null eq? ] [ drop object ] } + { [ t ] [ ] } + } cond ; + +: largest-class ( seq -- n elt ) + dup [ + [ 2dup class< >r swap class< not r> and ] + with subset empty? + ] curry find [ "Topological sort failed" throw ] unless* ; + +: sort-classes ( seq -- newseq ) + >vector + [ dup empty? not ] + [ dup largest-class >r over delete-nth r> ] + [ ] unfold nip ; + +: min-class ( class seq -- class/f ) + [ dupd classes-intersect? ] subset dup empty? [ + 2drop f + ] [ + tuck [ class< ] with all? [ peek ] [ drop f ] if + ] if ; + +: (flatten-class) ( class -- ) + { + { [ dup tuple-class? ] [ dup set ] } + { [ dup builtin-class? ] [ dup set ] } + { [ dup members ] [ members [ (flatten-class) ] each ] } + { [ dup superclass ] [ superclass (flatten-class) ] } + { [ t ] [ drop ] } + } cond ; + +: flatten-class ( class -- assoc ) + [ (flatten-class) ] H{ } make-assoc ; + +: class-hashes ( class -- seq ) + flatten-class keys [ + dup builtin-class? + [ "type" word-prop ] [ hashcode ] if + ] map ; + +: flatten-builtin-class ( class -- assoc ) + flatten-class [ + dup tuple class< [ 2drop tuple tuple ] when + ] assoc-map ; + +: class-types ( class -- seq ) + flatten-builtin-class keys + [ "type" word-prop ] map natural-sort ; + +: class-tags ( class -- tag/f ) + class-types [ + dup num-tags get >= + [ drop object tag-number ] when + ] map prune ; diff --git a/core/classes/classes-docs.factor b/core/classes/classes-docs.factor index 1e71173153..9573de8949 100755 --- a/core/classes/classes-docs.factor +++ b/core/classes/classes-docs.factor @@ -12,21 +12,6 @@ $nl { $subsection builtin-class? } "See " { $link "type-index" } " for a list of built-in classes." ; -ARTICLE: "class-operations" "Class operations" -"Set-theoretic operations on classes:" -{ $subsection class< } -{ $subsection class-and } -{ $subsection class-or } -{ $subsection classes-intersect? } -"Topological sort:" -{ $subsection sort-classes } -{ $subsection min-class } -"Low-level implementation detail:" -{ $subsection types } -{ $subsection flatten-class } -{ $subsection flatten-builtin-class } -{ $subsection flatten-union-class } ; - ARTICLE: "class-predicates" "Class predicate words" "With a handful of exceptions, each class has a membership predicate word, named " { $snippet { $emphasis "class" } "?" } " . A quotation calling this predicate is stored in the " { $snippet "\"predicate\"" } " word property." $nl @@ -93,15 +78,9 @@ HELP: tuple-class { $class-description "The class of tuple class words." } { $examples { $example "USING: classes prettyprint ;" "TUPLE: name title first last ;" "name tuple-class? ." "t" } } ; -HELP: typemap -{ $var-description "Hashtable mapping unions to class words, used to implement " { $link class-and } " and " { $link class-or } "." } ; - HELP: builtins { $var-description "Vector mapping type numbers to builtin class words." } ; -HELP: classclass ( n -- class ) builtins get-global nth ; @@ -37,146 +54,12 @@ PREDICATE: word predicate "predicating" word-prop >boolean ; r> predicate-effect define-declared ; : superclass ( class -- super ) - "superclass" word-prop ; + #! Output f for non-classes to work with algebra code + dup class? [ "superclass" word-prop ] [ drop f ] if ; -: members ( class -- seq ) "members" word-prop ; - -: class-empty? ( class -- ? ) members dup [ empty? ] when ; - -: (flatten-union-class) ( class -- ) - dup members [ - [ (flatten-union-class) ] each - ] [ - dup set - ] ?if ; - -: flatten-union-class ( class -- assoc ) - [ (flatten-union-class) ] H{ } make-assoc ; - -: (flatten-class) ( class -- ) - { - { [ dup tuple-class? ] [ dup set ] } - { [ dup builtin-class? ] [ dup set ] } - { [ dup members ] [ members [ (flatten-class) ] each ] } - { [ dup superclass ] [ superclass (flatten-class) ] } - { [ t ] [ drop ] } - } cond ; - -: flatten-class ( class -- assoc ) - [ (flatten-class) ] H{ } make-assoc ; - -: class-hashes ( class -- seq ) - flatten-class keys [ - dup builtin-class? - [ "type" word-prop ] [ hashcode ] if - ] map ; - -: (flatten-builtin-class) ( class -- ) - { - { [ dup members ] [ members [ (flatten-builtin-class) ] each ] } - { [ dup superclass ] [ superclass (flatten-builtin-class) ] } - { [ t ] [ dup set ] } - } cond ; - -: flatten-builtin-class ( class -- assoc ) - [ (flatten-builtin-class) ] H{ } make-assoc ; - -: types ( class -- seq ) - flatten-builtin-class keys - [ "type" word-prop ] map natural-sort ; - -: class< ( class1 class2 -- ? ) swap classr superclass r> 2dup and [ (class<) ] [ 2drop f ] if ; - -: union-class< ( cls1 cls2 -- ? ) - [ flatten-union-class ] 2apply keys - [ nip [ (class<) ] with contains? ] curry assoc-all? ; - -: (class<) ( class1 class2 -- ? ) - { - { [ 2dup eq? ] [ 2drop t ] } - { [ over class-empty? ] [ 2drop t ] } - { [ 2dup superclass< ] [ 2drop t ] } - { [ 2dup [ members not ] both? ] [ 2drop f ] } - { [ t ] [ union-class< ] } - } cond ; - -: lookup-union ( classes -- class ) - typemap get at dup empty? [ drop object ] [ first ] if ; - -: lookup-tuple-union ( classes -- class ) - class-map get at dup empty? [ drop object ] [ first ] if ; - -! : (class-or) ( class class -- class ) -! [ flatten-builtin-class ] 2apply union lookup-union ; -! -! : (class-and) ( class class -- class ) -! [ flatten-builtin-class ] 2apply intersect lookup-union ; - -: class-or-fixup ( set set -- set ) - union - tuple over key? - [ [ drop tuple-class? not ] assoc-subset ] when ; - -: (class-or) ( class class -- class ) - [ flatten-class ] 2apply class-or-fixup lookup-tuple-union ; - -: (class-and) ( class class -- class ) - 2dup [ tuple swap class< ] either? [ - [ flatten-builtin-class ] 2apply - intersect lookup-union - ] [ - [ flatten-class ] 2apply - intersect lookup-tuple-union - ] if ; - -: tuple-class-and ( class1 class2 -- class ) - dupd eq? [ drop null ] unless ; - -: largest-class ( seq -- n elt ) - dup [ - [ 2dup class< >r swap class< not r> and ] - with subset empty? - ] curry find [ "Topological sort failed" throw ] unless* ; - -PRIVATE> - -: sort-classes ( seq -- newseq ) - >vector - [ dup empty? not ] - [ dup largest-class >r over delete-nth r> ] - [ ] unfold nip ; - -: class-or ( class1 class2 -- class ) - { - { [ 2dup class< ] [ nip ] } - { [ 2dup swap class< ] [ drop ] } - { [ t ] [ (class-or) ] } - } cond ; - -: class-and ( class1 class2 -- class ) - { - { [ 2dup class< ] [ drop ] } - { [ 2dup swap class< ] [ nip ] } - { [ 2dup [ tuple-class? ] both? ] [ tuple-class-and ] } - { [ t ] [ (class-and) ] } - } cond ; - -: classes-intersect? ( class1 class2 -- ? ) - class-and class-empty? not ; - -: min-class ( class seq -- class/f ) - [ dupd classes-intersect? ] subset dup empty? [ - 2drop f - ] [ - tuck [ class< ] with all? [ peek ] [ drop f ] if - ] if ; +: members ( class -- seq ) + #! Output f for non-classes to work with algebra code + dup class? [ "members" word-prop ] [ drop f ] if ; GENERIC: reset-class ( class -- ) @@ -184,36 +67,9 @@ M: word reset-class drop ; assoc ] keep - classr >r 1vector r> r> set-at - ] if ; - -: typemap+ ( class -- ) - dup flatten-builtin-class typemap get push-at ; - -: pop-at ( value key assoc -- ) - at* [ delete ] [ 2drop ] if ; - -: typemap- ( class -- ) - dup flatten-builtin-class typemap get pop-at ; - -! class-map -: class-map+ ( class -- ) - dup flatten-class class-map get push-at ; - -: class-map- ( class -- ) - dup flatten-class class-map get pop-at ; - -! Class definition -: cache-class ( class -- ) - dup typemap+ dup class-map+ dup class : define-class-props ( members superclass metaclass -- assoc ) @@ -293,14 +108,12 @@ GENERIC: update-methods ( assoc -- ) : define-class ( word members superclass metaclass -- ) #! If it was already a class, update methods after. + reset-caches define-class-props - over class? >r - over class-usages [ - uncache-classes - dupd (define-class) - ] keep cache-classes r> - [ class-usages dup update-predicates update-methods ] - [ drop ] if ; + over update-map- + dupd (define-class) + dup update-map+ + class-usages dup update-predicates update-methods ; GENERIC: class ( object -- class ) inline diff --git a/core/generator/registers/registers.factor b/core/generator/registers/registers.factor index 307e3a99f1..e03923e860 100755 --- a/core/generator/registers/registers.factor +++ b/core/generator/registers/registers.factor @@ -1,9 +1,9 @@ ! Copyright (C) 2006, 2007 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays assocs classes classes.private combinators -cpu.architecture generator.fixup hashtables kernel layouts math -namespaces quotations sequences system vectors words effects -alien byte-arrays bit-arrays float-arrays ; +USING: arrays assocs classes classes.private classes.algebra +combinators cpu.architecture generator.fixup hashtables kernel +layouts math namespaces quotations sequences system vectors +words effects alien byte-arrays bit-arrays float-arrays ; IN: generator.registers SYMBOL: +input+ @@ -581,13 +581,14 @@ M: loc lazy-store 2drop t ] if ; +: class-tags ( class -- tag/f ) + class-types [ + dup num-tags get >= + [ drop object tag-number ] when + ] map prune ; + : class-tag ( class -- tag/f ) - dup hi-tag class< [ - drop object tag-number - ] [ - flatten-builtin-class keys - dup length 1 = [ first tag-number ] [ drop f ] if - ] if ; + class-tags dup length 1 = [ first ] [ drop f ] if ; : class-matches? ( actual expected -- ? ) { diff --git a/core/generic/generic-docs.factor b/core/generic/generic-docs.factor index b59c92c798..56de801e7a 100755 --- a/core/generic/generic-docs.factor +++ b/core/generic/generic-docs.factor @@ -1,6 +1,6 @@ -USING: help.markup help.syntax words classes definitions kernel -alien sequences math quotations generic.standard generic.math -combinators ; +USING: help.markup help.syntax words classes classes.algebra +definitions kernel alien sequences math quotations +generic.standard generic.math combinators ; IN: generic ARTICLE: "method-order" "Method precedence" diff --git a/core/generic/generic-tests.factor b/core/generic/generic-tests.factor index 785600cfb0..853a03d184 100755 --- a/core/generic/generic-tests.factor +++ b/core/generic/generic-tests.factor @@ -1,8 +1,8 @@ USING: alien arrays definitions generic generic.standard generic.math assocs hashtables io kernel math namespaces parser prettyprint sequences strings tools.test vectors words -quotations classes continuations layouts classes.union sorting -compiler.units ; +quotations classes classes.algebra continuations layouts +classes.union sorting compiler.units ; IN: generic.tests GENERIC: foobar ( x -- y ) diff --git a/core/generic/generic.factor b/core/generic/generic.factor index 8fe5e4921a..36ca0358b7 100755 --- a/core/generic/generic.factor +++ b/core/generic/generic.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: words kernel sequences namespaces assocs hashtables definitions kernel.private classes classes.private -quotations arrays vocabs effects ; +classes.algebra quotations arrays vocabs effects ; IN: generic ! Method combination protocol @@ -138,7 +138,7 @@ M: method-body forget* M: class forget* ( class -- ) dup forget-methods - dup uncache-class + dup update-map- forget-word ; M: assoc update-methods ( assoc -- ) diff --git a/core/generic/math/math.factor b/core/generic/math/math.factor index 46f57a1629..93c89af25c 100755 --- a/core/generic/math/math.factor +++ b/core/generic/math/math.factor @@ -1,8 +1,8 @@ -! Copyright (C) 2005, 2007 Slava Pestov. +! Copyright (C) 2005, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: arrays generic hashtables kernel kernel.private math namespaces sequences words quotations layouts combinators -sequences.private classes definitions ; +sequences.private classes classes.algebra definitions ; IN: generic.math PREDICATE: class math-class ( object -- ? ) @@ -16,8 +16,8 @@ PREDICATE: class math-class ( object -- ? ) : math-precedence ( class -- n ) { - { [ dup class-empty? ] [ drop { -1 -1 } ] } - { [ dup math-class? ] [ types last/first ] } + { [ dup null class< ] [ drop { -1 -1 } ] } + { [ dup math-class? ] [ class-types last/first ] } { [ t ] [ drop { 100 100 } ] } } cond ; diff --git a/core/generic/standard/standard.factor b/core/generic/standard/standard.factor index 37f72e7d95..4105a05cb1 100755 --- a/core/generic/standard/standard.factor +++ b/core/generic/standard/standard.factor @@ -3,7 +3,7 @@ USING: arrays assocs kernel kernel.private slots.private math namespaces sequences vectors words quotations definitions hashtables layouts combinators sequences.private generic -classes classes.private ; +classes classes.algebra classes.private ; IN: generic.standard TUPLE: standard-combination # ; diff --git a/core/inference/class/class.factor b/core/inference/class/class.factor index 690571de98..7764fd4fd1 100755 --- a/core/inference/class/class.factor +++ b/core/inference/class/class.factor @@ -2,8 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: arrays generic assocs hashtables inference kernel math namespaces sequences words parser math.intervals -effects classes inference.dataflow inference.backend -combinators ; +effects classes classes.algebra inference.dataflow +inference.backend combinators ; IN: inference.class ! Class inference @@ -88,8 +88,11 @@ M: interval-constraint apply-constraint swap interval-constraint-value intersect-value-interval ; : set-class-interval ( class value -- ) - >r "interval" word-prop dup - [ r> set-value-interval* ] [ r> 2drop ] if ; + over class? [ + over "interval" word-prop [ + >r "interval" word-prop r> set-value-interval* + ] [ 2drop ] if + ] [ 2drop ] if ; : value-class* ( value -- class ) value-classes get at object or ; diff --git a/core/optimizer/control/control.factor b/core/optimizer/control/control.factor index b04d4677ce..c108e3b1a7 100755 --- a/core/optimizer/control/control.factor +++ b/core/optimizer/control/control.factor @@ -3,8 +3,8 @@ USING: arrays generic assocs inference inference.class inference.dataflow inference.backend inference.state io kernel math namespaces sequences vectors words quotations hashtables -combinators classes generic.math continuations optimizer.def-use -optimizer.backend generic.standard ; +combinators classes classes.algebra generic.math continuations +optimizer.def-use optimizer.backend generic.standard ; IN: optimizer.control ! ! ! Rudimentary CFA diff --git a/core/optimizer/inlining/inlining.factor b/core/optimizer/inlining/inlining.factor index 04d7ab4ee5..1f3df92421 100755 --- a/core/optimizer/inlining/inlining.factor +++ b/core/optimizer/inlining/inlining.factor @@ -3,10 +3,10 @@ USING: arrays generic assocs inference inference.class inference.dataflow inference.backend inference.state io kernel math namespaces sequences vectors words quotations hashtables -combinators classes generic.math continuations optimizer.def-use -optimizer.backend generic.standard optimizer.specializers -optimizer.def-use optimizer.pattern-match generic.standard -optimizer.control kernel.private ; +combinators classes classes.algebra generic.math continuations +optimizer.def-use optimizer.backend generic.standard +optimizer.specializers optimizer.def-use optimizer.pattern-match +generic.standard optimizer.control kernel.private ; IN: optimizer.inlining : remember-inlining ( node history -- ) @@ -175,7 +175,7 @@ DEFER: (flat-length) : optimistic-inline? ( #call -- ? ) dup node-param "specializer" word-prop dup [ >r node-input-classes r> specialized-length tail* - [ types length 1 = ] all? + [ class-types length 1 = ] all? ] [ 2drop f ] if ; diff --git a/core/optimizer/known-words/known-words.factor b/core/optimizer/known-words/known-words.factor index 18c98c5115..0a3442566c 100755 --- a/core/optimizer/known-words/known-words.factor +++ b/core/optimizer/known-words/known-words.factor @@ -7,8 +7,9 @@ sequences words parser vectors strings sbufs io namespaces assocs quotations sequences.private io.binary io.crc32 io.streams.string layouts splitting math.intervals math.floats.private tuples tuples.private classes -optimizer.def-use optimizer.backend optimizer.pattern-match -optimizer.inlining float-arrays sequences.private combinators ; +classes.algebra optimizer.def-use optimizer.backend +optimizer.pattern-match optimizer.inlining float-arrays +sequences.private combinators ; ! the output of and has the class which is ! its second-to-last input @@ -89,10 +90,10 @@ optimizer.inlining float-arrays sequences.private combinators ; ! type applied to an object of a known type can be folded : known-type? ( node -- ? ) - node-class-first types length 1 number= ; + node-class-first class-types length 1 number= ; : fold-known-type ( node -- node ) - dup node-class-first types inline-literals ; + dup node-class-first class-types inline-literals ; \ type [ { [ dup known-type? ] [ fold-known-type ] } diff --git a/core/optimizer/math/math.factor b/core/optimizer/math/math.factor index 7afc177d10..349cf88f17 100755 --- a/core/optimizer/math/math.factor +++ b/core/optimizer/math/math.factor @@ -5,9 +5,10 @@ USING: alien alien.accessors arrays generic hashtables kernel assocs math math.private kernel.private sequences words parser inference.class inference.dataflow vectors strings sbufs io namespaces assocs quotations math.intervals sequences.private -combinators splitting layouts math.parser classes generic.math -optimizer.pattern-match optimizer.backend optimizer.def-use -optimizer.inlining generic.standard system ; +combinators splitting layouts math.parser classes +classes.algebra generic.math optimizer.pattern-match +optimizer.backend optimizer.def-use optimizer.inlining +generic.standard system ; { + bignum+ float+ fixnum+fast } { { { number 0 } [ drop ] } diff --git a/core/optimizer/pattern-match/pattern-match.factor b/core/optimizer/pattern-match/pattern-match.factor old mode 100644 new mode 100755 index ed78330492..0e7e801938 --- a/core/optimizer/pattern-match/pattern-match.factor +++ b/core/optimizer/pattern-match/pattern-match.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. IN: optimizer.pattern-match USING: kernel sequences inference namespaces generic -combinators classes inference.dataflow ; +combinators classes classes.algebra inference.dataflow ; ! Funny pattern matching SYMBOL: @ diff --git a/core/tuples/tuples-tests.factor b/core/tuples/tuples-tests.factor index b5076ea22b..fec3bdbc6f 100755 --- a/core/tuples/tuples-tests.factor +++ b/core/tuples/tuples-tests.factor @@ -5,9 +5,6 @@ generic.standard effects tuples tuples.private arrays vectors strings compiler.units ; IN: tuples.tests -[ t ] [ \ tuple-class \ class class< ] unit-test -[ f ] [ \ class \ tuple-class class< ] unit-test - TUPLE: rect x y w h ; : rect construct-boa ; @@ -90,12 +87,6 @@ TUPLE: delegate-clone ; [ T{ delegate-clone T{ empty f } } ] [ T{ delegate-clone T{ empty f } } clone ] unit-test -[ t ] [ \ null \ delegate-clone class< ] unit-test -[ f ] [ \ object \ delegate-clone class< ] unit-test -[ f ] [ \ object \ delegate-clone class< ] unit-test -[ t ] [ \ delegate-clone \ tuple class< ] unit-test -[ f ] [ \ tuple \ delegate-clone class< ] unit-test - ! Compiler regression [ t length ] [ no-method-object t eq? ] must-fail-with @@ -121,7 +112,7 @@ TUPLE: yo-momma ; [ [ t ] [ \ yo-momma class? ] unit-test [ ] [ \ yo-momma forget ] unit-test - [ f ] [ \ yo-momma typemap get values memq? ] unit-test + [ f ] [ \ yo-momma update-map get values memq? ] unit-test [ f ] [ \ yo-momma crossref get at ] unit-test ] with-compilation-unit diff --git a/extra/tools/deploy/shaker/shaker.factor b/extra/tools/deploy/shaker/shaker.factor index 76e4a212b2..754d93d9b4 100755 --- a/extra/tools/deploy/shaker/shaker.factor +++ b/extra/tools/deploy/shaker/shaker.factor @@ -148,8 +148,12 @@ IN: tools.deploy.shaker layouts:tag-mask layouts:tag-numbers layouts:type-numbers - classes:typemap - classes:class-map + classes:class<-cache + classes:class-not-cache + classes:classes-intersect-cache + classes:class-and-cache + classes:class-or-cache + classes:update-map vocab-roots definitions:crossref compiled-crossref From 577c670631086600ea675467195325438fe1e2b8 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 24 Mar 2008 20:15:42 -0500 Subject: [PATCH 13/15] Test fix --- core/optimizer/optimizer-tests.factor | 5 ++-- extra/tools/deploy/shaker/shaker.factor | 36 ++++++++++++------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/core/optimizer/optimizer-tests.factor b/core/optimizer/optimizer-tests.factor index 3abccecc7f..89cea45aee 100755 --- a/core/optimizer/optimizer-tests.factor +++ b/core/optimizer/optimizer-tests.factor @@ -1,8 +1,9 @@ USING: arrays compiler.units generic hashtables inference kernel kernel.private math optimizer prettyprint sequences sbufs strings tools.test vectors words sequences.private quotations -optimizer.backend classes inference.dataflow tuples.private -continuations growable optimizer.inlining namespaces hints ; +optimizer.backend classes classes.algebra inference.dataflow +tuples.private continuations growable optimizer.inlining +namespaces hints ; IN: optimizer.tests [ H{ { 1 5 } { 3 4 } { 2 5 } } ] [ diff --git a/extra/tools/deploy/shaker/shaker.factor b/extra/tools/deploy/shaker/shaker.factor index 754d93d9b4..f731f5d694 100755 --- a/extra/tools/deploy/shaker/shaker.factor +++ b/extra/tools/deploy/shaker/shaker.factor @@ -139,31 +139,29 @@ IN: tools.deploy.shaker { } { "cpu" } strip-vocab-globals % { - vocabs:dictionary - lexer-factory - vocabs:load-vocab-hook - root-cache + classes:class-and-cache + classes:class-not-cache + classes:class-or-cache + classes:class<-cache + classes:classes-intersect-cache + classes:update-map + compiled-crossref + compiler.units:recompile-hook + definitions:crossref + interactive-vocabs layouts:num-tags layouts:num-types layouts:tag-mask layouts:tag-numbers layouts:type-numbers - classes:class<-cache - classes:class-not-cache - classes:classes-intersect-cache - classes:class-and-cache - classes:class-or-cache - classes:update-map - vocab-roots - definitions:crossref - compiled-crossref - interactive-vocabs - word - compiler.units:recompile-hook - listener:listener-hook lexer-factory - classes:update-map - classes:class Date: Mon, 24 Mar 2008 20:44:39 -0500 Subject: [PATCH 14/15] Fix --- extra/io/unix/unix.factor | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extra/io/unix/unix.factor b/extra/io/unix/unix.factor index d1c0db72f4..0a7fc72662 100755 --- a/extra/io/unix/unix.factor +++ b/extra/io/unix/unix.factor @@ -1,5 +1,5 @@ USING: io.unix.backend io.unix.files io.unix.sockets io.timeouts -io.unix.launcher io.unix.mmap io.backend io.unix.process -combinators namespaces system vocabs.loader sequences ; +io.unix.launcher io.unix.mmap io.backend combinators namespaces +system vocabs.loader sequences ; "io.unix." os append require From 1c75abce235a4062b1ef4f66db53af97b5a19fa3 Mon Sep 17 00:00:00 2001 From: Eduardo Cavazos Date: Tue, 25 Mar 2008 04:40:36 -0600 Subject: [PATCH 15/15] lsys.ui: Add a '500 sleep' workaround --- extra/lsys/ui/ui.factor | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extra/lsys/ui/ui.factor b/extra/lsys/ui/ui.factor index 45372aec6c..c8d103a084 100644 --- a/extra/lsys/ui/ui.factor +++ b/extra/lsys/ui/ui.factor @@ -196,6 +196,8 @@ slate> handler> set-gadget-delegate handler> "L-system view" open-window +500 sleep + slate> find-gl-context 1 glGenLists >model