Merge branch 'master' of git://github.com/littledan/Factor

db4
Slava Pestov 2010-02-03 22:58:25 +13:00
commit ed9251c56c
9 changed files with 176610 additions and 156643 deletions

View File

@ -1,10 +1,10 @@
# GraphemeBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:22 GMT [MD]
# GraphemeBreakTest-5.2.0.txt
# Date: 2009-09-19, 00:42:12 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme Break Test
#
@ -15,7 +15,7 @@
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the line_break property* for the sample character
# - (x) the Grapheme_Break property* for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.

View File

@ -1,10 +1,10 @@
# WordBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:28 GMT [MD]
# WordBreakTest-5.2.0.txt
# Date: 2009-09-19, 00:42:16 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Word Break Test
#
@ -15,7 +15,7 @@
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the line_break property* for the sample character
# - (x) the Word_Break property* for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.

291149
basis/unicode/collation/CollationTest_SHIFTED.txt Executable file → Normal file

File diff suppressed because it is too large Load Diff

38395
basis/unicode/collation/allkeys.txt Executable file → Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
# CompositionExclusions-5.1.0.txt
# Date: 2008-03-20, 17:45:00 PDT [KW]
# CompositionExclusions-5.2.0.txt
# Date: 2009-05-22, 12:52:00 PDT [KW]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@ -7,7 +7,7 @@
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see
@ -170,11 +170,11 @@ FB4E # HEBREW LETTER PE WITH RAFE
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 1030
# Total code points: 1033
# ================================================
# (4) Non-Starter Decompositions

View File

@ -1,10 +1,10 @@
# PropList-5.1.0.txt
# Date: 2008-03-20, 17:55:27 GMT [MD]
# PropList-5.2.0.txt
# Date: 2009-08-22, 04:58:40 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -41,6 +41,7 @@
002D ; Dash # Pd HYPHEN-MINUS
058A ; Dash # Pd ARMENIAN HYPHEN
05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF
1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN
1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN
2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR
2053 ; Dash # Po SWUNG DASH
@ -57,7 +58,7 @@ FE58 ; Dash # Pd SMALL EM DASH
FE63 ; Dash # Pd SMALL HYPHEN-MINUS
FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
# Total code points: 24
# Total code points: 25
# ================================================
@ -124,6 +125,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK
0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD
@ -137,6 +139,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS
1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP
1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
@ -145,11 +148,16 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK
A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA
A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI
AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI
ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI
FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP
FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK
@ -161,10 +169,13 @@ FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP
FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER
103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER
10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN
1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR
10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
12470..12473 ; Terminal_Punctuation # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
# Total code points: 119
# Total code points: 161
# ================================================
@ -347,11 +358,17 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA
07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN
0901..0902 ; Other_Alphabetic # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
0900..0902 ; Other_Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094E ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0955 ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
@ -454,6 +471,8 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A
109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI
135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U
1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
@ -476,6 +495,16 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
19C8..19C9 ; Other_Alphabetic # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Other_Alphabetic # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A
1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
@ -494,6 +523,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CF2 ; Other_Alphabetic # Mc VEDIC SIGN ARDHAVISARGA
24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
@ -504,6 +534,14 @@ A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAU
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN
A9B3 ; Other_Alphabetic # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA
AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -512,12 +550,25 @@ AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA
AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H
AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM
ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
# Total code points: 663
# Total code points: 759
# ================================================
@ -526,14 +577,15 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FC3 ; Ideographic # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
4E00..9FCB ; Ideographic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
FA30..FA6A ; Ideographic # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
FA30..FA6D ; Ideographic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 71248
# Total code points: 75408
# ================================================
@ -577,6 +629,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
@ -611,25 +664,35 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
1B44 ; Diacritic # Mc BALINESE ADEG ADEG
1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH
1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA
1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
1DFE..1DFF ; Diacritic # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1FBD ; Diacritic # Sk GREEK KORONIS
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA
2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2E2F ; Diacritic # Lm VERTICAL TILDE
302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
@ -638,13 +701,24 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET
A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A67F ; Diacritic # Lm CYRILLIC PAYEROK
A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
A92E ; Diacritic # Po KAYAH LI SIGN CWI
A953 ; Diacritic # Mc REJANG VIRAMA
A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU
A9C0 ; Diacritic # Mc JAVANESE PANGKON
AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE
AABF ; Diacritic # Mn TAI VIET TONE MAI EK
AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG
AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO
AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG
ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK
ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK
FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE20..FE26 ; Diacritic # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT
@ -652,13 +726,14 @@ FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT
FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE3 ; Diacritic # Sk FULLWIDTH MACRON
110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
# Total code points: 565
# Total code points: 639
# ================================================
@ -669,6 +744,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK
0EC6 ; Extender # Lm LAO KO LA
1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK
1C36 ; Extender # Mn LEPCHA SIGN RAN
1C7B ; Extender # Lm OL CHIKI RELAA
3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK
@ -677,9 +753,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
A015 ; Extender # Lm YI SYLLABLE WU
A60C ; Extender # Lm VAI SYLLABLE LENGTHENER
A9CF ; Extender # Lm JAVANESE PANGRANGKEP
AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AADD ; Extender # Lm TAI VIET SYMBOL SAM
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
# Total code points: 24
# Total code points: 28
# ================================================
@ -774,7 +853,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
# ================================================
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FC3 ; Unified_Ideograph # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
4E00..9FCB ; Unified_Ideograph # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
@ -783,8 +862,9 @@ FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
# Total code points: 70237
# Total code points: 74394
# ================================================
@ -803,14 +883,17 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>.
# ================================================
0340..0341 ; Deprecated # Mn [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
17A3 ; Deprecated # Lo KHMER INDEPENDENT VOWEL QAQ
17D3 ; Deprecated # Mn KHMER SIGN BATHAMASAT
0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR
0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL
17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA
206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
E0001 ; Deprecated # Cf LANGUAGE TAG
E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 107
# Total code points: 110
# ================================================
@ -829,7 +912,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL
1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
2071 ; Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I
2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J
2C7C ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER J
1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J
@ -852,8 +935,11 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI
0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA
AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY
# Total code points: 10
# Total code points: 15
# ================================================
@ -899,19 +985,25 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2E2E ; STerm # Po REVERSED QUESTION MARK
3002 ; STerm # Po IDEOGRAPHIC FULL STOP
A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
A6F3 ; STerm # Po BAMUM FULL STOP
A6F7 ; STerm # Po BAMUM QUESTION MARK
A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A92F ; STerm # Po KAYAH LI SIGN SHYA
A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
FE52 ; STerm # Po SMALL FULL STOP
FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
FF0E ; STerm # Po FULLWIDTH FULL STOP
FF1F ; STerm # Po FULLWIDTH QUESTION MARK
FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
# Total code points: 56
# Total code points: 66
# ================================================
@ -1024,8 +1116,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E7 ; Pattern_Syntax # So [6] WHITE TRAPEZIUM..ELECTRICAL INTERSECTION
23E8..23FF ; Pattern_Syntax # Cn [24] <reserved-23E8>..<reserved-23FF>
23E2..23E8 ; Pattern_Syntax # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
23E9..23FF ; Pattern_Syntax # Cn [23] <reserved-23E9>..<reserved-23FF>
2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
@ -1038,12 +1130,14 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN
2670..269D ; Pattern_Syntax # So [46] WEST SYRIAC CROSS..OUTLINED WHITE STAR
269E..269F ; Pattern_Syntax # Cn [2] <reserved-269E>..<reserved-269F>
26A0..26BC ; Pattern_Syntax # So [29] WARNING SIGN..SESQUIQUADRATE
26BD..26BF ; Pattern_Syntax # Cn [3] <reserved-26BD>..<reserved-26BF>
26C0..26C3 ; Pattern_Syntax # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
26C4..2700 ; Pattern_Syntax # Cn [61] <reserved-26C4>..<reserved-2700>
2670..26CD ; Pattern_Syntax # So [94] WEST SYRIAC CROSS..DISABLED CAR
26CE ; Pattern_Syntax # Cn <reserved-26CE>
26CF..26E1 ; Pattern_Syntax # So [19] PICK..RESTRICTED LEFT ENTRY-2
26E2 ; Pattern_Syntax # Cn <reserved-26E2>
26E3 ; Pattern_Syntax # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E4..26E7 ; Pattern_Syntax # Cn [4] <reserved-26E4>..<reserved-26E7>
26E8..26FF ; Pattern_Syntax # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2700 ; Pattern_Syntax # Cn <reserved-2700>
2701..2704 ; Pattern_Syntax # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2705 ; Pattern_Syntax # Cn <reserved-2705>
2706..2709 ; Pattern_Syntax # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
@ -1056,9 +1150,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
274E ; Pattern_Syntax # Cn <reserved-274E>
274F..2752 ; Pattern_Syntax # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2753..2755 ; Pattern_Syntax # Cn [3] <reserved-2753>..<reserved-2755>
2756 ; Pattern_Syntax # So BLACK DIAMOND MINUS WHITE X
2757 ; Pattern_Syntax # Cn <reserved-2757>
2758..275E ; Pattern_Syntax # So [7] LIGHT VERTICAL BAR..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2756..275E ; Pattern_Syntax # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
275F..2760 ; Pattern_Syntax # Cn [2] <reserved-275F>..<reserved-2760>
2761..2767 ; Pattern_Syntax # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
@ -1138,8 +1230,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B4F ; Pattern_Syntax # Cn [3] <reserved-2B4D>..<reserved-2B4F>
2B50..2B54 ; Pattern_Syntax # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON
2B55..2BFF ; Pattern_Syntax # Cn [171] <reserved-2B55>..<reserved-2BFF>
2B50..2B59 ; Pattern_Syntax # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2B5A..2BFF ; Pattern_Syntax # Cn [166] <reserved-2B5A>..<reserved-2BFF>
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
@ -1171,8 +1263,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; Pattern_Syntax # Lm VERTICAL TILDE
2E30 ; Pattern_Syntax # Po RING POINT
2E31..2E7F ; Pattern_Syntax # Cn [79] <reserved-2E31>..<reserved-2E7F>
2E30..2E31 ; Pattern_Syntax # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
2E32..2E7F ; Pattern_Syntax # Cn [78] <reserved-2E32>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
# WordBreakProperty-5.1.0.txt
# Date: 2008-03-20, 17:55:36 GMT [MD]
# WordBreakProperty-5.2.0.txt
# Date: 2009-07-12, 04:17:35 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -58,14 +58,19 @@
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Extend # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
094E ; Extend # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0951..0955 ; Extend # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Extend # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
@ -190,6 +195,8 @@
1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109C ; Extend # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI
135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
@ -216,6 +223,18 @@
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; Extend # Mn TAI THAM SIGN SAKOT
1A61 ; Extend # Mc TAI THAM VOWEL SIGN A
1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Extend # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Extend # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Extend # Mc BALINESE SIGN BISAH
1B34 ; Extend # Mn BALINESE SIGN REREKAN
@ -238,20 +257,28 @@
1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; Extend # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; Extend # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
@ -261,9 +288,18 @@ A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Extend # Mc JAVANESE SIGN WIGNYAN
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Extend # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Extend # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Extend # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -272,6 +308,19 @@ AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H
AA7B ; Extend # Mc MYANMAR SIGN PAO KAREN TONE
AAB0 ; Extend # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; Extend # Mn TAI VIET TONE MAI THO
ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
ABEC ; Extend # Mc MEETEI MAYEK LUM IYEK
ABED ; Extend # Mn MEETEI MAYEK APUN IYEK
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
@ -282,6 +331,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
11082 ; Extend # Mc KAITHI SIGN VISARGA
110B0..110B2 ; Extend # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
@ -291,7 +346,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1285
# Total code points: 1455
# ================================================
@ -300,13 +355,13 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Format # Cf KAITHI NUMBER SIGN
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
@ -362,7 +417,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
048A..0523 ; ALetter # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
048A..0525 ; ALetter # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
@ -386,13 +441,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07FA ; ALetter # Lm NKO LAJANYALAN
0800..0815 ; ALetter # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
081A ; ALetter # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A
0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; ALetter # Lo DEVANAGARI OM
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; ALetter # Lo DEVANAGARI LETTER CANDRA A
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
0979..097F ; ALetter # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
@ -479,10 +538,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
@ -501,7 +557,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
166F..167F ; ALetter # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
@ -516,6 +572,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
@ -526,6 +583,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
@ -551,8 +610,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
2107 ; ALetter # L& EULER CONSTANT
@ -575,10 +634,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C6F ; ALetter # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
2C71..2C7C ; ALetter # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
2C60..2C7C ; ALetter # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI
2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
@ -601,6 +660,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; ALetter # Lm YI SYLLABLE WU
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A4D0..A4F7 ; ALetter # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; ALetter # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER
A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
@ -610,6 +671,8 @@ A662..A66D ; ALetter # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMA
A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O
A67F ; ALetter # Lm CYRILLIC PAYEROK
A680..A697 ; ALetter # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; ALetter # Lm MODIFIER LETTER US
@ -622,12 +685,20 @@ A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER G
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8F2..A8F7 ; ALetter # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8FB ; ALetter # Lo DEVANAGARI HEADSTROKE
A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A
A960..A97C ; ALetter # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
A984..A9B2 ; ALetter # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
A9CF ; ALetter # Lm JAVANESE PANGRANGKEP
AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
ABC0..ABE2 ; ALetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; ALetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; ALetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
@ -677,15 +748,22 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A60..10A7C ; ALetter # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10B00..10B35 ; ALetter # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B40..10B55 ; ALetter # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B60..10B72 ; ALetter # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
@ -717,7 +795,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
# Total code points: 21903
# Total code points: 23694
# ================================================
@ -788,7 +866,9 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19D0..19DA ; Numeric # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
1A80..1A89 ; Numeric # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Numeric # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
@ -796,11 +876,13 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A9D0..A9D9 ; Numeric # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
# Total code points: 361
# Total code points: 402
# ================================================

View File

@ -2,7 +2,7 @@ USING: help.markup help.syntax strings ;
IN: unicode
ARTICLE: "unicode" "Unicode support"
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.2 character set."
$nl
"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
$nl