Merge branch 'master' of git://github.com/littledan/Factor
commit
ed9251c56c
|
@ -1,10 +1,10 @@
|
|||
# GraphemeBreakTest-5.1.0.txt
|
||||
# Date: 2008-03-11, 02:19:22 GMT [MD]
|
||||
# GraphemeBreakTest-5.2.0.txt
|
||||
# Date: 2009-09-19, 00:42:12 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Default Grapheme Break Test
|
||||
#
|
||||
|
@ -15,7 +15,7 @@
|
|||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - (x) the Grapheme_Break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# WordBreakTest-5.1.0.txt
|
||||
# Date: 2008-03-11, 02:19:28 GMT [MD]
|
||||
# WordBreakTest-5.2.0.txt
|
||||
# Date: 2009-09-19, 00:42:16 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Default Word Break Test
|
||||
#
|
||||
|
@ -15,7 +15,7 @@
|
|||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - (x) the Word_Break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,5 +1,5 @@
|
|||
# CompositionExclusions-5.1.0.txt
|
||||
# Date: 2008-03-20, 17:45:00 PDT [KW]
|
||||
# CompositionExclusions-5.2.0.txt
|
||||
# Date: 2009-05-22, 12:52:00 PDT [KW]
|
||||
#
|
||||
# This file lists the characters for the Composition Exclusion Table
|
||||
# defined in UAX #15, Unicode Normalization Forms.
|
||||
|
@ -7,7 +7,7 @@
|
|||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# For more information, see
|
||||
|
@ -170,11 +170,11 @@ FB4E # HEBREW LETTER PE WITH RAFE
|
|||
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
|
||||
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
|
||||
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||||
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
|
||||
# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 1030
|
||||
# Total code points: 1033
|
||||
|
||||
# ================================================
|
||||
# (4) Non-Starter Decompositions
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# PropList-5.1.0.txt
|
||||
# Date: 2008-03-20, 17:55:27 GMT [MD]
|
||||
# PropList-5.2.0.txt
|
||||
# Date: 2009-08-22, 04:58:40 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
|||
002D ; Dash # Pd HYPHEN-MINUS
|
||||
058A ; Dash # Pd ARMENIAN HYPHEN
|
||||
05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF
|
||||
1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN
|
||||
1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN
|
||||
2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR
|
||||
2053 ; Dash # Po SWUNG DASH
|
||||
|
@ -57,7 +58,7 @@ FE58 ; Dash # Pd SMALL EM DASH
|
|||
FE63 ; Dash # Pd SMALL HYPHEN-MINUS
|
||||
FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
|
||||
|
||||
# Total code points: 24
|
||||
# Total code points: 25
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -124,6 +125,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
|||
0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
|
||||
070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
|
||||
07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK
|
||||
0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
|
||||
0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
|
||||
0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
|
||||
0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD
|
||||
|
@ -137,6 +139,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
|||
1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS
|
||||
1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP
|
||||
1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
|
||||
1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
|
||||
1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
|
||||
1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
|
||||
|
@ -145,11 +148,16 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
|
|||
2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK
|
||||
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
|
||||
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
|
||||
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
|
||||
A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK
|
||||
A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA
|
||||
A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI
|
||||
ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI
|
||||
FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP
|
||||
FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
|
||||
FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK
|
||||
|
@ -161,10 +169,13 @@ FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
|||
FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
|
||||
1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER
|
||||
103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER
|
||||
10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN
|
||||
1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR
|
||||
10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
|
||||
110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
12470..12473 ; Terminal_Punctuation # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
|
||||
|
||||
# Total code points: 119
|
||||
# Total code points: 161
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -347,11 +358,17 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
|||
0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
||||
0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA
|
||||
07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
0901..0902 ; Other_Alphabetic # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF
|
||||
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
|
||||
0900..0902 ; Other_Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
|
||||
093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094E ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
0955 ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU
|
||||
0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
|
@ -454,6 +471,8 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
|||
1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
|
||||
1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
|
||||
1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
|
||||
109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A
|
||||
109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U
|
||||
1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
|
||||
|
@ -476,6 +495,16 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
|||
19C8..19C9 ; Other_Alphabetic # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
|
||||
1A19..1A1B ; Other_Alphabetic # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
|
||||
1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA
|
||||
1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA
|
||||
1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI
|
||||
1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
|
||||
1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A
|
||||
1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT
|
||||
1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
|
||||
1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
|
||||
1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
|
||||
1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
|
||||
1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
|
||||
1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
|
@ -494,6 +523,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
|
|||
1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
|
||||
1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
|
||||
1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
|
||||
1CF2 ; Other_Alphabetic # Mc VEDIC SIGN ARDHAVISARGA
|
||||
24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
|
||||
|
@ -504,6 +534,14 @@ A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAU
|
|||
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
|
||||
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
|
||||
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
|
||||
A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
|
||||
A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN
|
||||
A9B3 ; Other_Alphabetic # Mn JAVANESE SIGN CECAK TELU
|
||||
A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
|
||||
A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
|
||||
A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
|
||||
A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET
|
||||
A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA
|
||||
AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
|
||||
AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
|
||||
AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
|
||||
|
@ -512,12 +550,25 @@ AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA
|
|||
AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG
|
||||
AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M
|
||||
AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H
|
||||
AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG
|
||||
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
|
||||
AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
|
||||
AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM
|
||||
ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
|
||||
ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP
|
||||
ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
|
||||
ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP
|
||||
ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
|
||||
FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
|
||||
10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
|
||||
10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
|
||||
110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
|
||||
110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
|
||||
|
||||
# Total code points: 663
|
||||
# Total code points: 759
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -526,14 +577,15 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
|||
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
|
||||
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FC3 ; Ideographic # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
|
||||
4E00..9FCB ; Ideographic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
|
||||
F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||||
FA30..FA6A ; Ideographic # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
|
||||
FA30..FA6D ; Ideographic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 71248
|
||||
# Total code points: 75408
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -577,6 +629,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
|||
07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
|
||||
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
|
||||
093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
|
||||
094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
|
@ -611,25 +664,35 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
|||
1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
|
||||
108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
|
||||
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
|
||||
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
|
||||
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
|
||||
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
|
||||
1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
|
||||
1B44 ; Diacritic # Mc BALINESE ADEG ADEG
|
||||
1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH
|
||||
1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
|
||||
1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
|
||||
1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA
|
||||
1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
|
||||
1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
|
||||
1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
|
||||
1DFE..1DFF ; Diacritic # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1FBD ; Diacritic # Sk GREEK KORONIS
|
||||
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
|
||||
1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
|
||||
1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
|
||||
1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
|
||||
1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA
|
||||
2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
|
||||
2E2F ; Diacritic # Lm VERTICAL TILDE
|
||||
302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
|
@ -638,13 +701,24 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
|
|||
A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET
|
||||
A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
|
||||
A67F ; Diacritic # Lm CYRILLIC PAYEROK
|
||||
A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
|
||||
A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
|
||||
A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
|
||||
A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
|
||||
A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA
|
||||
A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
|
||||
A92E ; Diacritic # Po KAYAH LI SIGN CWI
|
||||
A953 ; Diacritic # Mc REJANG VIRAMA
|
||||
A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU
|
||||
A9C0 ; Diacritic # Mc JAVANESE PANGKON
|
||||
AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE
|
||||
AABF ; Diacritic # Mn TAI VIET TONE MAI EK
|
||||
AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG
|
||||
AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO
|
||||
AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG
|
||||
ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK
|
||||
ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK
|
||||
FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE20..FE26 ; Diacritic # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT
|
||||
|
@ -652,13 +726,14 @@ FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT
|
|||
FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
||||
110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
|
||||
# Total code points: 565
|
||||
# Total code points: 639
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -669,6 +744,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
|||
0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK
|
||||
0EC6 ; Extender # Lm LAO KO LA
|
||||
1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK
|
||||
1C36 ; Extender # Mn LEPCHA SIGN RAN
|
||||
1C7B ; Extender # Lm OL CHIKI RELAA
|
||||
3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK
|
||||
|
@ -677,9 +753,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
|
|||
30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
|
||||
A015 ; Extender # Lm YI SYLLABLE WU
|
||||
A60C ; Extender # Lm VAI SYLLABLE LENGTHENER
|
||||
A9CF ; Extender # Lm JAVANESE PANGRANGKEP
|
||||
AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
|
||||
AADD ; Extender # Lm TAI VIET SYMBOL SAM
|
||||
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
|
||||
# Total code points: 24
|
||||
# Total code points: 28
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -774,7 +853,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
|
|||
# ================================================
|
||||
|
||||
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FC3 ; Unified_Ideograph # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
|
||||
4E00..9FCB ; Unified_Ideograph # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
|
||||
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
|
||||
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
|
||||
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
|
||||
|
@ -783,8 +862,9 @@ FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
|
|||
FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
|
||||
FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
|
||||
20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
|
||||
# Total code points: 70237
|
||||
# Total code points: 74394
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -803,14 +883,17 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>.
|
|||
|
||||
# ================================================
|
||||
|
||||
0340..0341 ; Deprecated # Mn [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
|
||||
17A3 ; Deprecated # Lo KHMER INDEPENDENT VOWEL QAQ
|
||||
17D3 ; Deprecated # Mn KHMER SIGN BATHAMASAT
|
||||
0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR
|
||||
0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL
|
||||
17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA
|
||||
206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
|
||||
2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
|
||||
232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
|
||||
E0001 ; Deprecated # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 107
|
||||
# Total code points: 110
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -829,7 +912,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
|
|||
1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL
|
||||
1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
|
||||
1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
|
||||
2071 ; Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
|
||||
2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J
|
||||
2C7C ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER J
|
||||
1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J
|
||||
|
@ -852,8 +935,11 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
|
|||
|
||||
0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI
|
||||
0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
|
||||
AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
|
||||
AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA
|
||||
AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY
|
||||
|
||||
# Total code points: 10
|
||||
# Total code points: 15
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -899,19 +985,25 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
|
|||
2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
|
||||
2E2E ; STerm # Po REVERSED QUESTION MARK
|
||||
3002 ; STerm # Po IDEOGRAPHIC FULL STOP
|
||||
A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
|
||||
A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
|
||||
A6F3 ; STerm # Po BAMUM FULL STOP
|
||||
A6F7 ; STerm # Po BAMUM QUESTION MARK
|
||||
A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
|
||||
A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
|
||||
A92F ; STerm # Po KAYAH LI SIGN SHYA
|
||||
A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
|
||||
AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
|
||||
ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
|
||||
FE52 ; STerm # Po SMALL FULL STOP
|
||||
FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
|
||||
FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
|
||||
FF0E ; STerm # Po FULLWIDTH FULL STOP
|
||||
FF1F ; STerm # Po FULLWIDTH QUESTION MARK
|
||||
FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
|
||||
|
||||
# Total code points: 56
|
||||
# Total code points: 66
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1024,8 +1116,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
|||
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23E7 ; Pattern_Syntax # So [6] WHITE TRAPEZIUM..ELECTRICAL INTERSECTION
|
||||
23E8..23FF ; Pattern_Syntax # Cn [24] <reserved-23E8>..<reserved-23FF>
|
||||
23E2..23E8 ; Pattern_Syntax # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
|
||||
23E9..23FF ; Pattern_Syntax # Cn [23] <reserved-23E9>..<reserved-23FF>
|
||||
2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
|
||||
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
|
@ -1038,12 +1130,14 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
|||
25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
|
||||
2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
|
||||
266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN
|
||||
2670..269D ; Pattern_Syntax # So [46] WEST SYRIAC CROSS..OUTLINED WHITE STAR
|
||||
269E..269F ; Pattern_Syntax # Cn [2] <reserved-269E>..<reserved-269F>
|
||||
26A0..26BC ; Pattern_Syntax # So [29] WARNING SIGN..SESQUIQUADRATE
|
||||
26BD..26BF ; Pattern_Syntax # Cn [3] <reserved-26BD>..<reserved-26BF>
|
||||
26C0..26C3 ; Pattern_Syntax # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
|
||||
26C4..2700 ; Pattern_Syntax # Cn [61] <reserved-26C4>..<reserved-2700>
|
||||
2670..26CD ; Pattern_Syntax # So [94] WEST SYRIAC CROSS..DISABLED CAR
|
||||
26CE ; Pattern_Syntax # Cn <reserved-26CE>
|
||||
26CF..26E1 ; Pattern_Syntax # So [19] PICK..RESTRICTED LEFT ENTRY-2
|
||||
26E2 ; Pattern_Syntax # Cn <reserved-26E2>
|
||||
26E3 ; Pattern_Syntax # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
|
||||
26E4..26E7 ; Pattern_Syntax # Cn [4] <reserved-26E4>..<reserved-26E7>
|
||||
26E8..26FF ; Pattern_Syntax # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2700 ; Pattern_Syntax # Cn <reserved-2700>
|
||||
2701..2704 ; Pattern_Syntax # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
|
||||
2705 ; Pattern_Syntax # Cn <reserved-2705>
|
||||
2706..2709 ; Pattern_Syntax # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
|
||||
|
@ -1056,9 +1150,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
|||
274E ; Pattern_Syntax # Cn <reserved-274E>
|
||||
274F..2752 ; Pattern_Syntax # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
|
||||
2753..2755 ; Pattern_Syntax # Cn [3] <reserved-2753>..<reserved-2755>
|
||||
2756 ; Pattern_Syntax # So BLACK DIAMOND MINUS WHITE X
|
||||
2757 ; Pattern_Syntax # Cn <reserved-2757>
|
||||
2758..275E ; Pattern_Syntax # So [7] LIGHT VERTICAL BAR..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
2756..275E ; Pattern_Syntax # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
275F..2760 ; Pattern_Syntax # Cn [2] <reserved-275F>..<reserved-2760>
|
||||
2761..2767 ; Pattern_Syntax # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
|
||||
2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
|
||||
|
@ -1138,8 +1230,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
|||
2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
|
||||
2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
|
||||
2B4D..2B4F ; Pattern_Syntax # Cn [3] <reserved-2B4D>..<reserved-2B4F>
|
||||
2B50..2B54 ; Pattern_Syntax # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON
|
||||
2B55..2BFF ; Pattern_Syntax # Cn [171] <reserved-2B55>..<reserved-2BFF>
|
||||
2B50..2B59 ; Pattern_Syntax # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
|
||||
2B5A..2BFF ; Pattern_Syntax # Cn [166] <reserved-2B5A>..<reserved-2BFF>
|
||||
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
|
||||
2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
|
||||
|
@ -1171,8 +1263,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
|
|||
2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS
|
||||
2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
|
||||
2E2F ; Pattern_Syntax # Lm VERTICAL TILDE
|
||||
2E30 ; Pattern_Syntax # Po RING POINT
|
||||
2E31..2E7F ; Pattern_Syntax # Cn [79] <reserved-2E31>..<reserved-2E7F>
|
||||
2E30..2E31 ; Pattern_Syntax # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
|
||||
2E32..2E7F ; Pattern_Syntax # Cn [78] <reserved-2E32>..<reserved-2E7F>
|
||||
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,10 +1,10 @@
|
|||
# WordBreakProperty-5.1.0.txt
|
||||
# Date: 2008-03-20, 17:55:36 GMT [MD]
|
||||
# WordBreakProperty-5.2.0.txt
|
||||
# Date: 2009-07-12, 04:17:35 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# Copyright (c) 1991-2009 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -58,14 +58,19 @@
|
|||
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
|
||||
081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
|
||||
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
|
||||
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094D ; Extend # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
094E ; Extend # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
0951..0955 ; Extend # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
|
||||
0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; Extend # Mn BENGALI SIGN CANDRABINDU
|
||||
0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
|
@ -190,6 +195,8 @@
|
|||
1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
|
||||
108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
|
||||
109A..109C ; Extend # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
|
||||
109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
|
@ -216,6 +223,18 @@
|
|||
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
|
||||
1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
|
||||
1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA
|
||||
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
|
||||
1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI
|
||||
1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
|
||||
1A60 ; Extend # Mn TAI THAM SIGN SAKOT
|
||||
1A61 ; Extend # Mc TAI THAM VOWEL SIGN A
|
||||
1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT
|
||||
1A63..1A64 ; Extend # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
|
||||
1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
|
||||
1A6D..1A72 ; Extend # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
|
||||
1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B04 ; Extend # Mc BALINESE SIGN BISAH
|
||||
1B34 ; Extend # Mn BALINESE SIGN REREKAN
|
||||
|
@ -238,20 +257,28 @@
|
|||
1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
|
||||
1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
|
||||
1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
|
||||
1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Extend # Mn VEDIC SIGN TIRYAK
|
||||
1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA
|
||||
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
1DFD..1DFF ; Extend # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
|
||||
2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
A66F ; Extend # Mn COMBINING CYRILLIC VZMET
|
||||
A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
|
||||
A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
|
||||
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
|
||||
A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
|
||||
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
|
||||
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
|
||||
|
@ -261,9 +288,18 @@ A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
|
|||
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
|
||||
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
|
||||
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
|
||||
A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
|
||||
A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
|
||||
A983 ; Extend # Mc JAVANESE SIGN WIGNYAN
|
||||
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
|
||||
A9B4..A9B5 ; Extend # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
|
||||
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
|
||||
A9BA..A9BB ; Extend # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
|
||||
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
|
||||
A9BD..A9C0 ; Extend # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
|
||||
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
|
||||
AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
|
||||
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
|
||||
|
@ -272,6 +308,19 @@ AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
|
|||
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
|
||||
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
|
||||
AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H
|
||||
AA7B ; Extend # Mc MYANMAR SIGN PAO KAREN TONE
|
||||
AAB0 ; Extend # Mn TAI VIET MAI KANG
|
||||
AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
|
||||
AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
|
||||
AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
|
||||
AAC1 ; Extend # Mn TAI VIET TONE MAI THO
|
||||
ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
|
||||
ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP
|
||||
ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
|
||||
ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP
|
||||
ABE9..ABEA ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
|
||||
ABEC ; Extend # Mc MEETEI MAYEK LUM IYEK
|
||||
ABED ; Extend # Mn MEETEI MAYEK APUN IYEK
|
||||
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
|
@ -282,6 +331,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
|
||||
11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
|
||||
11082 ; Extend # Mc KAITHI SIGN VISARGA
|
||||
110B0..110B2 ; Extend # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
|
||||
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
|
||||
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
|
||||
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
|
@ -291,7 +346,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1285
|
||||
# Total code points: 1455
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -300,13 +355,13 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
|||
06DD ; Format # Cf ARABIC END OF AYAH
|
||||
070F ; Format # Cf SYRIAC ABBREVIATION MARK
|
||||
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
|
||||
200B ; Format # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
|
||||
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
|
||||
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
|
||||
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
|
||||
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
|
||||
110BD ; Format # Cf KAITHI NUMBER SIGN
|
||||
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
|
||||
E0001 ; Format # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
@ -362,7 +417,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
|
||||
03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
|
||||
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
|
||||
048A..0523 ; ALetter # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
|
||||
048A..0525 ; ALetter # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
|
||||
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
|
||||
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||||
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
|
@ -386,13 +441,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
|
||||
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
|
||||
07FA ; ALetter # Lm NKO LAJANYALAN
|
||||
0800..0815 ; ALetter # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
|
||||
081A ; ALetter # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
|
||||
0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A
|
||||
0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I
|
||||
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
0950 ; ALetter # Lo DEVANAGARI OM
|
||||
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972 ; ALetter # Lo DEVANAGARI LETTER CANDRA A
|
||||
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
|
||||
0979..097F ; ALetter # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
|
||||
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
|
||||
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
|
||||
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
|
||||
|
@ -479,10 +538,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
|
||||
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
|
||||
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
|
||||
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
|
||||
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
|
||||
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
|
||||
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
|
||||
1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
|
||||
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
|
||||
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
|
||||
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
|
||||
|
@ -501,7 +557,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
|
||||
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
|
||||
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
|
||||
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
|
||||
166F..167F ; ALetter # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
|
||||
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
|
||||
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
|
||||
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
|
||||
|
@ -516,6 +572,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
|
||||
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||||
18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
|
||||
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
|
||||
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
|
||||
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
|
||||
|
@ -526,6 +583,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
|
||||
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
|
||||
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
|
||||
1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
|
||||
1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
|
||||
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
|
||||
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
|
||||
|
@ -551,8 +610,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
|
||||
2107 ; ALetter # L& EULER CONSTANT
|
||||
|
@ -575,10 +634,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
2C60..2C6F ; ALetter # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
|
||||
2C71..2C7C ; ALetter # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
|
||||
2C60..2C7C ; ALetter # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
|
||||
2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V
|
||||
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
|
||||
2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI
|
||||
2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
|
||||
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
|
||||
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
|
@ -601,6 +660,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
|
||||
A015 ; ALetter # Lm YI SYLLABLE WU
|
||||
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
|
||||
A4D0..A4F7 ; ALetter # Lo [40] LISU LETTER BA..LISU LETTER OE
|
||||
A4F8..A4FD ; ALetter # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
|
||||
A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
|
||||
A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER
|
||||
A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
|
||||
|
@ -610,6 +671,8 @@ A662..A66D ; ALetter # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMA
|
|||
A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O
|
||||
A67F ; ALetter # Lm CYRILLIC PAYEROK
|
||||
A680..A697 ; ALetter # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
|
||||
A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
|
||||
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
|
||||
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
|
||||
A770 ; ALetter # Lm MODIFIER LETTER US
|
||||
|
@ -622,12 +685,20 @@ A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER G
|
|||
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
|
||||
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
|
||||
A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
|
||||
A8F2..A8F7 ; ALetter # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
|
||||
A8FB ; ALetter # Lo DEVANAGARI HEADSTROKE
|
||||
A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
|
||||
A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A
|
||||
A960..A97C ; ALetter # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
|
||||
A984..A9B2 ; ALetter # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
|
||||
A9CF ; ALetter # Lm JAVANESE PANGRANGKEP
|
||||
AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA
|
||||
AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
|
||||
AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
|
||||
ABC0..ABE2 ; ALetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
|
||||
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
|
||||
D7B0..D7C6 ; ALetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
|
||||
D7CB..D7FB ; ALetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
|
||||
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
|
||||
|
@ -677,15 +748,22 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
|
|||
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
|
||||
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
|
||||
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
|
||||
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
|
||||
1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
|
||||
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
|
||||
10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
|
||||
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
|
||||
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
|
||||
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
|
||||
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
|
||||
10A60..10A7C ; ALetter # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
|
||||
10B00..10B35 ; ALetter # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
|
||||
10B40..10B55 ; ALetter # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
|
||||
10B60..10B72 ; ALetter # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
|
||||
10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
|
||||
11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
|
||||
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
|
||||
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
|
||||
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
|
||||
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
|
||||
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
|
||||
|
@ -717,7 +795,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
|
|||
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
|
||||
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
|
||||
|
||||
# Total code points: 21903
|
||||
# Total code points: 23694
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -788,7 +866,9 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
|
|||
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
|
||||
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
|
||||
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
|
||||
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
19D0..19DA ; Numeric # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
|
||||
1A80..1A89 ; Numeric # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
|
||||
1A90..1A99 ; Numeric # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
|
||||
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
|
||||
1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
|
||||
1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
|
||||
|
@ -796,11 +876,13 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
|
|||
A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
|
||||
A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
|
||||
A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
|
||||
A9D0..A9D9 ; Numeric # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
|
||||
AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
|
||||
ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
|
||||
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
|
||||
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
|
||||
|
||||
# Total code points: 361
|
||||
# Total code points: 402
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ USING: help.markup help.syntax strings ;
|
|||
IN: unicode
|
||||
|
||||
ARTICLE: "unicode" "Unicode support"
|
||||
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
|
||||
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.2 character set."
|
||||
$nl
|
||||
"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
|
||||
$nl
|
||||
|
|
Loading…
Reference in New Issue