Merge branch 'master' into new_ui

db4
Slava Pestov 2009-01-07 12:27:54 -06:00
commit d9018d40c7
20 changed files with 2681 additions and 106 deletions

View File

@ -29,8 +29,7 @@ ABOUT: "grouping"
HELP: groups
{ $class-description "Instances are virtual sequences whose elements are disjoint fixed-length subsequences of an underlying sequence. Groups are mutable and resizable if the underlying sequence is mutable and resizable, respectively."
$nl
"New groups are created by calling " { $link <groups> } " and " { $link <sliced-groups> } "." }
{ $see-also group } ;
"New groups are created by calling " { $link <groups> } " and " { $link <sliced-groups> } "." } ;
HELP: group
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "array" "a sequence of sequences" } }
@ -48,11 +47,16 @@ HELP: <groups>
"USING: arrays kernel prettyprint sequences grouping ;"
"9 >array 3 <groups> dup reverse-here concat >array ." "{ 6 7 8 3 4 5 0 1 2 }"
}
{ $example
"USING: kernel prettyprint sequences grouping ;"
"{ 1 2 3 4 5 6 } 3 <groups> 0 swap nth ."
"{ 1 2 3 }"
}
} ;
HELP: <sliced-groups>
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "groups" groups } }
{ $description "Outputs a virtual sequence whose elements are overlapping subsequences of " { $snippet "n" } " elements from the underlying sequence." }
{ $description "Outputs a virtual sequence whose elements are slices of disjoint subsequences of " { $snippet "n" } " elements from the underlying sequence." }
{ $examples
{ $example
"USING: arrays kernel prettyprint sequences grouping ;"
@ -60,6 +64,11 @@ HELP: <sliced-groups>
"dup [ reverse-here ] each concat >array ."
"{ 2 1 0 5 4 3 8 7 6 }"
}
{ $example
"USING: kernel prettyprint sequences grouping ;"
"{ 1 2 3 4 5 6 } 3 <sliced-groups> 1 swap nth ."
"T{ slice { from 3 } { to 6 } { seq { 1 2 3 4 5 6 } } }"
}
} ;
HELP: clumps
@ -89,11 +98,23 @@ HELP: <clumps>
"share-price 4 <clumps> [ [ sum ] [ length ] bi / ] map ."
"{ 113/400 167/400 201/400 241/400 243/400 91/200 1/4 }"
}
{ $example
"USING: kernel sequences grouping prettyprint ;"
"{ 1 2 3 4 5 6 } 3 <clumps> second ."
"{ 2 3 4 }"
}
} ;
HELP: <sliced-clumps>
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "clumps" clumps } }
{ $description "Outputs a virtual sequence whose elements are overlapping slices of " { $snippet "n" } " elements from the underlying sequence." } ;
{ $description "Outputs a virtual sequence whose elements are overlapping slices of " { $snippet "n" } " elements from the underlying sequence." }
{ $examples
{ $example
"USING: kernel sequences grouping prettyprint ;"
"{ 1 2 3 4 5 6 } 3 <sliced-clumps> second ."
"T{ slice { from 1 } { to 4 } { seq { 1 2 3 4 5 6 } } }"
}
} ;
{ clumps groups } related-words

View File

@ -0,0 +1,66 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax kernel quotations ;
IN: io.directories.search
HELP: each-file
{ $values
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
}
{ $description "Performs a directory traversal, breadth-first or depth-first, and calls the quotation on the full pathname of each file." }
{ $examples
{ $unchecked-example "USING: sequences io.directories.search ;"
"\"resource:misc\" t [ . ] each-file"
"! Recursive directory listing prints here"
}
} ;
HELP: recursive-directory
{ $values
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" }
{ "paths" "a sequence of pathname strings" }
}
{ $description "Traverses a directory path recursively and returns a sequence of files in a breadth-first or depth-first manner." } ;
HELP: find-file
{ $values
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
{ "path/f" "a pathname string or f" }
}
{ $description "Finds the first file in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
HELP: find-in-directories
{ $values
{ "directories" "a sequence of pathnames" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
{ "path'" "a pathname string" }
}
{ $description "Finds the first file in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
HELP: find-all-files
{ $values
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
{ "paths" "a sequence of pathname strings" }
}
{ $description "Finds all files in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
HELP: find-all-in-directories
{ $values
{ "directories" "a sequence of directory paths" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
{ "paths" "a sequence of pathname strings" }
}
{ $description "Finds all files in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
{ find-file find-all-files find-in-directories find-all-in-directories } related-words
ARTICLE: "io.directories.search" "io.directories.search"
"The " { $vocab-link "io.directories.search" } " vocabulary contains words used for recursively iterating over a directory and for finding files in a directory tree." $nl
"Traversing directories:"
{ $subsection recursive-directory }
{ $subsection each-file }
"Finding files:"
{ $subsection find-file }
{ $subsection find-all-files }
{ $subsection find-in-directories }
{ $subsection find-all-in-directories } ;
ABOUT: "io.directories.search"

View File

@ -5,10 +5,10 @@ io.directories io.files io.files.info io.pathnames kernel
sequences system vocabs.loader ;
IN: io.directories.search
TUPLE: directory-iterator path bfs queue ;
<PRIVATE
TUPLE: directory-iterator path bfs queue ;
: qualified-directory ( path -- seq )
dup directory-files [ append-path ] with map ;
@ -38,22 +38,25 @@ TUPLE: directory-iterator path bfs queue ;
PRIVATE>
: each-file ( path bfs? quot: ( obj -- ) -- )
[ <directory-iterator> ] dip
[ f ] compose iterate-directory drop ; inline
: recursive-directory ( path bfs? -- paths )
[ ] accumulator [ each-file ] dip ;
: find-file ( path bfs? quot: ( obj -- ? ) -- path/f )
[ <directory-iterator> ] dip
[ keep and ] curry iterate-directory ; inline
: each-file ( path bfs? quot: ( obj -- ? ) -- )
[ <directory-iterator> ] dip
[ f ] compose iterate-directory drop ; inline
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths )
[ <directory-iterator> ] dip
pusher [ [ f ] compose iterate-directory drop ] dip ; inline
: recursive-directory ( path bfs? -- paths )
[ ] accumulator [ each-file ] dip ;
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path' )
'[ _ _ find-file ] attempt-all ;
: find-in-directories ( directories bfs? quot -- path' )
'[ _ _ find-file ] attempt-all ; inline
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths )
'[ _ _ find-all-files ] map concat ;
os windows? [ "io.directories.search.windows" require ] when

View File

@ -0,0 +1,311 @@
# GraphemeBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:22 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
#
# Default Grapheme Break Test
#
# Format:
# <string> (# <comment>)?
# <string> contains hex Unicode code points, with
# ÷ wherever there is a break opportunity, and
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the line_break property* for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.
#
÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0020 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0020 × 0308 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000D ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 000A ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0001 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0300 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0300 × 0308 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0E40 × 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
÷ 0E40 × 0308 ÷ 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0E40 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0E40 × 0308 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0E40 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0E40 × 0308 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0E40 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0E40 × 0308 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0E40 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0E40 × 0308 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0E40 × 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0E40 × 0308 ÷ 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0E40 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0E40 × 0308 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0E40 × 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0E40 × 0308 ÷ 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0E40 × 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0E40 × 0308 ÷ 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0E40 × 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0E40 × 0308 ÷ 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0E40 × AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0E40 × 0308 ÷ AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0E40 × AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0E40 × 0308 ÷ AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 0903 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0903 × 0308 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1100 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 1100 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 1160 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 1160 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ 11A8 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 11A8 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC00 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ AC00 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
÷ AC01 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ AC01 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
# Lines: 288

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,39 @@
USING: tools.test unicode.breaks sequences math kernel ;
USING: tools.test unicode.breaks sequences math kernel splitting
unicode.categories io.pathnames io.encodings.utf8 io.files
strings quotations math.parser locals ;
IN: unicode.breaks.tests
[ "\u001112\u001161\u0011abA\u000300a\r\r\n" ]
[ "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse ] unit-test
[ "dcba" ] [ "abcd" string-reverse ] unit-test
[ 3 ] [ "\u001112\u001161\u0011abA\u000300a"
dup last-grapheme head last-grapheme ] unit-test
: grapheme-break-test ( -- filename )
"resource:basis/unicode/breaks/GraphemeBreakTest.txt" ;
: word-break-test ( -- filename )
"resource:basis/unicode/breaks/WordBreakTest.txt" ;
: parse-test-file ( file-name -- tests )
utf8 file-lines
[ "#" split1 drop ] map harvest [
"÷" split
[ "×" split [ [ blank? ] trim hex> ] map harvest >string ] map
harvest
] map ;
:: test ( tests quot -- )
tests [
[ 1quotation ]
[ concat [ quot call [ "" like ] map ] curry ] bi unit-test
] each ;
: grapheme-test ( tests quot -- )
[
[ 1quotation ]
[ concat [ >graphemes [ "" like ] map ] curry ] bi unit-test
] each ;
grapheme-break-test parse-test-file [ >graphemes ] test
! word-break-test parse-test-file [ >words ] test

View File

@ -4,15 +4,21 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize values
io.encodings.ascii unicode.syntax unicode.data compiler.units
alien.syntax sets ;
alien.syntax sets accessors interval-maps memoize locals words ;
IN: unicode.breaks
C-ENUM: Any L V T Extend Control CR LF graphemes ;
! Grapheme breaks
C-ENUM: Any L V T LV LVT Extend Control CR LF
SpacingMark Prepend graphemes ;
: jamo-class ( ch -- class )
dup initial? [ drop L ]
[ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
: hangul-class ( ch -- class )
hangul-base - HEX: 1C mod zero? LV LVT ? ;
CATEGORY: grapheme-control Zl Zp Cc Cf ;
: control-class ( ch -- class )
{
@ -27,16 +33,24 @@ CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? )
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
: loe? ( ch -- ? )
"Logical_Order_Exception" property? ;
CATEGORY: spacing Mc ;
: grapheme-class ( ch -- class )
{
{ [ dup jamo? ] [ jamo-class ] }
{ [ dup hangul? ] [ hangul-class ] }
{ [ dup grapheme-control? ] [ control-class ] }
{ [ extend? ] [ Extend ] }
{ [ dup extend? ] [ drop Extend ] }
{ [ dup spacing? ] [ drop SpacingMark ] }
{ [ loe? ] [ Prepend ] }
[ Any ]
} cond ;
: init-grapheme-table ( -- table )
graphemes [ graphemes f <array> ] replicate ;
: init-table ( size -- table )
dup [ f <array> ] curry replicate ;
SYMBOL: table
@ -49,22 +63,32 @@ SYMBOL: table
: connect ( class1 class2 -- ) 1 set-table ;
: disconnect ( class1 class2 -- ) 0 set-table ;
: check-before ( class classes value -- )
[ set-table ] curry with each ;
: check-after ( classes class value -- )
[ set-table ] 2curry each ;
: connect-before ( class classes -- )
[ connect ] with each ;
1 check-before ;
: connect-after ( classes class -- )
[ connect ] curry each ;
1 check-after ;
: break-around ( classes1 classes2 -- )
[ [ 2dup disconnect swap disconnect ] with each ] curry each ;
: make-grapheme-table ( -- )
CR LF connect
Control CR LF 3array graphemes break-around
L L V 2array connect-before
L L V LV LVT 4array connect-before
V V T 2array connect-before
LV V T 2array connect-before
T T connect
graphemes Extend connect-after ;
LVT T connect
graphemes Extend connect-after
graphemes SpacingMark connect-after
Prepend graphemes connect-before ;
VALUE: grapheme-table
@ -82,14 +106,17 @@ VALUE: grapheme-table
[ grapheme-class tuck grapheme-break? ] find-index
nip swap length or 1+ ;
: (>graphemes) ( str -- )
[
dup first-grapheme cut-slice
swap , (>graphemes)
:: (>pieces) ( str quot -- )
str [
dup quot call cut-slice
swap , quot (>pieces)
] unless-empty ;
: >pieces ( str quot -- graphemes )
[ (>pieces) ] { } make ;
: >graphemes ( str -- graphemes )
[ (>graphemes) ] { } make ;
[ first-grapheme ] >pieces ;
: string-reverse ( str -- rts )
>graphemes reverse concat ;
@ -98,6 +125,82 @@ VALUE: grapheme-table
unclip-last-slice grapheme-class swap
[ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
init-grapheme-table table
graphemes init-table table
[ make-grapheme-table finish-table ] with-variable
to: grapheme-table
! Word breaks
VALUE: word-break-table
"resource:basis/unicode/data/WordBreakProperty.txt" load-script
to: word-break-table
C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
wMidNum wMidNumLet wNumeric wExtendNumLet words ;
MEMO: word-break-classes ( -- table )
H{
{ "Other" wOther } { "CR" wCR } { "LF" wLF } { "Newline" wNewline }
{ "Extend" wExtend } { "Format" wFormat } { "Katakana" wKatakana }
{ "ALetter" wALetter } { "MidLetter" wMidLetter }
{ "MidNum" wMidNum } { "MidNumLet" wMidNumLet } { "Numeric" wNumeric }
{ "ExtendNumLet" wExtendNumLet }
} [ execute ] assoc-map ;
: word-break-prop ( char -- word-break-prop )
word-break-table interval-at
word-break-classes at [ wOther ] unless* ;
: e ( seq -- seq ) [ execute ] map ;
SYMBOL: check-letter-before
SYMBOL: check-letter-after
SYMBOL: check-number-before
SYMBOL: check-number-after
: make-word-table ( -- )
wCR wLF connect
{ wNewline wCR wLF } e words break-around
wALetter dup connect
wALetter { wMidLetter wMidNumLet } e check-letter-after check-before
{ wMidLetter wMidNumLet } e wALetter check-letter-before check-after
wNumeric dup connect
wALetter wNumeric connect
wNumeric wALetter connect
wNumeric { wMidNum wMidNumLet } e check-number-after check-before
{ wMidNum wMidNumLet } e wNumeric check-number-before check-after
wKatakana dup connect
{ wALetter wNumeric wKatakana wExtendNumLet } e wExtendNumLet
[ connect-after ] [ swap connect-before ] 2bi ;
VALUE: word-table
: finish-word-table ( -- table )
table get [
[ { { 0 [ f ] } { 1 [ t ] } [ ] } case ] map
] map ;
words init-table table
[ make-word-table finish-word-table ] with-variable
to: word-table
: word-break? ( class1 class2 -- ? )
word-table nth nth not ;
: skip? ( char -- ? )
word-break-prop { 4 5 } member? ; ! wExtend or wFormat
: word-break-next ( old-class new-char -- next-class ? )
word-break-prop dup { 4 5 } member?
[ drop f ] [ tuck word-break? ] if ;
: first-word ( str -- i )
unclip-slice word-break-prop over
[ word-break-next ] find-index
nip swap length or 1+ ;
! This must be changed to ignore format/extended chars and
! handle symbols in the table specially
: >words ( str -- words )
[ first-word ] >pieces ;

View File

@ -0,0 +1,197 @@
# CompositionExclusions-5.1.0.txt
# Date: 2008-03-20, 17:45:00 PDT [KW]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2008 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see
# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
#
# For a full derivation of composition exclusions, see the derived property
# Full_Composition_Exclusion in DerivedNormalizationProps.txt
#
# ================================================
# (1) Script Specifics
#
# This list of characters cannot be derived from the UnicodeData.txt file.
# ================================================
0958 # DEVANAGARI LETTER QA
0959 # DEVANAGARI LETTER KHHA
095A # DEVANAGARI LETTER GHHA
095B # DEVANAGARI LETTER ZA
095C # DEVANAGARI LETTER DDDHA
095D # DEVANAGARI LETTER RHA
095E # DEVANAGARI LETTER FA
095F # DEVANAGARI LETTER YYA
09DC # BENGALI LETTER RRA
09DD # BENGALI LETTER RHA
09DF # BENGALI LETTER YYA
0A33 # GURMUKHI LETTER LLA
0A36 # GURMUKHI LETTER SHA
0A59 # GURMUKHI LETTER KHHA
0A5A # GURMUKHI LETTER GHHA
0A5B # GURMUKHI LETTER ZA
0A5E # GURMUKHI LETTER FA
0B5C # ORIYA LETTER RRA
0B5D # ORIYA LETTER RHA
0F43 # TIBETAN LETTER GHA
0F4D # TIBETAN LETTER DDHA
0F52 # TIBETAN LETTER DHA
0F57 # TIBETAN LETTER BHA
0F5C # TIBETAN LETTER DZHA
0F69 # TIBETAN LETTER KSSA
0F76 # TIBETAN VOWEL SIGN VOCALIC R
0F78 # TIBETAN VOWEL SIGN VOCALIC L
0F93 # TIBETAN SUBJOINED LETTER GHA
0F9D # TIBETAN SUBJOINED LETTER DDHA
0FA2 # TIBETAN SUBJOINED LETTER DHA
0FA7 # TIBETAN SUBJOINED LETTER BHA
0FAC # TIBETAN SUBJOINED LETTER DZHA
0FB9 # TIBETAN SUBJOINED LETTER KSSA
FB1D # HEBREW LETTER YOD WITH HIRIQ
FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
FB2A # HEBREW LETTER SHIN WITH SHIN DOT
FB2B # HEBREW LETTER SHIN WITH SIN DOT
FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
FB2E # HEBREW LETTER ALEF WITH PATAH
FB2F # HEBREW LETTER ALEF WITH QAMATS
FB30 # HEBREW LETTER ALEF WITH MAPIQ
FB31 # HEBREW LETTER BET WITH DAGESH
FB32 # HEBREW LETTER GIMEL WITH DAGESH
FB33 # HEBREW LETTER DALET WITH DAGESH
FB34 # HEBREW LETTER HE WITH MAPIQ
FB35 # HEBREW LETTER VAV WITH DAGESH
FB36 # HEBREW LETTER ZAYIN WITH DAGESH
FB38 # HEBREW LETTER TET WITH DAGESH
FB39 # HEBREW LETTER YOD WITH DAGESH
FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
FB3B # HEBREW LETTER KAF WITH DAGESH
FB3C # HEBREW LETTER LAMED WITH DAGESH
FB3E # HEBREW LETTER MEM WITH DAGESH
FB40 # HEBREW LETTER NUN WITH DAGESH
FB41 # HEBREW LETTER SAMEKH WITH DAGESH
FB43 # HEBREW LETTER FINAL PE WITH DAGESH
FB44 # HEBREW LETTER PE WITH DAGESH
FB46 # HEBREW LETTER TSADI WITH DAGESH
FB47 # HEBREW LETTER QOF WITH DAGESH
FB48 # HEBREW LETTER RESH WITH DAGESH
FB49 # HEBREW LETTER SHIN WITH DAGESH
FB4A # HEBREW LETTER TAV WITH DAGESH
FB4B # HEBREW LETTER VAV WITH HOLAM
FB4C # HEBREW LETTER BET WITH RAFE
FB4D # HEBREW LETTER KAF WITH RAFE
FB4E # HEBREW LETTER PE WITH RAFE
# Total code points: 67
# ================================================
# (2) Post Composition Version precomposed characters
#
# These characters cannot be derived solely from the UnicodeData.txt file
# in this version of Unicode.
#
# Note that characters added to the standard after the
# Composition Version and which have canonical decomposition mappings
# are not automatically added to this list of Post Composition
# Version precomposed characters.
# ================================================
2ADC # FORKING
1D15E # MUSICAL SYMBOL HALF NOTE
1D15F # MUSICAL SYMBOL QUARTER NOTE
1D160 # MUSICAL SYMBOL EIGHTH NOTE
1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB # MUSICAL SYMBOL MINIMA
1D1BC # MUSICAL SYMBOL MINIMA BLACK
1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
1D1BF # MUSICAL SYMBOL FUSA WHITE
1D1C0 # MUSICAL SYMBOL FUSA BLACK
# Total code points: 14
# ================================================
# (3) Singleton Decompositions
#
# These characters can be derived from the UnicodeData.txt file
# by including all characters whose canonical decomposition
# consists of a single character.
#
# These characters are simply quoted here for reference.
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
# ================================================
# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
# 0343 COMBINING GREEK KORONIS
# 0374 GREEK NUMERAL SIGN
# 037E GREEK QUESTION MARK
# 0387 GREEK ANO TELEIA
# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
# 1F75 GREEK SMALL LETTER ETA WITH OXIA
# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
# 1FBE GREEK PROSGEGRAMMENI
# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
# 1FFD GREEK OXIA
# 2000..2001 [2] EN QUAD..EM QUAD
# 2126 OHM SIGN
# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
# 2329 LEFT-POINTING ANGLE BRACKET
# 232A RIGHT-POINTING ANGLE BRACKET
# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 1030
# ================================================
# (4) Non-Starter Decompositions
#
# These characters can be derived from the UnicodeData file
# by including all characters whose canonical decomposition consists
# of a sequence of characters, the first of which has a non-zero
# combining class.
#
# These characters are simply quoted here for reference.
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
# ================================================
# 0344 COMBINING GREEK DIALYTIKA TONOS
# 0F73 TIBETAN VOWEL SIGN II
# 0F75 TIBETAN VOWEL SIGN UU
# 0F81 TIBETAN VOWEL SIGN REVERSED II
# Total code points: 4

View File

@ -0,0 +1,816 @@
# WordBreakProperty-5.1.0.txt
# Date: 2008-03-20, 17:55:36 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# ================================================
# Property: Word_Break
# All code points not explicitly listed for Word_Break
# have the value Other (XX).
# @missing: 0000..10FFFF; Other
# ================================================
000D ; CR # Cc <control-000D>
# Total code points: 1
# ================================================
000A ; LF # Cc <control-000A>
# Total code points: 1
# ================================================
000B..000C ; Newline # Cc [2] <control-000B>..<control-000C>
0085 ; Newline # Cc <control-0085>
2028 ; Newline # Zl LINE SEPARATOR
2029 ; Newline # Zp PARAGRAPH SEPARATOR
# Total code points: 5
# ================================================
0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BF ; Extend # Mn HEBREW POINT RAFE
05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN
0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
064B..065E ; Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Extend # Me ARABIC START OF RUB EL HIZB
06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
0711 ; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Extend # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Extend # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
09BC ; Extend # Mn BENGALI SIGN NUKTA
09BE..09C0 ; Extend # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
09C1..09C4 ; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
09C7..09C8 ; Extend # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
09CB..09CC ; Extend # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
09CD ; Extend # Mn BENGALI SIGN VIRAMA
09D7 ; Extend # Mc BENGALI AU LENGTH MARK
09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
0A03 ; Extend # Mc GURMUKHI SIGN VISARGA
0A3C ; Extend # Mn GURMUKHI SIGN NUKTA
0A3E..0A40 ; Extend # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
0A51 ; Extend # Mn GURMUKHI SIGN UDAAT
0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
0A75 ; Extend # Mn GURMUKHI SIGN YAKASH
0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
0A83 ; Extend # Mc GUJARATI SIGN VISARGA
0ABC ; Extend # Mn GUJARATI SIGN NUKTA
0ABE..0AC0 ; Extend # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
0AC9 ; Extend # Mc GUJARATI VOWEL SIGN CANDRA O
0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B3C ; Extend # Mn ORIYA SIGN NUKTA
0B3E ; Extend # Mc ORIYA VOWEL SIGN AA
0B3F ; Extend # Mn ORIYA VOWEL SIGN I
0B40 ; Extend # Mc ORIYA VOWEL SIGN II
0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
0BBE..0BBF ; Extend # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
0BC0 ; Extend # Mn TAMIL VOWEL SIGN II
0BC1..0BC2 ; Extend # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
0BC6..0BC8 ; Extend # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
0BCA..0BCC ; Extend # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
0BCD ; Extend # Mn TAMIL SIGN VIRAMA
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
0C01..0C03 ; Extend # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C41..0C44 ; Extend # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C82..0C83 ; Extend # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0CBC ; Extend # Mn KANNADA SIGN NUKTA
0CBE ; Extend # Mc KANNADA VOWEL SIGN AA
0CBF ; Extend # Mn KANNADA VOWEL SIGN I
0CC0..0CC4 ; Extend # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
0CC6 ; Extend # Mn KANNADA VOWEL SIGN E
0CC7..0CC8 ; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
0CCA..0CCB ; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; Extend # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D82..0D83 ; Extend # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF..0DD1 ; Extend # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0DD8..0DDF ; Extend # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
0DF2..0DF3 ; Extend # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
0E31 ; Extend # Mn THAI CHARACTER MAI HAN-AKAT
0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN
0EB4..0EB9 ; Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
0EBB..0EBC ; Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0F39 ; Extend # Mn TIBETAN MARK TSA -PHRU
0F3E..0F3F ; Extend # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
0F7F ; Extend # Mc TIBETAN SIGN RNAM BCAD
0F80..0F84 ; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
0F90..0F97 ; Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN
102B..102C ; Extend # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
1031 ; Extend # Mc MYANMAR VOWEL SIGN E
1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
1038 ; Extend # Mc MYANMAR SIGN VISARGA
1039..103A ; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
103B..103C ; Extend # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
1056..1057 ; Extend # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
1062..1064 ; Extend # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
1067..106D ; Extend # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Extend # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
17B6 ; Extend # Mc KHMER VOWEL SIGN AA
17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
17C6 ; Extend # Mn KHMER SIGN NIKAHIT
17C7..17C8 ; Extend # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Extend # Mn KHMER SIGN ATTHACAN
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
1929..192B ; Extend # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
1930..1931 ; Extend # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA
1933..1938 ; Extend # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
19B0..19C0 ; Extend # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Extend # Mc BALINESE SIGN BISAH
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
1B3B ; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA
1B3D..1B41 ; Extend # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET
1B43..1B44 ; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
1B82 ; Extend # Mc SUNDANESE SIGN PANGWISAD
1BA1 ; Extend # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH
1C24..1C2B ; Extend # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; Extend # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A823..A824 ; Extend # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA33..AA34 ; Extend # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1285
# ================================================
00AD ; Format # Cf SOFT HYPHEN
0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 137
# ================================================
3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
# Total code points: 309
# ================================================
0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA ; ALetter # L& FEMININE ORDINAL INDICATOR
00B5 ; ALetter # L& MICRO SIGN
00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR
00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE
01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EC ; ALetter # Lm MODIFIER LETTER VOICING
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
0370..0373 ; ALetter # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0374 ; ALetter # Lm GREEK NUMERAL SIGN
0376..0377 ; ALetter # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A ; ALetter # Lm GREEK YPOGEGRAMMENI
037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
048A..0523 ; ALetter # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
0621..063F ; ALetter # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 ; ALetter # Lm ARABIC TATWEEL
0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D5 ; ALetter # Lo ARABIC LETTER AE
06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V
0710 ; ALetter # Lo SYRIAC LETTER ALAPH
0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
074D..07A5 ; ALetter # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
07B1 ; ALetter # Lo THAANA LETTER NAA
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07FA ; ALetter # Lm NKO LAJANYALAN
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; ALetter # Lo DEVANAGARI OM
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; ALetter # Lo DEVANAGARI LETTER CANDRA A
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
09B2 ; ALetter # Lo BENGALI LETTER LA
09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA
09CE ; ALetter # Lo BENGALI LETTER KHANDA TA
09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
0A5E ; ALetter # Lo GURMUKHI LETTER FA
0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA
0AD0 ; ALetter # Lo GUJARATI OM
0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA
0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
0B71 ; ALetter # Lo ORIYA LETTER WA
0B83 ; ALetter # Lo TAMIL SIGN VISARGA
0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
0B9C ; ALetter # Lo TAMIL LETTER JA
0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
0BD0 ; ALetter # Lo TAMIL OM
0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA
0C58..0C59 ; ALetter # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA
0CDE ; ALetter # Lo KANNADA LETTER FA
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA
0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D7A..0D7F ; ALetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA
0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
0F00 ; ALetter # Lo TIBETAN SYLLABLE OM
0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
0F49..0F6C ; ALetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA
12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA
1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
1B83..1BA0 ; ALetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
1BAE..1BAF ; ALetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
1C00..1C23 ; ALetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN
1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
1E00..1F15 ; ALetter # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE ; ALetter # L& GREEK PROSGEGRAMMENI
1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
2107 ; ALetter # L& EULER CONSTANT
210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N
2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z
2126 ; ALetter # L& OHM SIGN
2128 ; ALetter # L& BLACK-LETTER CAPITAL Z
212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL
2139 ; ALetter # L& INFORMATION SOURCE
213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
214E ; ALetter # L& TURNED SMALL F
2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
2185..2188 ; ALetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C6F ; ALetter # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
2C71..2C7C ; ALetter # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
2E2F ; ALetter # Lm VERTICAL TILDE
3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK
303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
303C ; ALetter # Lo MASU MARK
3105..312D ; ALetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; ALetter # Lm YI SYLLABLE WU
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER
A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
A62A..A62B ; ALetter # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
A640..A65F ; ALetter # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
A662..A66D ; ALetter # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O
A67F ; ALetter # Lm CYRILLIC PAYEROK
A680..A697 ; ALetter # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; ALetter # Lm MODIFIER LETTER US
A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A78B..A78C ; ALetter # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
A7FB..A801 ; ALetter # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I
A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A
AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10280..1029C ; ALetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
102A0..102D0 ; ALetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
10341 ; ALetter # Nl GOTHIC LETTER NINETY
10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED
10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10808 ; ALetter # Lo CYPRIOT SYLLABLE JO
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G
1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F
1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
# Total code points: 21903
# ================================================
003A ; MidLetter # Po COLON
00B7 ; MidLetter # Po MIDDLE DOT
0387 ; MidLetter # Po GREEK ANO TELEIA
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
2027 ; MidLetter # Po HYPHENATION POINT
FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON
FE55 ; MidLetter # Po SMALL COLON
FF1A ; MidLetter # Po FULLWIDTH COLON
# Total code points: 8
# ================================================
002C ; MidNum # Po COMMA
003B ; MidNum # Po SEMICOLON
037E ; MidNum # Po GREEK QUESTION MARK
0589 ; MidNum # Po ARMENIAN FULL STOP
060C..060D ; MidNum # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR
066C ; MidNum # Po ARABIC THOUSANDS SEPARATOR
07F8 ; MidNum # Po NKO COMMA
2044 ; MidNum # Sm FRACTION SLASH
FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA
FE14 ; MidNum # Po PRESENTATION FORM FOR VERTICAL SEMICOLON
FE50 ; MidNum # Po SMALL COMMA
FE54 ; MidNum # Po SMALL SEMICOLON
FF0C ; MidNum # Po FULLWIDTH COMMA
FF1B ; MidNum # Po FULLWIDTH SEMICOLON
# Total code points: 15
# ================================================
0027 ; MidNumLet # Po APOSTROPHE
002E ; MidNumLet # Po FULL STOP
2018 ; MidNumLet # Pi LEFT SINGLE QUOTATION MARK
2019 ; MidNumLet # Pf RIGHT SINGLE QUOTATION MARK
2024 ; MidNumLet # Po ONE DOT LEADER
FE52 ; MidNumLet # Po SMALL FULL STOP
FF07 ; MidNumLet # Po FULLWIDTH APOSTROPHE
FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
# Total code points: 8
# ================================================
0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE
0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066B ; Numeric # Po ARABIC DECIMAL SEPARATOR
06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
1090..1099 ; Numeric # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
1C50..1C59 ; Numeric # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
# Total code points: 361
# ================================================
005F ; ExtendNumLet # Pc LOW LINE
203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; ExtendNumLet # Pc INVERTED UNDERTIE
FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE
# Total code points: 10
# EOF

View File

@ -1,10 +1,10 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators.short-circuit assocs math kernel sequences
io.files hashtables quotations splitting grouping arrays
io.files hashtables quotations splitting grouping arrays io
math.parser hash2 math.order byte-arrays words namespaces words
compiler.units parser io.encodings.ascii values interval-maps
ascii sets combinators locals math.ranges sorting ;
ascii sets combinators locals math.ranges sorting make io.encodings.utf8 ;
IN: unicode.data
VALUE: simple-lower
@ -70,10 +70,20 @@ VALUE: properties
5 swap (process-data)
[ " " split [ hex> ] map ] assoc-map ;
: exclusions-file ( -- filename )
"resource:basis/unicode/data/CompositionExclusions.txt" ;
: exclusions ( -- set )
exclusions-file utf8 file-lines
[ "#" split1 drop [ blank? ] trim-right hex> ] map harvest ;
: remove-exclusions ( alist -- alist )
exclusions [ dup ] H{ } map>assoc assoc-diff ;
: process-canonical ( data -- hash2 hash )
(process-decomposed) [ first* ] filter
[
[ second length 2 = ] filter
[ second length 2 = ] filter remove-exclusions
! using 1009 as the size, the maximum load is 4
[ first2 first2 rot 3array ] map 1009 alist>hash2
] [ >hashtable chain-decomposed ] bi ;
@ -102,6 +112,7 @@ VALUE: properties
"Cc" "Cf" "Cs" "Co" } ;
: num-chars HEX: 2FA1E ;
! the maximum unicode char in the first 3 planes
: ?set-nth ( val index seq -- )
@ -179,3 +190,31 @@ load-data {
load-special-casing to: special-casing
load-properties to: properties
! Utility to load resource files that look like Scripts.txt
SYMBOL: interned
: parse-script ( stream -- assoc )
! assoc is code point/range => name
lines filter-comments [ split-; ] map ;
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
CHAR: . pick member? [
swap ".." split1 [ hex> ] bi@ 2array
] [ swap hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: process-script ( ranges -- table )
dup values prune interned
[ expand-ranges ] with-variable ;
: load-script ( filename -- table )
ascii <file-reader> parse-script process-script ;

View File

@ -41,4 +41,4 @@ IN: unicode.normalize.tests
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
} cleave ;
! parse-test [ run-line ] each
parse-test [ run-line ] each

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors ;
locals sorting.insertion accessors assocs ;
IN: unicode.normalize
! Conjoining Jamo behavior
@ -107,26 +107,34 @@ SYMBOL: char
current to current to current jamo>hangul , ;
: im, ( -- )
current to current 0 jamo>hangul , ;
current to current final-base jamo>hangul , ;
: compose-jamo ( -- )
initial-medial? [
--final? [ imf, ] [ im, ] if
] when to current jamo? [ compose-jamo ] when ;
] [ current , ] if to ;
: pass-combining ( -- )
current non-starter? [ current , to pass-combining ] when ;
: try-compose ( last-class char current-class -- )
swapd = [ after get push ] [
char get over combine-chars
[ nip char set ] [ after get push ] if*
:: try-compose ( last-class new-char current-class -- new-class )
last-class current-class = [ new-char after get push last-class ] [
char get new-char combine-chars
[ char set last-class ]
[ new-char after get push current-class ] if*
] if ;
: compose-iter ( n -- )
DEFER: compose-iter
: try-noncombining ( char -- )
char get swap combine-chars
[ char set to f compose-iter ] when* ;
: compose-iter ( last-class -- )
current [
dup combining-class dup
[ [ try-compose ] keep to compose-iter ] [ 3drop ] if
dup combining-class
[ try-compose to compose-iter ]
[ swap [ drop ] [ try-noncombining ] if ] if*
] [ drop ] if* ;
: ?new-after ( -- )
@ -136,9 +144,8 @@ SYMBOL: char
current [
dup jamo? [ drop compose-jamo ] [
char set to ?new-after
0 compose-iter
f compose-iter
char get , after get %
to
] if (compose)
] when* ;
@ -154,4 +161,4 @@ SYMBOL: char
nfd compose ;
: nfkc ( string -- nfkc )
nfkc compose ;
nfkd compose ;

View File

@ -1,4 +1,4 @@
USING: unicode.script tools.test ;
[ Latin ] [ CHAR: a script-of ] unit-test
[ Common ] [ 0 script-of ] unit-test
[ "Latin" ] [ CHAR: a script-of ] unit-test
[ "Common" ] [ 0 script-of ] unit-test

View File

@ -7,45 +7,10 @@ words words.symbol compiler.units arrays interval-maps
unicode.data ;
IN: unicode.script
<PRIVATE
VALUE: script-table
SYMBOL: interned
: parse-script ( stream -- assoc )
! assoc is code point/range => name
lines filter-comments [ split-; ] map ;
: range, ( value key -- )
swap interned get
[ name>> = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
CHAR: . pick member? [
swap ".." split1 [ hex> ] bi@ 2array
] [ swap hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: >symbols ( strings -- symbols )
[
[ "unicode.script" create dup define-symbol ] map
] with-compilation-unit ;
: process-script ( ranges -- )
dup values prune >symbols interned [
expand-ranges to: script-table
] with-variable ;
: load-script ( -- )
"resource:basis/unicode/script/Scripts.txt"
ascii <file-reader> parse-script process-script ;
load-script
PRIVATE>
SYMBOL: Unknown
"resource:basis/unicode/script/Scripts.txt" load-script
to: script-table
: script-of ( char -- script )
script-table interval-at [ Unknown ] unless* ;
script-table interval-at ;

View File

@ -50,7 +50,7 @@ HELP: set-real-user
HELP: user-passwd
{ $values
{ "obj" object }
{ "passwd" passwd } }
{ "passwd/f" "passwd or f" } }
{ $description "Returns the passwd tuple given a username string or user id." } ;
HELP: username

View File

@ -24,3 +24,7 @@ IN: unix.users.tests
[ ] [ effective-user-id [ ] with-effective-user ] unit-test
[ ] [ [ ] with-user-cache ] unit-test
[ "9999999999999999999" ] [ 9999999999999999999 username ] unit-test
[ f ] [ 89898989898989898989898989898 user-passwd ] unit-test

View File

@ -47,17 +47,18 @@ SYMBOL: user-cache
: with-user-cache ( quot -- )
[ <user-cache> user-cache ] dip with-variable ; inline
GENERIC: user-passwd ( obj -- passwd )
GENERIC: user-passwd ( obj -- passwd/f )
M: integer user-passwd ( id -- passwd/f )
user-cache get
[ at ] [ getpwuid passwd>new-passwd ] if* ;
[ at ] [ getpwuid [ passwd>new-passwd ] [ f ] if* ] if* ;
M: string user-passwd ( string -- passwd/f )
getpwnam dup [ passwd>new-passwd ] when ;
: username ( id -- string )
user-passwd username>> ;
dup user-passwd
[ nip username>> ] [ number>string ] if* ;
: user-id ( string -- id )
user-passwd uid>> ;

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint
quotations sequences splitting state-parser strings ;
quotations sequences splitting state-parser strings
combinators.short-circuit ;
IN: html.parser.utils
: string-parse-end? ( -- ? ) get-next not ;
@ -13,26 +14,26 @@ IN: html.parser.utils
dup length rot length 1- - head next* ;
: trim1 ( seq ch -- newseq )
[ ?head drop ] [ ?tail drop ] bi ;
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
: single-quote ( str -- newstr )
"'" dup surround ;
: quote? ( ch -- ? ) "'\"" member? ;
: double-quote ( str -- newstr )
"\"" dup surround ;
: single-quote ( str -- newstr ) "'" dup surround ;
: double-quote ( str -- newstr ) "\"" dup surround ;
: quote ( str -- newstr )
CHAR: ' over member?
[ double-quote ] [ single-quote ] if ;
: quoted? ( str -- ? )
[ f ]
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] if-empty ;
{
[ length 1 > ]
[ first quote? ]
[ [ first ] [ peek ] bi = ]
} 1&& ;
: ?quote ( str -- newstr )
dup quoted? [ quote ] unless ;
: ?quote ( str -- newstr ) dup quoted? [ quote ] unless ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;
: quote? ( ch -- ? ) "'\"" member? ;

View File

@ -70,3 +70,8 @@ C: <nil> nil
[ t ] [ pi [ pi ] matches? ] unit-test
[ 0.0 ] [ 0.0 pi + [ pi + ] undo ] unit-test
[ ] [ 3 [ _ ] undo ] unit-test
[ { 1 } ] [ { 1 2 3 } [ { 2 3 } append ] undo ] unit-test
[ { 3 } ] [ { 1 2 3 } [ { 1 2 } prepend ] undo ] unit-test
[ { 1 2 3 } [ { 1 2 } append ] undo ] must-fail
[ { 1 2 3 } [ { 2 3 } prepend ] undo ] must-fail

View File

@ -4,13 +4,13 @@ USING: accessors kernel words summary slots quotations
sequences assocs math arrays stack-checker effects generalizations
continuations debugger classes.tuple namespaces make vectors
bit-arrays byte-arrays strings sbufs math.functions macros
sequences.private combinators mirrors
sequences.private combinators mirrors splitting
combinators.short-circuit fry words.symbol ;
RENAME: _ fry => __
IN: inverse
ERROR: fail ;
M: fail summary drop "Unification failed" ;
M: fail summary drop "Matching failed" ;
: assure ( ? -- ) [ fail ] unless ;
@ -208,6 +208,9 @@ DEFER: _
\ unclip [ prefix ] define-inverse
\ suffix [ dup but-last swap peek ] define-inverse
\ append 1 [ [ ?tail assure ] curry ] define-pop-inverse
\ prepend 1 [ [ ?head assure ] curry ] define-pop-inverse
! Constructor inverse
: deconstruct-pred ( class -- quot )
"predicate" word-prop [ dupd call assure ] curry ;