Merge branch 'master' of git://factorcode.org/git/factor
commit
3e5e0174db
|
@ -1,4 +1,4 @@
|
|||
USING: strings.parser kernel namespaces unicode.data ;
|
||||
USING: strings.parser kernel namespaces unicode unicode.data ;
|
||||
IN: bootstrap.unicode
|
||||
|
||||
[ name>char [ "Invalid character" throw ] unless* ]
|
||||
|
|
|
@ -29,8 +29,7 @@ ABOUT: "grouping"
|
|||
HELP: groups
|
||||
{ $class-description "Instances are virtual sequences whose elements are disjoint fixed-length subsequences of an underlying sequence. Groups are mutable and resizable if the underlying sequence is mutable and resizable, respectively."
|
||||
$nl
|
||||
"New groups are created by calling " { $link <groups> } " and " { $link <sliced-groups> } "." }
|
||||
{ $see-also group } ;
|
||||
"New groups are created by calling " { $link <groups> } " and " { $link <sliced-groups> } "." } ;
|
||||
|
||||
HELP: group
|
||||
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "array" "a sequence of sequences" } }
|
||||
|
@ -48,11 +47,16 @@ HELP: <groups>
|
|||
"USING: arrays kernel prettyprint sequences grouping ;"
|
||||
"9 >array 3 <groups> dup reverse-here concat >array ." "{ 6 7 8 3 4 5 0 1 2 }"
|
||||
}
|
||||
{ $example
|
||||
"USING: kernel prettyprint sequences grouping ;"
|
||||
"{ 1 2 3 4 5 6 } 3 <groups> 0 swap nth ."
|
||||
"{ 1 2 3 }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: <sliced-groups>
|
||||
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "groups" groups } }
|
||||
{ $description "Outputs a virtual sequence whose elements are overlapping subsequences of " { $snippet "n" } " elements from the underlying sequence." }
|
||||
{ $description "Outputs a virtual sequence whose elements are slices of disjoint subsequences of " { $snippet "n" } " elements from the underlying sequence." }
|
||||
{ $examples
|
||||
{ $example
|
||||
"USING: arrays kernel prettyprint sequences grouping ;"
|
||||
|
@ -60,6 +64,11 @@ HELP: <sliced-groups>
|
|||
"dup [ reverse-here ] each concat >array ."
|
||||
"{ 2 1 0 5 4 3 8 7 6 }"
|
||||
}
|
||||
{ $example
|
||||
"USING: kernel prettyprint sequences grouping ;"
|
||||
"{ 1 2 3 4 5 6 } 3 <sliced-groups> 1 swap nth ."
|
||||
"T{ slice { from 3 } { to 6 } { seq { 1 2 3 4 5 6 } } }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: clumps
|
||||
|
@ -89,11 +98,23 @@ HELP: <clumps>
|
|||
"share-price 4 <clumps> [ [ sum ] [ length ] bi / ] map ."
|
||||
"{ 113/400 167/400 201/400 241/400 243/400 91/200 1/4 }"
|
||||
}
|
||||
{ $example
|
||||
"USING: kernel sequences grouping prettyprint ;"
|
||||
"{ 1 2 3 4 5 6 } 3 <clumps> second ."
|
||||
"{ 2 3 4 }"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: <sliced-clumps>
|
||||
{ $values { "seq" "a sequence" } { "n" "a non-negative integer" } { "clumps" clumps } }
|
||||
{ $description "Outputs a virtual sequence whose elements are overlapping slices of " { $snippet "n" } " elements from the underlying sequence." } ;
|
||||
{ $description "Outputs a virtual sequence whose elements are overlapping slices of " { $snippet "n" } " elements from the underlying sequence." }
|
||||
{ $examples
|
||||
{ $example
|
||||
"USING: kernel sequences grouping prettyprint ;"
|
||||
"{ 1 2 3 4 5 6 } 3 <sliced-clumps> second ."
|
||||
"T{ slice { from 1 } { to 4 } { seq { 1 2 3 4 5 6 } } }"
|
||||
}
|
||||
} ;
|
||||
|
||||
{ clumps groups } related-words
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2007, 2008 Slava Pestov.
|
||||
! Copyright (C) 2007, 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces kernel assocs io.files io.streams.duplex
|
||||
combinators arrays io.launcher io.encodings.binary io
|
||||
combinators arrays io.launcher io.encodings io.encodings.binary io
|
||||
http.server.static http.server http accessors sequences strings
|
||||
math.parser fry urls urls.encoding calendar ;
|
||||
IN: http.server.cgi
|
||||
|
@ -52,6 +52,7 @@ IN: http.server.cgi
|
|||
200 >>code
|
||||
"CGI output follows" >>message
|
||||
swap '[
|
||||
binary encode-output
|
||||
_ output-stream get swap <cgi-process> binary <process-stream> [
|
||||
post-request? [ request get post-data>> raw>> write flush ] when
|
||||
input-stream get swap (stream-copy)
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax kernel quotations ;
|
||||
IN: io.directories.search
|
||||
|
||||
HELP: each-file
|
||||
{ $values
|
||||
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
}
|
||||
{ $description "Performs a directory traversal, breadth-first or depth-first, and calls the quotation on the full pathname of each file." }
|
||||
{ $examples
|
||||
{ $unchecked-example "USING: sequences io.directories.search prettyprint ;"
|
||||
"\"resource:misc\" t [ . ] each-file"
|
||||
"! Recursive directory listing prints here"
|
||||
}
|
||||
} ;
|
||||
|
||||
HELP: recursive-directory
|
||||
{ $values
|
||||
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" }
|
||||
{ "paths" "a sequence of pathname strings" }
|
||||
}
|
||||
{ $description "Traverses a directory path recursively and returns a sequence of files in a breadth-first or depth-first manner." } ;
|
||||
|
||||
HELP: find-file
|
||||
{ $values
|
||||
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "path/f" "a pathname string or f" }
|
||||
}
|
||||
{ $description "Finds the first file in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
HELP: find-in-directories
|
||||
{ $values
|
||||
{ "directories" "a sequence of pathnames" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "path'" "a pathname string or f" }
|
||||
}
|
||||
{ $description "Finds the first file in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
HELP: find-all-files
|
||||
{ $values
|
||||
{ "path" "a pathname string" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "paths" "a sequence of pathname strings" }
|
||||
}
|
||||
{ $description "Finds all files in the input directory matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
HELP: find-all-in-directories
|
||||
{ $values
|
||||
{ "directories" "a sequence of directory paths" } { "bfs?" "a boolean, breadth-first or depth-first" } { "quot" quotation }
|
||||
{ "paths" "a sequence of pathname strings" }
|
||||
}
|
||||
{ $description "Finds all files in the input directories matching the predicate quotation in a breadth-first or depth-first traversal." } ;
|
||||
|
||||
{ find-file find-all-files find-in-directories find-all-in-directories } related-words
|
||||
|
||||
ARTICLE: "io.directories.search" "io.directories.search"
|
||||
"The " { $vocab-link "io.directories.search" } " vocabulary contains words used for recursively iterating over a directory and for finding files in a directory tree." $nl
|
||||
"Traversing directories:"
|
||||
{ $subsection recursive-directory }
|
||||
{ $subsection each-file }
|
||||
"Finding files:"
|
||||
{ $subsection find-file }
|
||||
{ $subsection find-all-files }
|
||||
{ $subsection find-in-directories }
|
||||
{ $subsection find-all-in-directories } ;
|
||||
|
||||
ABOUT: "io.directories.search"
|
|
@ -5,10 +5,10 @@ io.directories io.files io.files.info io.pathnames kernel
|
|||
sequences system vocabs.loader ;
|
||||
IN: io.directories.search
|
||||
|
||||
TUPLE: directory-iterator path bfs queue ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
TUPLE: directory-iterator path bfs queue ;
|
||||
|
||||
: qualified-directory ( path -- seq )
|
||||
dup directory-files [ append-path ] with map ;
|
||||
|
||||
|
@ -38,22 +38,25 @@ TUPLE: directory-iterator path bfs queue ;
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: each-file ( path bfs? quot: ( obj -- ) -- )
|
||||
[ <directory-iterator> ] dip
|
||||
[ f ] compose iterate-directory drop ; inline
|
||||
|
||||
: recursive-directory ( path bfs? -- paths )
|
||||
[ ] accumulator [ each-file ] dip ;
|
||||
|
||||
: find-file ( path bfs? quot: ( obj -- ? ) -- path/f )
|
||||
[ <directory-iterator> ] dip
|
||||
[ keep and ] curry iterate-directory ; inline
|
||||
|
||||
: each-file ( path bfs? quot: ( obj -- ? ) -- )
|
||||
[ <directory-iterator> ] dip
|
||||
[ f ] compose iterate-directory drop ; inline
|
||||
|
||||
: find-all-files ( path bfs? quot: ( obj -- ? ) -- paths )
|
||||
[ <directory-iterator> ] dip
|
||||
pusher [ [ f ] compose iterate-directory drop ] dip ; inline
|
||||
|
||||
: recursive-directory ( path bfs? -- paths )
|
||||
[ ] accumulator [ each-file ] dip ;
|
||||
: find-in-directories ( directories bfs? quot: ( obj -- ? ) -- path' )
|
||||
'[ _ _ find-file ] attempt-all ;
|
||||
|
||||
: find-in-directories ( directories bfs? quot -- path' )
|
||||
'[ _ _ find-file ] attempt-all ; inline
|
||||
: find-all-in-directories ( directories bfs? quot: ( obj -- ? ) -- paths )
|
||||
'[ _ _ find-all-files ] map concat ;
|
||||
|
||||
os windows? [ "io.directories.search.windows" require ] when
|
||||
|
|
|
@ -0,0 +1,311 @@
|
|||
# GraphemeBreakTest-5.1.0.txt
|
||||
# Date: 2008-03-11, 02:19:22 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
#
|
||||
# Default Grapheme Break Test
|
||||
#
|
||||
# Format:
|
||||
# <string> (# <comment>)?
|
||||
# <string> contains hex Unicode code points, with
|
||||
# ÷ wherever there is a break opportunity, and
|
||||
# × wherever there is not.
|
||||
# <comment> the format can change, but currently it shows:
|
||||
# - the sample character name
|
||||
# - (x) the line_break property* for the sample character
|
||||
# - [x] the rule that determines whether there is a break or not
|
||||
#
|
||||
# These samples may be extended or changed in the future.
|
||||
#
|
||||
÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0020 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 000D ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 000A ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0001 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0300 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0E40 × 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0E40 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0E40 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0E40 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0E40 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0E40 × 0308 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0E40 × 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0E40 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0E40 × 0308 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0E40 × 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0E40 × 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0E40 × 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0E40 × AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0E40 × AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0E40 × 0308 ÷ AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 0903 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 1100 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 1160 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ 11A8 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ AC00 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
|
||||
÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
|
||||
÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
|
||||
÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
|
||||
÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3]
|
||||
÷ AC01 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3]
|
||||
÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
|
||||
÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
|
||||
÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
|
||||
÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
|
||||
÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
|
||||
÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
|
||||
# Lines: 288
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,39 @@
|
|||
USING: help.syntax help.markup strings ;
|
||||
IN: unicode.breaks
|
||||
|
||||
ABOUT: "unicode.breaks"
|
||||
|
||||
ARTICLE: "unicode.breaks" "Word and grapheme breaks"
|
||||
"The " { $vocab-link "unicode.breaks" "unicode.breaks" } " vocabulary partially implements Unicode Standard Annex #29. This provides for segmentation of a string along grapheme and word boundaries. In Unicode, a grapheme, or a basic unit of display in text, may be more than one code point. For example, in the string \"e\\u000301\" (where U+0301 is a combining acute accent), there is only one grapheme, as the acute accent goes above the e, forming a single grapheme. Word breaks, in general, are more complicated than simply splitting by whitespace, and the Unicode algorithm provides for that."
|
||||
$nl "Operations for graphemes:"
|
||||
{ $subsection first-grapheme }
|
||||
{ $subsection last-grapheme }
|
||||
{ $subsection >graphemes }
|
||||
{ $subsection string-reverse }
|
||||
"Operations on words:"
|
||||
{ $subsection first-word }
|
||||
{ $subsection >words } ;
|
||||
|
||||
HELP: first-grapheme
|
||||
{ $values { "str" string } { "i" "an index" } }
|
||||
{ $description "Finds the length of the first grapheme of the string. This can be used repeatedly to efficiently traverse the graphemes of the string, using slices." } ;
|
||||
|
||||
HELP: last-grapheme
|
||||
{ $values { "str" string } { "i" "an index" } }
|
||||
{ $description "Finds the index of the start of the last grapheme of the string. This can be used to traverse the graphemes of a string backwards." } ;
|
||||
|
||||
HELP: >graphemes
|
||||
{ $values { "str" string } { "graphemes" "an array of strings" } }
|
||||
{ $description "Divides a string into a sequence of individual graphemes." } ;
|
||||
|
||||
HELP: string-reverse
|
||||
{ $values { "str" string } { "rts" string } }
|
||||
{ $description "Reverses a string, leaving graphemes intact." } ;
|
||||
|
||||
HELP: first-word
|
||||
{ $values { "str" string } { "i" "an index" } }
|
||||
{ $description "Finds the length of the first word in the string." } ;
|
||||
|
||||
HELP: >words
|
||||
{ $values { "str" string } { "words" "an array of strings" } }
|
||||
{ $description "Divides the string up into words." } ;
|
|
@ -1,7 +1,39 @@
|
|||
USING: tools.test unicode.breaks sequences math kernel ;
|
||||
USING: tools.test unicode.breaks sequences math kernel splitting
|
||||
unicode.categories io.pathnames io.encodings.utf8 io.files
|
||||
strings quotations math.parser locals ;
|
||||
IN: unicode.breaks.tests
|
||||
|
||||
[ "\u001112\u001161\u0011abA\u000300a\r\r\n" ]
|
||||
[ "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse ] unit-test
|
||||
[ "dcba" ] [ "abcd" string-reverse ] unit-test
|
||||
[ 3 ] [ "\u001112\u001161\u0011abA\u000300a"
|
||||
dup last-grapheme head last-grapheme ] unit-test
|
||||
|
||||
: grapheme-break-test ( -- filename )
|
||||
"resource:basis/unicode/breaks/GraphemeBreakTest.txt" ;
|
||||
|
||||
: word-break-test ( -- filename )
|
||||
"resource:basis/unicode/breaks/WordBreakTest.txt" ;
|
||||
|
||||
: parse-test-file ( file-name -- tests )
|
||||
utf8 file-lines
|
||||
[ "#" split1 drop ] map harvest [
|
||||
"÷" split
|
||||
[ "×" split [ [ blank? ] trim hex> ] map harvest >string ] map
|
||||
harvest
|
||||
] map ;
|
||||
|
||||
:: test ( tests quot -- )
|
||||
tests [
|
||||
[ 1quotation ]
|
||||
[ concat [ quot call [ "" like ] map ] curry ] bi unit-test
|
||||
] each ;
|
||||
|
||||
: grapheme-test ( tests quot -- )
|
||||
[
|
||||
[ 1quotation ]
|
||||
[ concat [ >graphemes [ "" like ] map ] curry ] bi unit-test
|
||||
] each ;
|
||||
|
||||
grapheme-break-test parse-test-file [ >graphemes ] test
|
||||
word-break-test parse-test-file [ >words ] test
|
||||
|
|
|
@ -2,17 +2,24 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: combinators.short-circuit unicode.categories kernel math
|
||||
combinators splitting sequences math.parser io.files io assocs
|
||||
arrays namespaces make math.ranges unicode.normalize values
|
||||
arrays namespaces make math.ranges unicode.normalize.private values
|
||||
io.encodings.ascii unicode.syntax unicode.data compiler.units
|
||||
alien.syntax sets ;
|
||||
alien.syntax sets accessors interval-maps memoize locals words ;
|
||||
IN: unicode.breaks
|
||||
|
||||
C-ENUM: Any L V T Extend Control CR LF graphemes ;
|
||||
<PRIVATE
|
||||
! Grapheme breaks
|
||||
|
||||
C-ENUM: Any L V T LV LVT Extend Control CR LF
|
||||
SpacingMark Prepend graphemes ;
|
||||
|
||||
: jamo-class ( ch -- class )
|
||||
dup initial? [ drop L ]
|
||||
[ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
|
||||
|
||||
: hangul-class ( ch -- class )
|
||||
hangul-base - HEX: 1C mod zero? LV LVT ? ;
|
||||
|
||||
CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
||||
: control-class ( ch -- class )
|
||||
{
|
||||
|
@ -27,16 +34,24 @@ CATEGORY: (extend) Me Mn ;
|
|||
: extend? ( ch -- ? )
|
||||
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
|
||||
|
||||
: loe? ( ch -- ? )
|
||||
"Logical_Order_Exception" property? ;
|
||||
|
||||
CATEGORY: spacing Mc ;
|
||||
|
||||
: grapheme-class ( ch -- class )
|
||||
{
|
||||
{ [ dup jamo? ] [ jamo-class ] }
|
||||
{ [ dup hangul? ] [ hangul-class ] }
|
||||
{ [ dup grapheme-control? ] [ control-class ] }
|
||||
{ [ extend? ] [ Extend ] }
|
||||
{ [ dup extend? ] [ drop Extend ] }
|
||||
{ [ dup spacing? ] [ drop SpacingMark ] }
|
||||
{ [ loe? ] [ Prepend ] }
|
||||
[ Any ]
|
||||
} cond ;
|
||||
|
||||
: init-grapheme-table ( -- table )
|
||||
graphemes [ graphemes f <array> ] replicate ;
|
||||
: init-table ( size -- table )
|
||||
dup [ f <array> ] curry replicate ;
|
||||
|
||||
SYMBOL: table
|
||||
|
||||
|
@ -49,22 +64,32 @@ SYMBOL: table
|
|||
: connect ( class1 class2 -- ) 1 set-table ;
|
||||
: disconnect ( class1 class2 -- ) 0 set-table ;
|
||||
|
||||
: check-before ( class classes value -- )
|
||||
[ set-table ] curry with each ;
|
||||
|
||||
: check-after ( classes class value -- )
|
||||
[ set-table ] 2curry each ;
|
||||
|
||||
: connect-before ( class classes -- )
|
||||
[ connect ] with each ;
|
||||
1 check-before ;
|
||||
|
||||
: connect-after ( classes class -- )
|
||||
[ connect ] curry each ;
|
||||
|
||||
1 check-after ;
|
||||
|
||||
: break-around ( classes1 classes2 -- )
|
||||
[ [ 2dup disconnect swap disconnect ] with each ] curry each ;
|
||||
|
||||
: make-grapheme-table ( -- )
|
||||
CR LF connect
|
||||
Control CR LF 3array graphemes break-around
|
||||
L L V 2array connect-before
|
||||
L L V LV LVT 4array connect-before
|
||||
V V T 2array connect-before
|
||||
LV V T 2array connect-before
|
||||
T T connect
|
||||
graphemes Extend connect-after ;
|
||||
LVT T connect
|
||||
graphemes Extend connect-after
|
||||
graphemes SpacingMark connect-after
|
||||
Prepend graphemes connect-before ;
|
||||
|
||||
VALUE: grapheme-table
|
||||
|
||||
|
@ -77,19 +102,28 @@ VALUE: grapheme-table
|
|||
: find-index ( seq quot -- i ) find drop ; inline
|
||||
: find-last-index ( seq quot -- i ) find-last drop ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: first-grapheme ( str -- i )
|
||||
unclip-slice grapheme-class over
|
||||
[ grapheme-class tuck grapheme-break? ] find-index
|
||||
nip swap length or 1+ ;
|
||||
|
||||
: (>graphemes) ( str -- )
|
||||
[
|
||||
dup first-grapheme cut-slice
|
||||
swap , (>graphemes)
|
||||
] unless-empty ;
|
||||
<PRIVATE
|
||||
|
||||
:: (>pieces) ( str quot -- )
|
||||
str [
|
||||
dup quot call cut-slice
|
||||
swap , quot (>pieces)
|
||||
] unless-empty ; inline recursive
|
||||
|
||||
: >pieces ( str quot -- graphemes )
|
||||
[ (>pieces) ] { } make ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: >graphemes ( str -- graphemes )
|
||||
[ (>graphemes) ] { } make ;
|
||||
[ first-grapheme ] >pieces ;
|
||||
|
||||
: string-reverse ( str -- rts )
|
||||
>graphemes reverse concat ;
|
||||
|
@ -98,6 +132,113 @@ VALUE: grapheme-table
|
|||
unclip-last-slice grapheme-class swap
|
||||
[ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
|
||||
|
||||
init-grapheme-table table
|
||||
<PRIVATE
|
||||
|
||||
graphemes init-table table
|
||||
[ make-grapheme-table finish-table ] with-variable
|
||||
to: grapheme-table
|
||||
|
||||
! Word breaks
|
||||
|
||||
VALUE: word-break-table
|
||||
|
||||
"resource:basis/unicode/data/WordBreakProperty.txt" load-script
|
||||
to: word-break-table
|
||||
|
||||
C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
|
||||
wMidNum wMidNumLet wNumeric wExtendNumLet words ;
|
||||
|
||||
: word-break-classes ( -- table ) ! Is there a way to avoid this?
|
||||
H{
|
||||
{ "Other" 0 } { "CR" 1 } { "LF" 2 } { "Newline" 3 }
|
||||
{ "Extend" 4 } { "Format" 5 } { "Katakana" 6 }
|
||||
{ "ALetter" 7 } { "MidLetter" 8 }
|
||||
{ "MidNum" 9 } { "MidNumLet" 10 } { "Numeric" 11 }
|
||||
{ "ExtendNumLet" 12 }
|
||||
} ;
|
||||
|
||||
: word-break-prop ( char -- word-break-prop )
|
||||
word-break-table interval-at
|
||||
word-break-classes at [ wOther ] unless* ;
|
||||
|
||||
: e ( seq -- seq ) [ execute ] map ;
|
||||
|
||||
SYMBOL: check-letter-before
|
||||
SYMBOL: check-letter-after
|
||||
SYMBOL: check-number-before
|
||||
SYMBOL: check-number-after
|
||||
|
||||
: make-word-table ( -- )
|
||||
wCR wLF connect
|
||||
{ wNewline wCR wLF } e words break-around
|
||||
wALetter dup connect
|
||||
wALetter { wMidLetter wMidNumLet } e check-letter-after check-before
|
||||
{ wMidLetter wMidNumLet } e wALetter check-letter-before check-after
|
||||
wNumeric dup connect
|
||||
wALetter wNumeric connect
|
||||
wNumeric wALetter connect
|
||||
wNumeric { wMidNum wMidNumLet } e check-number-after check-before
|
||||
{ wMidNum wMidNumLet } e wNumeric check-number-before check-after
|
||||
wKatakana dup connect
|
||||
{ wALetter wNumeric wKatakana wExtendNumLet } e wExtendNumLet
|
||||
[ connect-after ] [ swap connect-before ] 2bi ;
|
||||
|
||||
VALUE: word-table
|
||||
|
||||
: finish-word-table ( -- table )
|
||||
table get [
|
||||
[ { { 0 [ f ] } { 1 [ t ] } [ ] } case ] map
|
||||
] map ;
|
||||
|
||||
words init-table table
|
||||
[ make-word-table finish-word-table ] with-variable
|
||||
to: word-table
|
||||
|
||||
: word-table-nth ( class1 class2 -- ? )
|
||||
word-table nth nth ;
|
||||
|
||||
: property-not= ( i str property -- ? )
|
||||
pick [
|
||||
[ ?nth ] dip swap
|
||||
[ word-break-prop = not ] [ drop f ] if*
|
||||
] [ 3drop t ] if ;
|
||||
|
||||
: format/extended? ( ch -- ? )
|
||||
word-break-prop { 4 5 } member? ;
|
||||
|
||||
:: walk-up ( str i -- j )
|
||||
i 1 + str [ format/extended? not ] find-from drop
|
||||
1+ str [ format/extended? not ] find-from drop ; ! possible bounds error?
|
||||
|
||||
:: walk-down ( str i -- j )
|
||||
i str [ format/extended? not ] find-last-from drop
|
||||
1- str [ format/extended? not ] find-last-from drop ; ! possible bounds error?
|
||||
|
||||
:: word-break? ( table-entry i str -- ? )
|
||||
table-entry {
|
||||
{ t [ f ] }
|
||||
{ f [ t ] }
|
||||
{ check-letter-after
|
||||
[ str i walk-up str wALetter property-not= ] }
|
||||
{ check-letter-before
|
||||
[ str i walk-down str wALetter property-not= ] }
|
||||
{ check-number-after
|
||||
[ str i walk-up str wNumeric property-not= ] }
|
||||
{ check-number-before
|
||||
[ str i walk-down str wNumeric property-not= ] }
|
||||
} case ;
|
||||
|
||||
:: word-break-next ( old-class new-char i str -- next-class ? )
|
||||
new-char word-break-prop dup { 4 5 } member?
|
||||
[ drop old-class dup { 1 2 3 } member? ]
|
||||
[ old-class over word-table-nth i str word-break? ] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
:: first-word ( str -- i )
|
||||
str unclip-slice word-break-prop over <enum>
|
||||
[ swap str word-break-next ] assoc-find 2drop
|
||||
nip swap length or 1+ ;
|
||||
|
||||
: >words ( str -- words )
|
||||
[ first-word ] >pieces ;
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
USING: help.syntax help.markup ;
|
||||
IN: unicode.case
|
||||
|
||||
ABOUT: "unicode.case"
|
||||
|
||||
ARTICLE: "unicode.case" "Case mapping"
|
||||
"When considering Unicode in general and not just ASCII or a smaller character set, putting a string in upper case, title case or lower case is slightly more complicated. In most contexts it's best to use the general Unicode routines for case conversion. There is an additional type of casing, case-fold, which is defined as bringing a string into upper case and then lower. This exists because in some cases it is different from simple lower case."
|
||||
{ $subsection >upper }
|
||||
{ $subsection >lower }
|
||||
{ $subsection >title }
|
||||
{ $subsection >case-fold }
|
||||
"To test if a string is in a given case:"
|
||||
{ $subsection upper? }
|
||||
{ $subsection lower? }
|
||||
{ $subsection title? }
|
||||
{ $subsection case-fold? }
|
||||
"For certain languages (Turkish, Azeri, Lithuanian), case mapping is dependent on locale; to change this, set the following variable to the ISO-639-1 code for your language:"
|
||||
{ $subsection locale }
|
||||
"This is unnecessary for most languages." ;
|
|
@ -5,14 +5,15 @@ unicode.normalize math unicode.categories combinators
|
|||
assocs strings splitting kernel accessors ;
|
||||
IN: unicode.case
|
||||
|
||||
<PRIVATE
|
||||
: at-default ( key assoc -- value/key ) [ at ] [ drop ] 2bi or ;
|
||||
|
||||
: ch>lower ( ch -- lower ) simple-lower at-default ;
|
||||
: ch>upper ( ch -- upper ) simple-upper at-default ;
|
||||
: ch>title ( ch -- title ) simple-title at-default ;
|
||||
|
||||
PRIVATE>
|
||||
SYMBOL: locale ! Just casing locale, or overall?
|
||||
|
||||
<PRIVATE
|
||||
: i-dot? ( -- ? )
|
||||
locale get { "tr" "az" } member? ;
|
||||
|
||||
|
@ -79,7 +80,7 @@ SYMBOL: locale ! Just casing locale, or overall?
|
|||
[ [ % ] compose ] [ [ , ] compose ] bi* ?if
|
||||
] 2curry each
|
||||
] "" make ; inline
|
||||
|
||||
PRIVATE>
|
||||
: >lower ( string -- lower )
|
||||
i-dot? [ turk>lower ] when
|
||||
final-sigma [ lower>> ] [ ch>lower ] map-case ;
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
! Copyright (C) 2009 Your name.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax kernel ;
|
||||
IN: unicode.categories
|
||||
|
||||
HELP: LETTER?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether the code point is an upper-cased letter" } ;
|
||||
|
||||
HELP: Letter?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether the code point is a letter of any case" } ;
|
||||
|
||||
HELP: alpha?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether the code point is alphanumeric" } ;
|
||||
|
||||
HELP: blank?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether the code point is whitespace" } ;
|
||||
|
||||
HELP: character?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a number is a code point which has been assigned" } ;
|
||||
|
||||
HELP: control?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a code point is a control character" } ;
|
||||
|
||||
HELP: digit?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a code point is a digit" } ;
|
||||
|
||||
HELP: letter?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a code point is a lower-cased letter" } ;
|
||||
|
||||
HELP: printable?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a code point is printable, as opposed to being a control character or formatting character" } ;
|
||||
|
||||
HELP: uncased?
|
||||
{ $values { "ch" "a character" } { "?" "a boolean" } }
|
||||
{ $description "Determines whether a character has no case" } ;
|
||||
|
||||
ARTICLE: "unicode.categories" "Character classes"
|
||||
{ $vocab-link "unicode.categories" } " is a vocabulary which provides predicates for determining if a code point has a particular property, for example being a lower cased letter. These should be used in preference to the " { $vocab-link "ASCII" "ascii" } " equivalents in most cases. Below are links to the useful predicates, but note that each of these is defined to be a predicate class."
|
||||
{ $subsection blank? }
|
||||
{ $subsection letter? }
|
||||
{ $subsection LETTER? }
|
||||
{ $subsection Letter? }
|
||||
{ $subsection digit? }
|
||||
{ $subsection printable? }
|
||||
{ $subsection alpha? }
|
||||
{ $subsection control? }
|
||||
{ $subsection uncased? }
|
||||
{ $subsection character? } ;
|
||||
|
||||
ABOUT: "unicode.categories"
|
|
@ -1,10 +1,8 @@
|
|||
USING: help.syntax help.markup strings byte-arrays ;
|
||||
IN: unicode.collation
|
||||
|
||||
ABOUT: "unicode.collation"
|
||||
|
||||
ARTICLE: "unicode.collation" "Unicode collation algorithm (UCA)"
|
||||
"The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accouting for all of the characters in Unicode. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:"
|
||||
ARTICLE: "unicode.collation" "Collation and weak comparison"
|
||||
"The " { $vocab-link "unicode.collation" "unicode.collation" } " vocabulary implements the Unicode Collation Algorithm. The Unicode Collation Algorithm (UTS #10) forms a reasonable way to sort strings when accounting for all of the characters in Unicode. It is far preferred over code point order when sorting for human consumption, in user interfaces. At the moment, only the default Unicode collation element table (DUCET) is used, but a more accurate collation would take locale into account. The following words are defined:"
|
||||
{ $subsection sort-strings }
|
||||
{ $subsection collation-key }
|
||||
{ $subsection string<=> }
|
||||
|
@ -13,6 +11,8 @@ ARTICLE: "unicode.collation" "Unicode collation algorithm (UCA)"
|
|||
{ $subsection tertiary= }
|
||||
{ $subsection quaternary= } ;
|
||||
|
||||
ABOUT: "unicode.collation"
|
||||
|
||||
HELP: sort-strings
|
||||
{ $values { "strings" "a sequence of strings" } { "sorted" "the strings in DUCET order" } }
|
||||
{ $description "This word takes a sequence of strings and sorts them according to the UCA, using code point order as a tie-breaker." } ;
|
||||
|
|
|
@ -0,0 +1,197 @@
|
|||
# CompositionExclusions-5.1.0.txt
|
||||
# Date: 2008-03-20, 17:45:00 PDT [KW]
|
||||
#
|
||||
# This file lists the characters for the Composition Exclusion Table
|
||||
# defined in UAX #15, Unicode Normalization Forms.
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# For more information, see
|
||||
# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
|
||||
#
|
||||
# For a full derivation of composition exclusions, see the derived property
|
||||
# Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
#
|
||||
|
||||
# ================================================
|
||||
# (1) Script Specifics
|
||||
#
|
||||
# This list of characters cannot be derived from the UnicodeData.txt file.
|
||||
# ================================================
|
||||
|
||||
0958 # DEVANAGARI LETTER QA
|
||||
0959 # DEVANAGARI LETTER KHHA
|
||||
095A # DEVANAGARI LETTER GHHA
|
||||
095B # DEVANAGARI LETTER ZA
|
||||
095C # DEVANAGARI LETTER DDDHA
|
||||
095D # DEVANAGARI LETTER RHA
|
||||
095E # DEVANAGARI LETTER FA
|
||||
095F # DEVANAGARI LETTER YYA
|
||||
09DC # BENGALI LETTER RRA
|
||||
09DD # BENGALI LETTER RHA
|
||||
09DF # BENGALI LETTER YYA
|
||||
0A33 # GURMUKHI LETTER LLA
|
||||
0A36 # GURMUKHI LETTER SHA
|
||||
0A59 # GURMUKHI LETTER KHHA
|
||||
0A5A # GURMUKHI LETTER GHHA
|
||||
0A5B # GURMUKHI LETTER ZA
|
||||
0A5E # GURMUKHI LETTER FA
|
||||
0B5C # ORIYA LETTER RRA
|
||||
0B5D # ORIYA LETTER RHA
|
||||
0F43 # TIBETAN LETTER GHA
|
||||
0F4D # TIBETAN LETTER DDHA
|
||||
0F52 # TIBETAN LETTER DHA
|
||||
0F57 # TIBETAN LETTER BHA
|
||||
0F5C # TIBETAN LETTER DZHA
|
||||
0F69 # TIBETAN LETTER KSSA
|
||||
0F76 # TIBETAN VOWEL SIGN VOCALIC R
|
||||
0F78 # TIBETAN VOWEL SIGN VOCALIC L
|
||||
0F93 # TIBETAN SUBJOINED LETTER GHA
|
||||
0F9D # TIBETAN SUBJOINED LETTER DDHA
|
||||
0FA2 # TIBETAN SUBJOINED LETTER DHA
|
||||
0FA7 # TIBETAN SUBJOINED LETTER BHA
|
||||
0FAC # TIBETAN SUBJOINED LETTER DZHA
|
||||
0FB9 # TIBETAN SUBJOINED LETTER KSSA
|
||||
FB1D # HEBREW LETTER YOD WITH HIRIQ
|
||||
FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
|
||||
FB2A # HEBREW LETTER SHIN WITH SHIN DOT
|
||||
FB2B # HEBREW LETTER SHIN WITH SIN DOT
|
||||
FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
|
||||
FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
|
||||
FB2E # HEBREW LETTER ALEF WITH PATAH
|
||||
FB2F # HEBREW LETTER ALEF WITH QAMATS
|
||||
FB30 # HEBREW LETTER ALEF WITH MAPIQ
|
||||
FB31 # HEBREW LETTER BET WITH DAGESH
|
||||
FB32 # HEBREW LETTER GIMEL WITH DAGESH
|
||||
FB33 # HEBREW LETTER DALET WITH DAGESH
|
||||
FB34 # HEBREW LETTER HE WITH MAPIQ
|
||||
FB35 # HEBREW LETTER VAV WITH DAGESH
|
||||
FB36 # HEBREW LETTER ZAYIN WITH DAGESH
|
||||
FB38 # HEBREW LETTER TET WITH DAGESH
|
||||
FB39 # HEBREW LETTER YOD WITH DAGESH
|
||||
FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
|
||||
FB3B # HEBREW LETTER KAF WITH DAGESH
|
||||
FB3C # HEBREW LETTER LAMED WITH DAGESH
|
||||
FB3E # HEBREW LETTER MEM WITH DAGESH
|
||||
FB40 # HEBREW LETTER NUN WITH DAGESH
|
||||
FB41 # HEBREW LETTER SAMEKH WITH DAGESH
|
||||
FB43 # HEBREW LETTER FINAL PE WITH DAGESH
|
||||
FB44 # HEBREW LETTER PE WITH DAGESH
|
||||
FB46 # HEBREW LETTER TSADI WITH DAGESH
|
||||
FB47 # HEBREW LETTER QOF WITH DAGESH
|
||||
FB48 # HEBREW LETTER RESH WITH DAGESH
|
||||
FB49 # HEBREW LETTER SHIN WITH DAGESH
|
||||
FB4A # HEBREW LETTER TAV WITH DAGESH
|
||||
FB4B # HEBREW LETTER VAV WITH HOLAM
|
||||
FB4C # HEBREW LETTER BET WITH RAFE
|
||||
FB4D # HEBREW LETTER KAF WITH RAFE
|
||||
FB4E # HEBREW LETTER PE WITH RAFE
|
||||
|
||||
# Total code points: 67
|
||||
|
||||
# ================================================
|
||||
# (2) Post Composition Version precomposed characters
|
||||
#
|
||||
# These characters cannot be derived solely from the UnicodeData.txt file
|
||||
# in this version of Unicode.
|
||||
#
|
||||
# Note that characters added to the standard after the
|
||||
# Composition Version and which have canonical decomposition mappings
|
||||
# are not automatically added to this list of Post Composition
|
||||
# Version precomposed characters.
|
||||
# ================================================
|
||||
|
||||
2ADC # FORKING
|
||||
1D15E # MUSICAL SYMBOL HALF NOTE
|
||||
1D15F # MUSICAL SYMBOL QUARTER NOTE
|
||||
1D160 # MUSICAL SYMBOL EIGHTH NOTE
|
||||
1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
|
||||
1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
|
||||
1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
|
||||
1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
||||
1D1BB # MUSICAL SYMBOL MINIMA
|
||||
1D1BC # MUSICAL SYMBOL MINIMA BLACK
|
||||
1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
|
||||
1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
|
||||
1D1BF # MUSICAL SYMBOL FUSA WHITE
|
||||
1D1C0 # MUSICAL SYMBOL FUSA BLACK
|
||||
|
||||
# Total code points: 14
|
||||
|
||||
# ================================================
|
||||
# (3) Singleton Decompositions
|
||||
#
|
||||
# These characters can be derived from the UnicodeData.txt file
|
||||
# by including all characters whose canonical decomposition
|
||||
# consists of a single character.
|
||||
#
|
||||
# These characters are simply quoted here for reference.
|
||||
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
# ================================================
|
||||
|
||||
# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
|
||||
# 0343 COMBINING GREEK KORONIS
|
||||
# 0374 GREEK NUMERAL SIGN
|
||||
# 037E GREEK QUESTION MARK
|
||||
# 0387 GREEK ANO TELEIA
|
||||
# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
|
||||
# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
|
||||
# 1F75 GREEK SMALL LETTER ETA WITH OXIA
|
||||
# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
|
||||
# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
|
||||
# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
|
||||
# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
|
||||
# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
|
||||
# 1FBE GREEK PROSGEGRAMMENI
|
||||
# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
|
||||
# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
|
||||
# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
|
||||
# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
|
||||
# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
|
||||
# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
|
||||
# 1FFD GREEK OXIA
|
||||
# 2000..2001 [2] EN QUAD..EM QUAD
|
||||
# 2126 OHM SIGN
|
||||
# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
|
||||
# 2329 LEFT-POINTING ANGLE BRACKET
|
||||
# 232A RIGHT-POINTING ANGLE BRACKET
|
||||
# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
|
||||
# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
|
||||
# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
|
||||
# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
|
||||
# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
|
||||
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
|
||||
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
|
||||
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||||
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
|
||||
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 1030
|
||||
|
||||
# ================================================
|
||||
# (4) Non-Starter Decompositions
|
||||
#
|
||||
# These characters can be derived from the UnicodeData file
|
||||
# by including all characters whose canonical decomposition consists
|
||||
# of a sequence of characters, the first of which has a non-zero
|
||||
# combining class.
|
||||
#
|
||||
# These characters are simply quoted here for reference.
|
||||
# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
|
||||
# ================================================
|
||||
|
||||
# 0344 COMBINING GREEK DIALYTIKA TONOS
|
||||
# 0F73 TIBETAN VOWEL SIGN II
|
||||
# 0F75 TIBETAN VOWEL SIGN UU
|
||||
# 0F81 TIBETAN VOWEL SIGN REVERSED II
|
||||
|
||||
# Total code points: 4
|
||||
|
|
@ -0,0 +1,816 @@
|
|||
# WordBreakProperty-5.1.0.txt
|
||||
# Date: 2008-03-20, 17:55:36 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2008 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
|
||||
# ================================================
|
||||
|
||||
# Property: Word_Break
|
||||
|
||||
# All code points not explicitly listed for Word_Break
|
||||
# have the value Other (XX).
|
||||
|
||||
# @missing: 0000..10FFFF; Other
|
||||
|
||||
# ================================================
|
||||
|
||||
000D ; CR # Cc <control-000D>
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
000A ; LF # Cc <control-000A>
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
000B..000C ; Newline # Cc [2] <control-000B>..<control-000C>
|
||||
0085 ; Newline # Cc <control-0085>
|
||||
2028 ; Newline # Zl LINE SEPARATOR
|
||||
2029 ; Newline # Zp PARAGRAPH SEPARATOR
|
||||
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
|
||||
0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
|
||||
05BF ; Extend # Mn HEBREW POINT RAFE
|
||||
05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
|
||||
05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
|
||||
05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN
|
||||
0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
064B..065E ; Extend # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
|
||||
0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
|
||||
06DE ; Extend # Me ARABIC START OF RUB EL HIZB
|
||||
06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
|
||||
06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
|
||||
06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
|
||||
0711 ; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
|
||||
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
|
||||
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
|
||||
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
|
||||
0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
|
||||
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
094D ; Extend # Mn DEVANAGARI SIGN VIRAMA
|
||||
0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0981 ; Extend # Mn BENGALI SIGN CANDRABINDU
|
||||
0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
09BC ; Extend # Mn BENGALI SIGN NUKTA
|
||||
09BE..09C0 ; Extend # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
|
||||
09C1..09C4 ; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
|
||||
09C7..09C8 ; Extend # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
|
||||
09CB..09CC ; Extend # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
|
||||
09CD ; Extend # Mn BENGALI SIGN VIRAMA
|
||||
09D7 ; Extend # Mc BENGALI AU LENGTH MARK
|
||||
09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
|
||||
0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
|
||||
0A03 ; Extend # Mc GURMUKHI SIGN VISARGA
|
||||
0A3C ; Extend # Mn GURMUKHI SIGN NUKTA
|
||||
0A3E..0A40 ; Extend # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
|
||||
0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
|
||||
0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
|
||||
0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
|
||||
0A51 ; Extend # Mn GURMUKHI SIGN UDAAT
|
||||
0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
|
||||
0A75 ; Extend # Mn GURMUKHI SIGN YAKASH
|
||||
0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
|
||||
0A83 ; Extend # Mc GUJARATI SIGN VISARGA
|
||||
0ABC ; Extend # Mn GUJARATI SIGN NUKTA
|
||||
0ABE..0AC0 ; Extend # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
|
||||
0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
|
||||
0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
|
||||
0AC9 ; Extend # Mc GUJARATI VOWEL SIGN CANDRA O
|
||||
0ACB..0ACC ; Extend # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
||||
0ACD ; Extend # Mn GUJARATI SIGN VIRAMA
|
||||
0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
|
||||
0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU
|
||||
0B02..0B03 ; Extend # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
||||
0B3C ; Extend # Mn ORIYA SIGN NUKTA
|
||||
0B3E ; Extend # Mc ORIYA VOWEL SIGN AA
|
||||
0B3F ; Extend # Mn ORIYA VOWEL SIGN I
|
||||
0B40 ; Extend # Mc ORIYA VOWEL SIGN II
|
||||
0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
|
||||
0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
|
||||
0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
|
||||
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
|
||||
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
|
||||
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
|
||||
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
|
||||
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
|
||||
0BBE..0BBF ; Extend # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
|
||||
0BC0 ; Extend # Mn TAMIL VOWEL SIGN II
|
||||
0BC1..0BC2 ; Extend # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
|
||||
0BC6..0BC8 ; Extend # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
|
||||
0BCA..0BCC ; Extend # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
|
||||
0BCD ; Extend # Mn TAMIL SIGN VIRAMA
|
||||
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
|
||||
0C01..0C03 ; Extend # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
|
||||
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C41..0C44 ; Extend # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
||||
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
|
||||
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
|
||||
0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
|
||||
0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
|
||||
0C82..0C83 ; Extend # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
|
||||
0CBC ; Extend # Mn KANNADA SIGN NUKTA
|
||||
0CBE ; Extend # Mc KANNADA VOWEL SIGN AA
|
||||
0CBF ; Extend # Mn KANNADA VOWEL SIGN I
|
||||
0CC0..0CC4 ; Extend # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
|
||||
0CC6 ; Extend # Mn KANNADA VOWEL SIGN E
|
||||
0CC7..0CC8 ; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
|
||||
0CCA..0CCB ; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
|
||||
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
|
||||
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0D02..0D03 ; Extend # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D3E..0D40 ; Extend # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
|
||||
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
0D46..0D48 ; Extend # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
|
||||
0D4A..0D4C ; Extend # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
|
||||
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
|
||||
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
0D82..0D83 ; Extend # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
|
||||
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
|
||||
0DCF..0DD1 ; Extend # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
|
||||
0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||||
0DD6 ; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
|
||||
0DD8..0DDF ; Extend # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
|
||||
0DF2..0DF3 ; Extend # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
|
||||
0E31 ; Extend # Mn THAI CHARACTER MAI HAN-AKAT
|
||||
0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
|
||||
0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
|
||||
0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN
|
||||
0EB4..0EB9 ; Extend # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
|
||||
0EBB..0EBC ; Extend # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
|
||||
0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
|
||||
0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
|
||||
0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
|
||||
0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
|
||||
0F39 ; Extend # Mn TIBETAN MARK TSA -PHRU
|
||||
0F3E..0F3F ; Extend # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
|
||||
0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
|
||||
0F7F ; Extend # Mc TIBETAN SIGN RNAM BCAD
|
||||
0F80..0F84 ; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
|
||||
0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
|
||||
0F90..0F97 ; Extend # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA
|
||||
0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
|
||||
0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN
|
||||
102B..102C ; Extend # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
|
||||
102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
|
||||
1031 ; Extend # Mc MYANMAR VOWEL SIGN E
|
||||
1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
|
||||
1038 ; Extend # Mc MYANMAR SIGN VISARGA
|
||||
1039..103A ; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
|
||||
103B..103C ; Extend # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
|
||||
103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
|
||||
1056..1057 ; Extend # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
|
||||
1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
|
||||
105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
|
||||
1062..1064 ; Extend # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
|
||||
1067..106D ; Extend # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
|
||||
1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
|
||||
1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
|
||||
1083..1084 ; Extend # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
|
||||
1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
|
||||
1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
|
||||
108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
|
||||
108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
|
||||
135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
|
||||
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
|
||||
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
|
||||
1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
|
||||
1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
|
||||
17B6 ; Extend # Mc KHMER VOWEL SIGN AA
|
||||
17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
|
||||
17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
|
||||
17C6 ; Extend # Mn KHMER SIGN NIKAHIT
|
||||
17C7..17C8 ; Extend # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
|
||||
17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
|
||||
17DD ; Extend # Mn KHMER SIGN ATTHACAN
|
||||
180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA
|
||||
1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
1923..1926 ; Extend # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
|
||||
1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
|
||||
1929..192B ; Extend # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
|
||||
1930..1931 ; Extend # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
|
||||
1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA
|
||||
1933..1938 ; Extend # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
|
||||
1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
|
||||
19B0..19C0 ; Extend # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY
|
||||
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
|
||||
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
|
||||
1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
|
||||
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B04 ; Extend # Mc BALINESE SIGN BISAH
|
||||
1B34 ; Extend # Mn BALINESE SIGN REREKAN
|
||||
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
|
||||
1B3B ; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
|
||||
1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA
|
||||
1B3D..1B41 ; Extend # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
|
||||
1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET
|
||||
1B43..1B44 ; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
|
||||
1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
|
||||
1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
|
||||
1B82 ; Extend # Mc SUNDANESE SIGN PANGWISAD
|
||||
1BA1 ; Extend # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
|
||||
1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
|
||||
1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
|
||||
1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
|
||||
1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH
|
||||
1C24..1C2B ; Extend # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
|
||||
1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
|
||||
1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
|
||||
1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
|
||||
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
|
||||
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
|
||||
20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
|
||||
20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
|
||||
2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
|
||||
3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
A66F ; Extend # Mn COMBINING CYRILLIC VZMET
|
||||
A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
|
||||
A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
|
||||
A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
|
||||
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
|
||||
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
|
||||
A823..A824 ; Extend # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
|
||||
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
|
||||
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
|
||||
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
|
||||
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
|
||||
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
|
||||
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
|
||||
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
|
||||
A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
|
||||
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
|
||||
AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
|
||||
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
|
||||
AA33..AA34 ; Extend # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
|
||||
AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
|
||||
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
|
||||
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
|
||||
AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H
|
||||
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||||
10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
|
||||
10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
|
||||
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
|
||||
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
|
||||
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1285
|
||||
|
||||
# ================================================
|
||||
|
||||
00AD ; Format # Cf SOFT HYPHEN
|
||||
0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
06DD ; Format # Cf ARABIC END OF AYAH
|
||||
070F ; Format # Cf SYRIAC ABBREVIATION MARK
|
||||
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
|
||||
200B ; Format # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
|
||||
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
|
||||
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
|
||||
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
|
||||
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
|
||||
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
|
||||
E0001 ; Format # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 137
|
||||
|
||||
# ================================================
|
||||
|
||||
3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
|
||||
30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
|
||||
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
|
||||
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
|
||||
32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO
|
||||
3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO
|
||||
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
|
||||
# Total code points: 309
|
||||
|
||||
# ================================================
|
||||
|
||||
0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
|
||||
0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
|
||||
00AA ; ALetter # L& FEMININE ORDINAL INDICATOR
|
||||
00B5 ; ALetter # L& MICRO SIGN
|
||||
00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR
|
||||
00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
|
||||
00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
|
||||
01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE
|
||||
01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
|
||||
01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
|
||||
01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
|
||||
0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP
|
||||
0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
|
||||
02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
|
||||
02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
|
||||
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
02EC ; ALetter # Lm MODIFIER LETTER VOICING
|
||||
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
|
||||
0370..0373 ; ALetter # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
|
||||
0374 ; ALetter # Lm GREEK NUMERAL SIGN
|
||||
0376..0377 ; ALetter # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
|
||||
037A ; ALetter # Lm GREEK YPOGEGRAMMENI
|
||||
037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
|
||||
0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
|
||||
03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
|
||||
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
|
||||
048A..0523 ; ALetter # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
|
||||
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
|
||||
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||||
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
|
||||
05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
|
||||
0621..063F ; ALetter # Lo [31] ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0640 ; ALetter # Lm ARABIC TATWEEL
|
||||
0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
|
||||
0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
|
||||
06D5 ; ALetter # Lo ARABIC LETTER AE
|
||||
06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
|
||||
06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
|
||||
06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
|
||||
06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0710 ; ALetter # Lo SYRIAC LETTER ALAPH
|
||||
0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
|
||||
074D..07A5 ; ALetter # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
|
||||
07B1 ; ALetter # Lo THAANA LETTER NAA
|
||||
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
|
||||
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
|
||||
07FA ; ALetter # Lm NKO LAJANYALAN
|
||||
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
0950 ; ALetter # Lo DEVANAGARI OM
|
||||
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972 ; ALetter # Lo DEVANAGARI LETTER CANDRA A
|
||||
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
|
||||
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
|
||||
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
|
||||
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
|
||||
09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
|
||||
09B2 ; ALetter # Lo BENGALI LETTER LA
|
||||
09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
|
||||
09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA
|
||||
09CE ; ALetter # Lo BENGALI LETTER KHANDA TA
|
||||
09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
|
||||
09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
|
||||
09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
|
||||
0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
|
||||
0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
|
||||
0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
|
||||
0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
|
||||
0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
|
||||
0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
|
||||
0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
|
||||
0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
|
||||
0A5E ; ALetter # Lo GURMUKHI LETTER FA
|
||||
0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
|
||||
0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
|
||||
0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
|
||||
0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
|
||||
0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
|
||||
0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
|
||||
0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
|
||||
0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA
|
||||
0AD0 ; ALetter # Lo GUJARATI OM
|
||||
0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
|
||||
0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
|
||||
0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
|
||||
0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
|
||||
0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
|
||||
0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
|
||||
0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
|
||||
0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA
|
||||
0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
|
||||
0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
|
||||
0B71 ; ALetter # Lo ORIYA LETTER WA
|
||||
0B83 ; ALetter # Lo TAMIL SIGN VISARGA
|
||||
0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
|
||||
0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
|
||||
0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
|
||||
0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
|
||||
0B9C ; ALetter # Lo TAMIL LETTER JA
|
||||
0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
|
||||
0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
|
||||
0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
|
||||
0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
|
||||
0BD0 ; ALetter # Lo TAMIL OM
|
||||
0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
|
||||
0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
|
||||
0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
|
||||
0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
|
||||
0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
|
||||
0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA
|
||||
0C58..0C59 ; ALetter # Lo [2] TELUGU LETTER TSA..TELUGU LETTER DZA
|
||||
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
|
||||
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
|
||||
0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
|
||||
0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
|
||||
0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
|
||||
0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
|
||||
0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA
|
||||
0CDE ; ALetter # Lo KANNADA LETTER FA
|
||||
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
|
||||
0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
|
||||
0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA
|
||||
0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
|
||||
0D7A..0D7F ; ALetter # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
|
||||
0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
|
||||
0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
|
||||
0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA
|
||||
0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
|
||||
0F00 ; ALetter # Lo TIBETAN SYLLABLE OM
|
||||
0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
|
||||
0F49..0F6C ; ALetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
|
||||
0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
|
||||
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
|
||||
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
|
||||
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
|
||||
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
|
||||
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
|
||||
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
|
||||
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
|
||||
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
|
||||
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
|
||||
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
|
||||
125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
|
||||
1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
|
||||
128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
|
||||
1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
|
||||
12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
|
||||
12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
|
||||
12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA
|
||||
12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
|
||||
12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
|
||||
12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
|
||||
1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
|
||||
1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
|
||||
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
|
||||
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
|
||||
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
|
||||
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
|
||||
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
|
||||
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
|
||||
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
|
||||
1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
|
||||
170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
|
||||
1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
|
||||
1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA
|
||||
1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
|
||||
176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
|
||||
1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
|
||||
1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
|
||||
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
|
||||
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
|
||||
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
|
||||
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
|
||||
1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
|
||||
1B83..1BA0 ; ALetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
|
||||
1BAE..1BAF ; ALetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
|
||||
1C00..1C23 ; ALetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
|
||||
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
|
||||
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
|
||||
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
|
||||
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
|
||||
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
|
||||
1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN
|
||||
1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
|
||||
1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
|
||||
1E00..1F15 ; ALetter # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||||
1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
|
||||
1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
|
||||
1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
|
||||
1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
|
||||
1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBE ; ALetter # L& GREEK PROSGEGRAMMENI
|
||||
1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
|
||||
2107 ; ALetter # L& EULER CONSTANT
|
||||
210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
|
||||
2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N
|
||||
2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
|
||||
2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z
|
||||
2126 ; ALetter # L& OHM SIGN
|
||||
2128 ; ALetter # L& BLACK-LETTER CAPITAL Z
|
||||
212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
|
||||
212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
|
||||
2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL
|
||||
2139 ; ALetter # L& INFORMATION SOURCE
|
||||
213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
|
||||
2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
|
||||
214E ; ALetter # L& TURNED SMALL F
|
||||
2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
|
||||
2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
|
||||
2185..2188 ; ALetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
|
||||
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
2C60..2C6F ; ALetter # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
|
||||
2C71..2C7C ; ALetter # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
|
||||
2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V
|
||||
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
|
||||
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
|
||||
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
|
||||
2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
|
||||
2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
|
||||
2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
|
||||
2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
|
||||
2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
|
||||
2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
|
||||
2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
|
||||
2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
|
||||
2E2F ; ALetter # Lm VERTICAL TILDE
|
||||
3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK
|
||||
303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
303C ; ALetter # Lo MASU MARK
|
||||
3105..312D ; ALetter # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
|
||||
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
|
||||
31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
|
||||
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
|
||||
A015 ; ALetter # Lm YI SYLLABLE WU
|
||||
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
|
||||
A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
|
||||
A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER
|
||||
A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
|
||||
A62A..A62B ; ALetter # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
|
||||
A640..A65F ; ALetter # L& [32] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER YN
|
||||
A662..A66D ; ALetter # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
|
||||
A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O
|
||||
A67F ; ALetter # Lm CYRILLIC PAYEROK
|
||||
A680..A697 ; ALetter # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
|
||||
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
|
||||
A770 ; ALetter # Lm MODIFIER LETTER US
|
||||
A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
|
||||
A78B..A78C ; ALetter # L& [2] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER SALTILLO
|
||||
A7FB..A801 ; ALetter # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I
|
||||
A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
|
||||
A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
|
||||
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
|
||||
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
|
||||
A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
|
||||
A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
|
||||
A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A
|
||||
AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA
|
||||
AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
|
||||
AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
|
||||
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
|
||||
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
|
||||
FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
|
||||
FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
|
||||
FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
|
||||
FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH
|
||||
FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
|
||||
FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
|
||||
FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
|
||||
FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
|
||||
FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
|
||||
FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
|
||||
FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
|
||||
FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
|
||||
FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
||||
10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
|
||||
1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
|
||||
10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
|
||||
1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
|
||||
1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
|
||||
10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
|
||||
10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
|
||||
10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
|
||||
10280..1029C ; ALetter # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
|
||||
102A0..102D0 ; ALetter # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
|
||||
10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
|
||||
10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
|
||||
10341 ; ALetter # Nl GOTHIC LETTER NINETY
|
||||
10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
|
||||
1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED
|
||||
10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
|
||||
103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
|
||||
103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
|
||||
103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
|
||||
10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
|
||||
10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
|
||||
10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
|
||||
10808 ; ALetter # Lo CYPRIOT SYLLABLE JO
|
||||
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
|
||||
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
|
||||
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
|
||||
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
|
||||
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
|
||||
10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
|
||||
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
|
||||
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
|
||||
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
|
||||
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
|
||||
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
|
||||
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
|
||||
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
|
||||
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
|
||||
1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G
|
||||
1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
|
||||
1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
|
||||
1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
|
||||
1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F
|
||||
1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
|
||||
1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
|
||||
1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
|
||||
1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
|
||||
1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
|
||||
1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
|
||||
1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
|
||||
1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
|
||||
1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
|
||||
1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
|
||||
1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
|
||||
1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
|
||||
1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
|
||||
1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
|
||||
1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
|
||||
1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
|
||||
1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
|
||||
1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
|
||||
1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
|
||||
1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
|
||||
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
|
||||
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
|
||||
|
||||
# Total code points: 21903
|
||||
|
||||
# ================================================
|
||||
|
||||
003A ; MidLetter # Po COLON
|
||||
00B7 ; MidLetter # Po MIDDLE DOT
|
||||
0387 ; MidLetter # Po GREEK ANO TELEIA
|
||||
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
|
||||
2027 ; MidLetter # Po HYPHENATION POINT
|
||||
FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON
|
||||
FE55 ; MidLetter # Po SMALL COLON
|
||||
FF1A ; MidLetter # Po FULLWIDTH COLON
|
||||
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
002C ; MidNum # Po COMMA
|
||||
003B ; MidNum # Po SEMICOLON
|
||||
037E ; MidNum # Po GREEK QUESTION MARK
|
||||
0589 ; MidNum # Po ARMENIAN FULL STOP
|
||||
060C..060D ; MidNum # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR
|
||||
066C ; MidNum # Po ARABIC THOUSANDS SEPARATOR
|
||||
07F8 ; MidNum # Po NKO COMMA
|
||||
2044 ; MidNum # Sm FRACTION SLASH
|
||||
FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA
|
||||
FE14 ; MidNum # Po PRESENTATION FORM FOR VERTICAL SEMICOLON
|
||||
FE50 ; MidNum # Po SMALL COMMA
|
||||
FE54 ; MidNum # Po SMALL SEMICOLON
|
||||
FF0C ; MidNum # Po FULLWIDTH COMMA
|
||||
FF1B ; MidNum # Po FULLWIDTH SEMICOLON
|
||||
|
||||
# Total code points: 15
|
||||
|
||||
# ================================================
|
||||
|
||||
0027 ; MidNumLet # Po APOSTROPHE
|
||||
002E ; MidNumLet # Po FULL STOP
|
||||
2018 ; MidNumLet # Pi LEFT SINGLE QUOTATION MARK
|
||||
2019 ; MidNumLet # Pf RIGHT SINGLE QUOTATION MARK
|
||||
2024 ; MidNumLet # Po ONE DOT LEADER
|
||||
FE52 ; MidNumLet # Po SMALL FULL STOP
|
||||
FF07 ; MidNumLet # Po FULLWIDTH APOSTROPHE
|
||||
FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
|
||||
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE
|
||||
0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
066B ; Numeric # Po ARABIC DECIMAL SEPARATOR
|
||||
06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
|
||||
0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
|
||||
0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
|
||||
0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
|
||||
0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
|
||||
0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
|
||||
0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
|
||||
0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
|
||||
0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
|
||||
0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
|
||||
0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
|
||||
1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
|
||||
1090..1099 ; Numeric # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
|
||||
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
|
||||
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
|
||||
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
|
||||
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
|
||||
1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
|
||||
1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
|
||||
1C50..1C59 ; Numeric # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
|
||||
A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
|
||||
A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
|
||||
A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
|
||||
AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
|
||||
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
|
||||
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
|
||||
|
||||
# Total code points: 361
|
||||
|
||||
# ================================================
|
||||
|
||||
005F ; ExtendNumLet # Pc LOW LINE
|
||||
203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE
|
||||
2054 ; ExtendNumLet # Pc INVERTED UNDERTIE
|
||||
FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
|
||||
FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE
|
||||
FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# EOF
|
|
@ -1,10 +1,10 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: combinators.short-circuit assocs math kernel sequences
|
||||
io.files hashtables quotations splitting grouping arrays
|
||||
io.files hashtables quotations splitting grouping arrays io
|
||||
math.parser hash2 math.order byte-arrays words namespaces words
|
||||
compiler.units parser io.encodings.ascii values interval-maps
|
||||
ascii sets combinators locals math.ranges sorting ;
|
||||
ascii sets combinators locals math.ranges sorting make io.encodings.utf8 ;
|
||||
IN: unicode.data
|
||||
|
||||
VALUE: simple-lower
|
||||
|
@ -70,10 +70,20 @@ VALUE: properties
|
|||
5 swap (process-data)
|
||||
[ " " split [ hex> ] map ] assoc-map ;
|
||||
|
||||
: exclusions-file ( -- filename )
|
||||
"resource:basis/unicode/data/CompositionExclusions.txt" ;
|
||||
|
||||
: exclusions ( -- set )
|
||||
exclusions-file utf8 file-lines
|
||||
[ "#" split1 drop [ blank? ] trim-right hex> ] map harvest ;
|
||||
|
||||
: remove-exclusions ( alist -- alist )
|
||||
exclusions [ dup ] H{ } map>assoc assoc-diff ;
|
||||
|
||||
: process-canonical ( data -- hash2 hash )
|
||||
(process-decomposed) [ first* ] filter
|
||||
[
|
||||
[ second length 2 = ] filter
|
||||
[ second length 2 = ] filter remove-exclusions
|
||||
! using 1009 as the size, the maximum load is 4
|
||||
[ first2 first2 rot 3array ] map 1009 alist>hash2
|
||||
] [ >hashtable chain-decomposed ] bi ;
|
||||
|
@ -102,6 +112,7 @@ VALUE: properties
|
|||
"Cc" "Cf" "Cs" "Co" } ;
|
||||
|
||||
: num-chars HEX: 2FA1E ;
|
||||
|
||||
! one past the highest assigned Unicode code point in the first 3 planes (U+2FA1D)
|
||||
|
||||
: ?set-nth ( val index seq -- )
|
||||
|
@ -179,3 +190,31 @@ load-data {
|
|||
load-special-casing to: special-casing
|
||||
|
||||
load-properties to: properties
|
||||
|
||||
! Utility to load resource files that look like Scripts.txt
|
||||
|
||||
SYMBOL: interned
|
||||
|
||||
: parse-script ( stream -- assoc )
|
||||
! assoc is code point/range => name
|
||||
lines filter-comments [ split-; ] map ;
|
||||
|
||||
: range, ( value key -- )
|
||||
swap interned get
|
||||
[ = ] with find nip 2array , ;
|
||||
|
||||
: expand-ranges ( assoc -- interval-map )
|
||||
[
|
||||
[
|
||||
CHAR: . pick member? [
|
||||
swap ".." split1 [ hex> ] bi@ 2array
|
||||
] [ swap hex> ] if range,
|
||||
] assoc-each
|
||||
] { } make <interval-map> ;
|
||||
|
||||
: process-script ( ranges -- table )
|
||||
dup values prune interned
|
||||
[ expand-ranges ] with-variable ;
|
||||
|
||||
: load-script ( filename -- table )
|
||||
ascii <file-reader> parse-script process-script ;
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
USING: help.syntax help.markup strings ;
|
||||
IN: unicode.normalize
|
||||
|
||||
ABOUT: "unicode.normalize"
|
||||
|
||||
ARTICLE: "unicode.normalize" "Unicode normalization"
|
||||
"The " { $vocab-link "unicode.normalize" "unicode.normalize" } " vocabulary defines words for normalizing Unicode strings. In Unicode, it is often possible to have multiple sequences of characters which really represent exactly the same thing. For example, to represent e with an acute accent above, there are two possible strings: \"e\\u000301\" (the e character, followed by the combining acute accent character) and \"\\u0000e9\" (a single character, e with an acute accent). There are four normalization forms: NFD, NFC, NFKD, and NFKC. Basically, in NFD and NFKD, everything is expanded, whereas in NFC and NFKC, everything is contracted. In NFKD and NFKC, more things are expanded and contracted. This is a process which loses some information, so it should be done only with care. Most of the world uses NFC to communicate, but for many purposes, NFD/NFKD is easier to process. For more information, see Unicode Standard Annex #15 and section 3 of the Unicode standard."
|
||||
{ $subsection nfc }
|
||||
{ $subsection nfd }
|
||||
{ $subsection nfkc }
|
||||
{ $subsection nfkd }
|
||||
"If two strings in a normalization form are appended, the result may not be in that normalization form still. To append two strings in NFD and make sure the result is in NFD, the following procedure is supplied:"
|
||||
{ $subsection string-append } ;
|
||||
|
||||
HELP: nfc
{ $values { "string" string } { "nfc" "a string in NFC" } }
{ $description "Converts a string to Normalization Form C." } ;
|
||||
|
||||
HELP: nfd
{ $values { "string" string } { "nfd" "a string in NFD" } }
{ $description "Converts a string to Normalization Form D." } ;
|
||||
|
||||
HELP: nfkc
{ $values { "string" string } { "nfkc" "a string in NFKC" } }
{ $description "Converts a string to Normalization Form KC." } ;
|
||||
|
||||
HELP: nfkd
{ $values { "string" string } { "nfkd" "a string in NFKD" } }
{ $description "Converts a string to Normalization Form KD." } ;
|
||||
|
||||
HELP: string-append
|
||||
{ $values { "s1" "a string in NFD" } { "s2" "a string in NFD" } { "string" "a string in NFD" } }
|
||||
{ $description "Appends two strings, putting the result in NFD." } ;
|
|
@ -41,4 +41,4 @@ IN: unicode.normalize.tests
|
|||
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
|
||||
} cleave ;
|
||||
|
||||
! parse-test [ run-line ] each
|
||||
parse-test [ run-line ] each
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences namespaces make unicode.data kernel math arrays
|
||||
locals sorting.insertion accessors ;
|
||||
locals sorting.insertion accessors assocs ;
|
||||
IN: unicode.normalize
|
||||
|
||||
<PRIVATE
|
||||
! Conjoining Jamo behavior
|
||||
|
||||
: hangul-base HEX: ac00 ; inline
|
||||
|
@ -74,10 +75,12 @@ IN: unicode.normalize
|
|||
dup reorder
|
||||
] if ; inline
|
||||
|
||||
: nfd ( string -- string )
|
||||
PRIVATE>
|
||||
|
||||
: nfd ( string -- nfd )
|
||||
[ canonical-entry ] decompose ;
|
||||
|
||||
: nfkd ( string -- string )
|
||||
: nfkd ( string -- nfkd )
|
||||
[ compatibility-entry ] decompose ;
|
||||
|
||||
: string-append ( s1 s2 -- string )
|
||||
|
@ -87,6 +90,8 @@ IN: unicode.normalize
|
|||
0 over ?nth non-starter?
|
||||
[ length dupd reorder-back ] [ drop ] if ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! Normalization -- Composition
|
||||
SYMBOL: main-str
|
||||
SYMBOL: ind
|
||||
|
@ -107,26 +112,34 @@ SYMBOL: char
|
|||
current to current to current jamo>hangul , ;
|
||||
|
||||
: im, ( -- )
|
||||
current to current 0 jamo>hangul , ;
|
||||
current to current final-base jamo>hangul , ;
|
||||
|
||||
: compose-jamo ( -- )
|
||||
initial-medial? [
|
||||
--final? [ imf, ] [ im, ] if
|
||||
] when to current jamo? [ compose-jamo ] when ;
|
||||
] [ current , ] if to ;
|
||||
|
||||
: pass-combining ( -- )
|
||||
current non-starter? [ current , to pass-combining ] when ;
|
||||
|
||||
: try-compose ( last-class char current-class -- )
|
||||
swapd = [ after get push ] [
|
||||
char get over combine-chars
|
||||
[ nip char set ] [ after get push ] if*
|
||||
:: try-compose ( last-class new-char current-class -- new-class )
|
||||
last-class current-class = [ new-char after get push last-class ] [
|
||||
char get new-char combine-chars
|
||||
[ char set last-class ]
|
||||
[ new-char after get push current-class ] if*
|
||||
] if ;
|
||||
|
||||
: compose-iter ( n -- )
|
||||
DEFER: compose-iter
|
||||
|
||||
: try-noncombining ( char -- )
|
||||
char get swap combine-chars
|
||||
[ char set to f compose-iter ] when* ;
|
||||
|
||||
: compose-iter ( last-class -- )
|
||||
current [
|
||||
dup combining-class dup
|
||||
[ [ try-compose ] keep to compose-iter ] [ 3drop ] if
|
||||
dup combining-class
|
||||
[ try-compose to compose-iter ]
|
||||
[ swap [ drop ] [ try-noncombining ] if ] if*
|
||||
] [ drop ] if* ;
|
||||
|
||||
: ?new-after ( -- )
|
||||
|
@ -136,9 +149,8 @@ SYMBOL: char
|
|||
current [
|
||||
dup jamo? [ drop compose-jamo ] [
|
||||
char set to ?new-after
|
||||
0 compose-iter
|
||||
f compose-iter
|
||||
char get , after get %
|
||||
to
|
||||
] if (compose)
|
||||
] when* ;
|
||||
|
||||
|
@ -150,8 +162,10 @@ SYMBOL: char
|
|||
pass-combining (compose)
|
||||
] "" make ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: nfc ( string -- nfc )
|
||||
nfd compose ;
|
||||
|
||||
! Normalization Form KC: apply the compatibility decomposition (nfkd)
! first, then run the result through compose. The decomposition step
! must be nfkd, not nfkc itself, or the word recurses without end.
: nfkc ( string -- nfkc )
    nfkd compose ;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: unicode.script tools.test ;
|
||||
|
||||
[ Latin ] [ CHAR: a script-of ] unit-test
|
||||
[ Common ] [ 0 script-of ] unit-test
|
||||
[ "Latin" ] [ CHAR: a script-of ] unit-test
|
||||
[ "Common" ] [ 0 script-of ] unit-test
|
||||
|
|
|
@ -7,45 +7,10 @@ words words.symbol compiler.units arrays interval-maps
|
|||
unicode.data ;
|
||||
IN: unicode.script
|
||||
|
||||
<PRIVATE
|
||||
VALUE: script-table
|
||||
SYMBOL: interned
|
||||
|
||||
: parse-script ( stream -- assoc )
|
||||
! assoc is code point/range => name
|
||||
lines filter-comments [ split-; ] map ;
|
||||
|
||||
: range, ( value key -- )
|
||||
swap interned get
|
||||
[ name>> = ] with find nip 2array , ;
|
||||
|
||||
: expand-ranges ( assoc -- interval-map )
|
||||
[
|
||||
[
|
||||
CHAR: . pick member? [
|
||||
swap ".." split1 [ hex> ] bi@ 2array
|
||||
] [ swap hex> ] if range,
|
||||
] assoc-each
|
||||
] { } make <interval-map> ;
|
||||
|
||||
: >symbols ( strings -- symbols )
|
||||
[
|
||||
[ "unicode.script" create dup define-symbol ] map
|
||||
] with-compilation-unit ;
|
||||
|
||||
: process-script ( ranges -- )
|
||||
dup values prune >symbols interned [
|
||||
expand-ranges to: script-table
|
||||
] with-variable ;
|
||||
|
||||
: load-script ( -- )
|
||||
"resource:basis/unicode/script/Scripts.txt"
|
||||
ascii <file-reader> parse-script process-script ;
|
||||
|
||||
load-script
|
||||
PRIVATE>
|
||||
|
||||
SYMBOL: Unknown
|
||||
"resource:basis/unicode/script/Scripts.txt" load-script
|
||||
to: script-table
|
||||
|
||||
: script-of ( char -- script )
|
||||
script-table interval-at [ Unknown ] unless* ;
|
||||
script-table interval-at ;
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
USING: help.markup help.syntax ;
|
||||
IN: unicode
|
||||
|
||||
ARTICLE: "unicode" "Unicode"
|
||||
"Unicode is a set of characters, or " { $emphasis "code points" } " covering what's used in most world writing systems. Any Factor string can hold any of these code points transparently; a Factor string is a sequence of Unicode code points. Unicode is accompanied by several standard algorithms for common operations like encoding in files, capitalizing a string, finding the boundaries between words, etc. When a programmer is faced with a string manipulation problem, where the string represents human language, a Unicode algorithm is often much better than the naive one. This is not in terms of efficiency, but rather internationalization. Even English text that remains in ASCII is better served by the Unicode collation algorithm than a naive algorithm. The Unicode algorithms implemented here are:"
|
||||
{ $vocab-subsection "Case mapping" "unicode.case" }
|
||||
{ $vocab-subsection "Collation and weak comparison" "unicode.collation" }
|
||||
{ $vocab-subsection "Character classes" "unicode.categories" }
|
||||
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
|
||||
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
|
||||
"The following are mostly for internal use:"
|
||||
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
|
||||
{ $vocab-subsection "Unicode data tables" "unicode.data" }
|
||||
{ $see-also "io.encodings" } ;
|
||||
|
||||
ABOUT: "unicode"
|
|
@ -0,0 +1 @@
|
|||
IN: unicode
|
|
@ -50,7 +50,7 @@ HELP: set-real-user
|
|||
HELP: user-passwd
|
||||
{ $values
|
||||
{ "obj" object }
|
||||
{ "passwd" passwd } }
|
||||
{ "passwd/f" "passwd or f" } }
|
||||
{ $description "Returns the passwd tuple given a username string or user id." } ;
|
||||
|
||||
HELP: username
|
||||
|
|
|
@ -24,3 +24,7 @@ IN: unix.users.tests
|
|||
[ ] [ effective-user-id [ ] with-effective-user ] unit-test
|
||||
|
||||
[ ] [ [ ] with-user-cache ] unit-test
|
||||
|
||||
[ "9999999999999999999" ] [ 9999999999999999999 username ] unit-test
|
||||
|
||||
[ f ] [ 89898989898989898989898989898 user-passwd ] unit-test
|
||||
|
|
|
@ -47,17 +47,18 @@ SYMBOL: user-cache
|
|||
: with-user-cache ( quot -- )
|
||||
[ <user-cache> user-cache ] dip with-variable ; inline
|
||||
|
||||
GENERIC: user-passwd ( obj -- passwd )
|
||||
GENERIC: user-passwd ( obj -- passwd/f )
|
||||
|
||||
M: integer user-passwd ( id -- passwd/f )
|
||||
user-cache get
|
||||
[ at ] [ getpwuid passwd>new-passwd ] if* ;
|
||||
[ at ] [ getpwuid [ passwd>new-passwd ] [ f ] if* ] if* ;
|
||||
|
||||
M: string user-passwd ( string -- passwd/f )
|
||||
getpwnam dup [ passwd>new-passwd ] when ;
|
||||
|
||||
: username ( id -- string )
|
||||
user-passwd username>> ;
|
||||
dup user-passwd
|
||||
[ nip username>> ] [ number>string ] if* ;
|
||||
|
||||
: user-id ( string -- id )
|
||||
user-passwd uid>> ;
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
|
||||
USING: accessors arrays assocs colors combinators.short-circuit
|
||||
kernel locals math math.functions math.matrices math.order
|
||||
math.parser math.trig math.vectors opengl opengl.demo-support
|
||||
opengl.gl sbufs sequences strings ui.gadgets ui.gadgets.worlds
|
||||
ui.gestures ui.render ;
|
||||
USING: accessors arrays assocs calendar colors
|
||||
combinators.short-circuit help.markup help.syntax kernel locals
|
||||
math math.functions math.matrices math.order math.parser
|
||||
math.trig math.vectors opengl opengl.demo-support opengl.gl
|
||||
sbufs sequences strings threads ui.gadgets ui.gadgets.worlds
|
||||
ui.gestures ui.render ui.tools.workspace ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
@ -255,8 +256,26 @@ DEFER: default-L-parser-values
|
|||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
TUPLE: <L-system> < gadget
|
||||
camera display-list
|
||||
commands axiom rules string ;
|
||||
camera display-list pedestal paused commands axiom rules string ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
:: iterate-system ( GADGET -- ) GADGET pedestal>> 0.5 + GADGET (>>pedestal) ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
:: start-rotation-thread ( GADGET -- )
|
||||
GADGET f >>paused drop
|
||||
[
|
||||
[
|
||||
GADGET paused>>
|
||||
[ f ]
|
||||
[ GADGET iterate-system GADGET relayout-1 25 milliseconds sleep t ]
|
||||
if
|
||||
]
|
||||
loop
|
||||
]
|
||||
in-thread ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
@ -332,7 +351,7 @@ TUPLE: <L-system> < gadget
|
|||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
:: iterate-L-system-string ( L-SYSTEM -- )
|
||||
L-SYSTEM string>>
|
||||
L-SYSTEM string>> L-SYSTEM axiom>> or
|
||||
L-SYSTEM rules>>
|
||||
iterate-string
|
||||
L-SYSTEM (>>string) ;
|
||||
|
@ -357,7 +376,7 @@ TUPLE: <L-system> < gadget
|
|||
L-SYSTEM display-list>> GL_COMPILE glNewList
|
||||
|
||||
turtle
|
||||
L-SYSTEM string>>
|
||||
L-SYSTEM string>> L-SYSTEM axiom>> or
|
||||
L-SYSTEM commands>>
|
||||
interpret-string
|
||||
drop
|
||||
|
@ -387,6 +406,10 @@ M:: <L-system> draw-gadget* ( L-SYSTEM -- )
|
|||
! draw axis
|
||||
white gl-color GL_LINES glBegin { 0 0 0 } gl-vertex { 0 0 1 } gl-vertex glEnd
|
||||
|
||||
! rotate pedestal
|
||||
|
||||
L-SYSTEM pedestal>> 0 0 1 glRotated
|
||||
|
||||
L-SYSTEM display-list>> glCallList ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
@ -403,16 +426,12 @@ M:: <L-system> pref-dim* ( L-SYSTEM -- dim ) { 400 400 } ;
|
|||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
:: camera-left ( L-SYSTEM -- )
|
||||
L-SYSTEM camera>> 5 turn-left drop
|
||||
L-SYSTEM relayout-1 ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
:: with-camera ( L-SYSTEM QUOT -- )
|
||||
L-SYSTEM camera>> QUOT call drop
|
||||
L-SYSTEM relayout-1 ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
<L-system>
|
||||
H{
|
||||
{ T{ key-down f f "LEFT" } [ [ 5 turn-left ] with-camera ] }
|
||||
|
@ -423,6 +442,11 @@ H{
|
|||
{ T{ key-down f f "a" } [ [ 1 step-turtle ] with-camera ] }
|
||||
{ T{ key-down f f "z" } [ [ -1 step-turtle ] with-camera ] }
|
||||
|
||||
{ T{ key-down f f "q" } [ [ 5 roll-left ] with-camera ] }
|
||||
{ T{ key-down f f "w" } [ [ 5 roll-right ] with-camera ] }
|
||||
|
||||
{ T{ key-down f f "r" } [ start-rotation-thread ] }
|
||||
|
||||
{
|
||||
T{ key-down f f "x" }
|
||||
[
|
||||
|
@ -432,6 +456,8 @@ H{
|
|||
drop
|
||||
]
|
||||
}
|
||||
|
||||
{ T{ key-down f f "F1" } [ drop "L-system" help-window ] }
|
||||
|
||||
}
|
||||
set-gestures
|
||||
|
@ -441,8 +467,36 @@ set-gestures
|
|||
: L-system ( -- L-system )
|
||||
|
||||
<L-system> new-gadget
|
||||
|
||||
0 >>pedestal
|
||||
|
||||
turtle 45 turn-left 45 pitch-up 5 step-turtle 180 turn-left >>camera ;
|
||||
! turtle 45 turn-left 45 pitch-up 5 step-turtle 180 turn-left >>camera ;
|
||||
|
||||
turtle 90 pitch-down -5 step-turtle 2 strafe-up >>camera
|
||||
|
||||
dup start-rotation-thread
|
||||
|
||||
;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
ARTICLE: "L-system" "L-system"
|
||||
|
||||
"Press 'x' to iterate the L-system." $nl
|
||||
|
||||
"Camera control:"
|
||||
|
||||
{ $table
|
||||
|
||||
{ "a" "Forward" }
|
||||
{ "z" "Backward" }
|
||||
|
||||
{ "LEFT" "Turn left" }
|
||||
{ "RIGHT" "Turn right" }
|
||||
{ "UP" "Pitch down" }
|
||||
{ "DOWN" "Pitch up" }
|
||||
|
||||
{ "q" "Roll left" }
|
||||
{ "w" "Roll right" } } ;
|
||||
|
||||
ABOUT: "L-system"
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
USING: accessors kernel ui L-system ;
|
||||
USING: accessors ui L-system ;
|
||||
|
||||
IN: L-system.models.abop-1
|
||||
|
||||
|
@ -12,15 +12,13 @@ IN: L-system.models.abop-1
|
|||
"c(12)FFAL" >>axiom
|
||||
|
||||
{
|
||||
{ "A" "F[&'(.8)!BL]>(137)'!(.9)A" }
|
||||
{ "B" "F[-'(.8)!(.9)$CL]'!(.9)C" }
|
||||
{ "C" "F[+'(.8)!(.9)$BL]'!(.9)B" }
|
||||
{ "A" "F [ & '(.8) ! B L ] >(137) ' !(.9) A" }
|
||||
{ "B" "F [ - '(.8) !(.9) $ C L ] ' !(.9) C" }
|
||||
{ "C" "F [ + '(.8) !(.9) $ B L ] ' !(.9) B" }
|
||||
|
||||
{ "L" "~c(8){+(30)f-(120)f-(120)f}" }
|
||||
{ "L" " ~ c(8) { +(30) f -(120) f -(120) f }" }
|
||||
}
|
||||
>>rules
|
||||
|
||||
dup axiom>> >>string ;
|
||||
>>rules ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
|
||||
USING: accessors ui L-system ;
|
||||
|
||||
IN: L-system.models.abop-2
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: abop-2 ( <L-system> -- <L-system> )
|
||||
|
||||
L-parser-dialect >>commands
|
||||
|
||||
"c(12)FAL" >>axiom
|
||||
|
||||
{
|
||||
{ "A" "F[&'(.7)!BL]>(137)[&'(.6)!BL]>(137)'(.9)!(.9)A" }
|
||||
{ "B" "F[-'(.7)!(.9)$CL]'(.9)!(.9)C" }
|
||||
{ "C" "F[+'(.7)!(.9)$BL]'(.9)!(.9)B" }
|
||||
|
||||
{ "L" "~c(8){+f(.1)-f(.1)-f(.1)+|+f(.1)-f(.1)-f(.1)}" }
|
||||
|
||||
} >>rules ;
|
||||
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: main ( -- ) [ L-system abop-2 "L-system" open-window ] with-ui ;
|
||||
|
||||
MAIN: main
|
||||
|
|
@ -1,13 +1,17 @@
|
|||
! Copyright (C) 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: binary-search kernel math.primes.list math.ranges sequences
|
||||
prettyprint ;
|
||||
USING: binary-search compiler.units kernel math.primes math.ranges
|
||||
memoize prettyprint sequences ;
|
||||
IN: benchmark.binary-search
|
||||
|
||||
: binary-search-benchmark ( -- )
|
||||
1 1000000 [a,b] [ primes-under-million sorted-member? ] map length . ;
|
||||
[
|
||||
MEMO: primes-under-million ( -- seq ) 1000000 primes-upto ;
|
||||
] with-compilation-unit
|
||||
|
||||
! Force computation of the primes list before benchmarking the binary search
|
||||
primes-under-million drop
|
||||
|
||||
: binary-search-benchmark ( -- )
|
||||
1 1000000 [a,b] [ primes-under-million sorted-member? ] map length . ;
|
||||
|
||||
MAIN: binary-search-benchmark
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
USING: checksums checksums.crc32 io.encodings.ascii io.files kernel math ;
|
||||
IN: benchmark.crc32
|
||||
|
||||
: crc32-primes-list ( -- )
|
||||
: crc32-file ( -- )
|
||||
10 [
|
||||
"resource:extra/math/primes/list/list.factor"
|
||||
"resource:basis/mime/multipart/multipart-tests.factor"
|
||||
crc32 checksum-file drop
|
||||
] times ;
|
||||
|
||||
MAIN: crc32-primes-list
|
||||
MAIN: crc32-file
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
USING: checksums checksums.md5 io.files kernel ;
|
||||
IN: benchmark.md5
|
||||
|
||||
: md5-primes-list ( -- )
|
||||
"resource:extra/math/primes/list/list.factor" md5 checksum-file drop ;
|
||||
: md5-file ( -- )
|
||||
"resource:basis/mime/multipart/multipart-tests.factor" md5 checksum-file drop ;
|
||||
|
||||
MAIN: md5-primes-list
|
||||
MAIN: md5-file
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
USING: checksums checksums.sha1 io.files kernel ;
|
||||
IN: benchmark.sha1
|
||||
|
||||
: sha1-primes-list ( -- )
|
||||
"resource:extra/math/primes/list/list.factor" sha1 checksum-file drop ;
|
||||
: sha1-file ( -- )
|
||||
"resource:basis/mime/multipart/multipart-tests.factor" sha1 checksum-file drop ;
|
||||
|
||||
MAIN: sha1-primes-list
|
||||
MAIN: sha1-file
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: assocs circular combinators continuations hashtables
|
||||
hashtables.private io kernel math namespaces prettyprint
|
||||
quotations sequences splitting state-parser strings ;
|
||||
quotations sequences splitting state-parser strings
|
||||
combinators.short-circuit ;
|
||||
IN: html.parser.utils
|
||||
|
||||
: string-parse-end? ( -- ? ) get-next not ;
|
||||
|
@ -13,26 +14,26 @@ IN: html.parser.utils
|
|||
dup length rot length 1- - head next* ;
|
||||
|
||||
: trim1 ( seq ch -- newseq )
|
||||
[ ?head drop ] [ ?tail drop ] bi ;
|
||||
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;
|
||||
|
||||
: single-quote ( str -- newstr )
|
||||
"'" dup surround ;
|
||||
: quote? ( ch -- ? ) "'\"" member? ;
|
||||
|
||||
: double-quote ( str -- newstr )
|
||||
"\"" dup surround ;
|
||||
: single-quote ( str -- newstr ) "'" dup surround ;
|
||||
|
||||
: double-quote ( str -- newstr ) "\"" dup surround ;
|
||||
|
||||
: quote ( str -- newstr )
|
||||
CHAR: ' over member?
|
||||
[ double-quote ] [ single-quote ] if ;
|
||||
|
||||
: quoted? ( str -- ? )
|
||||
[ f ]
|
||||
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] if-empty ;
|
||||
{
|
||||
[ length 1 > ]
|
||||
[ first quote? ]
|
||||
[ [ first ] [ peek ] bi = ]
|
||||
} 1&& ;
|
||||
|
||||
: ?quote ( str -- newstr )
|
||||
dup quoted? [ quote ] unless ;
|
||||
: ?quote ( str -- newstr ) dup quoted? [ quote ] unless ;
|
||||
|
||||
: unquote ( str -- newstr )
|
||||
dup quoted? [ but-last-slice rest-slice >string ] when ;
|
||||
|
||||
: quote? ( ch -- ? ) "'\"" member? ;
|
||||
|
|
|
@ -70,3 +70,8 @@ C: <nil> nil
|
|||
[ t ] [ pi [ pi ] matches? ] unit-test
|
||||
[ 0.0 ] [ 0.0 pi + [ pi + ] undo ] unit-test
|
||||
[ ] [ 3 [ _ ] undo ] unit-test
|
||||
|
||||
[ { 1 } ] [ { 1 2 3 } [ { 2 3 } append ] undo ] unit-test
|
||||
[ { 3 } ] [ { 1 2 3 } [ { 1 2 } prepend ] undo ] unit-test
|
||||
[ { 1 2 3 } [ { 1 2 } append ] undo ] must-fail
|
||||
[ { 1 2 3 } [ { 2 3 } prepend ] undo ] must-fail
|
||||
|
|
|
@ -4,13 +4,13 @@ USING: accessors kernel words summary slots quotations
|
|||
sequences assocs math arrays stack-checker effects generalizations
|
||||
continuations debugger classes.tuple namespaces make vectors
|
||||
bit-arrays byte-arrays strings sbufs math.functions macros
|
||||
sequences.private combinators mirrors
|
||||
sequences.private combinators mirrors splitting
|
||||
combinators.short-circuit fry words.symbol ;
|
||||
RENAME: _ fry => __
|
||||
IN: inverse
|
||||
|
||||
ERROR: fail ;
|
||||
M: fail summary drop "Unification failed" ;
|
||||
M: fail summary drop "Matching failed" ;
|
||||
|
||||
: assure ( ? -- ) [ fail ] unless ;
|
||||
|
||||
|
@ -208,6 +208,9 @@ DEFER: _
|
|||
\ unclip [ prefix ] define-inverse
|
||||
\ suffix [ dup but-last swap peek ] define-inverse
|
||||
|
||||
\ append 1 [ [ ?tail assure ] curry ] define-pop-inverse
|
||||
\ prepend 1 [ [ ?head assure ] curry ] define-pop-inverse
|
||||
|
||||
! Constructor inverse
|
||||
: deconstruct-pred ( class -- quot )
|
||||
"predicate" word-prop [ dupd call assure ] curry ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Samuel Tardieu
|
|
@ -1,3 +1,5 @@
|
|||
! Copyright (C) 2009 Samuel Tardieu.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: bit-arrays kernel math math.functions math.ranges sequences ;
|
||||
IN: math.primes.erato
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Sieve of Eratosthenes
|
|
@ -1,6 +1,8 @@
|
|||
USING: math.primes.factors tools.test ;
|
||||
|
||||
{ { 999983 999983 1000003 } } [ 999969000187000867 factors ] unit-test
|
||||
{ { } } [ -5 factors ] unit-test
|
||||
{ { { 999983 2 } { 1000003 1 } } } [ 999969000187000867 group-factors ] unit-test
|
||||
{ { 999983 1000003 } } [ 999969000187000867 unique-factors ] unit-test
|
||||
{ 999967000236000612 } [ 999969000187000867 totient ] unit-test
|
||||
{ 0 } [ 1 totient ] unit-test
|
||||
|
|
|
@ -1,39 +1,36 @@
|
|||
! Copyright (C) 2007 Samuel Tardieu.
|
||||
! Copyright (C) 2007-2009 Samuel Tardieu.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays kernel lists make math math.primes sequences ;
|
||||
IN: math.primes.factors
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: (factor) ( n d -- n' )
|
||||
2dup mod zero? [ [ / ] keep dup , (factor) ] [ drop ] if ;
|
||||
: count-factor ( n d -- n' c )
|
||||
0 [ [ 2dup mod zero? ] dip swap ] [ [ [ / ] keep ] dip 1+ ] [ ] while nip ;
|
||||
|
||||
: (factor) ( n d -- n' ) dup [ , ] curry [ count-factor ] dip times ;
|
||||
|
||||
: (count) ( n d -- n' )
|
||||
[ (factor) ] { } make
|
||||
[ [ first ] [ length ] bi 2array , ] unless-empty ;
|
||||
dup [ swap 2array , ] curry
|
||||
[ count-factor dup zero? [ drop ] ] dip if ;
|
||||
|
||||
: (unique) ( n d -- n' )
|
||||
[ (factor) ] { } make
|
||||
[ first , ] unless-empty ;
|
||||
dup [ , ] curry [ count-factor zero? ] dip unless ;
|
||||
|
||||
: (factors) ( quot list n -- )
|
||||
dup 1 > [
|
||||
swap uncons swap [ pick call ] dip swap (factors)
|
||||
] [ 3drop ] if ;
|
||||
] [ 3drop ] if ; inline recursive
|
||||
|
||||
: (decompose) ( n quot -- seq )
|
||||
[ lprimes rot (factors) ] { } make ;
|
||||
: decompose ( n quot -- seq ) [ lprimes rot (factors) ] { } make ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: factors ( n -- seq )
|
||||
[ (factor) ] (decompose) ; foldable
|
||||
: factors ( n -- seq ) [ (factor) ] decompose ; flushable
|
||||
|
||||
: group-factors ( n -- seq )
|
||||
[ (count) ] (decompose) ; foldable
|
||||
: group-factors ( n -- seq ) [ (count) ] decompose ; flushable
|
||||
|
||||
: unique-factors ( n -- seq )
|
||||
[ (unique) ] (decompose) ; foldable
|
||||
: unique-factors ( n -- seq ) [ (unique) ] decompose ; flushable
|
||||
|
||||
: totient ( n -- t )
|
||||
dup 2 < [
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
Doug Coleman
|
|
@ -1,4 +0,0 @@
|
|||
USING: math.primes memoize ;
|
||||
IN: math.primes.list
|
||||
|
||||
MEMO: primes-under-million ( -- seq ) 1000000 primes-upto ;
|
|
@ -0,0 +1,3 @@
|
|||
USING: project-euler.057 tools.test ;
|
||||
|
||||
{ 153 } [ euler057 ] unit-test
|
Loading…
Reference in New Issue