From be34c407d14ba374832c63a29b71e67b450de1ab Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Tue, 6 Jan 2009 10:19:19 -0600 Subject: [PATCH] unicode.breaks tests and bug fixes --- basis/unicode/breaks/GraphemeBreakTest.txt | 311 +++++++++++++++++++++ basis/unicode/breaks/breaks-tests.factor | 25 +- basis/unicode/breaks/breaks.factor | 24 +- 3 files changed, 355 insertions(+), 5 deletions(-) create mode 100644 basis/unicode/breaks/GraphemeBreakTest.txt diff --git a/basis/unicode/breaks/GraphemeBreakTest.txt b/basis/unicode/breaks/GraphemeBreakTest.txt new file mode 100644 index 0000000000..e0b5a25002 --- /dev/null +++ b/basis/unicode/breaks/GraphemeBreakTest.txt @@ -0,0 +1,311 @@ +# GraphemeBreakTest-5.1.0.txt +# Date: 2008-03-11, 02:19:22 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2008 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see UCD.html +# +# Default Grapheme Break Test +# +# Format: +# (# )? +# contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. +# the format can change, but currently it shows: +# - the sample character name +# - (x) the line_break property* for the sample character +# - [x] the rule that determines whether there is a break or not +# +# These samples may be extended or changed in the future. +# +÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (CR) ÷ [0.3] +÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (LF) ÷ [0.3] +÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0020 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0020 × 0308 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0020 ÷ # ÷ [0.2] (CR) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [4.0] (CR) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] (CR) × [3.0] (LF) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 000D ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ [4.0] (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 000D ÷ 0E40 ÷ # ÷ [0.2] (CR) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 000D ÷ 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 1160 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 11A8 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ AC00 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0020 ÷ # ÷ [0.2] (LF) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [4.0] (CR) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [4.0] (LF) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 000A ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [4.0] (Control) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 000A ÷ 0E40 ÷ # ÷ [0.2] (LF) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 000A ÷ 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 1160 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 11A8 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ AC00 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0020 ÷ # ÷ [0.2] (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 000D ÷ # ÷ [0.2] (Control) ÷ [4.0] (CR) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0001 ÷ 000A ÷ # ÷ [0.2] (Control) ÷ [4.0] (LF) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0001 ÷ 0001 ÷ # ÷ [0.2] (Control) ÷ [4.0] (Control) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0001 ÷ 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0001 ÷ 0E40 ÷ # ÷ [0.2] (Control) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0001 ÷ 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 1160 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ AC00 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0300 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0300 × 0308 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0E40 × 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] SPACE (Other) ÷ [0.3] +÷ 0E40 × 0308 ÷ 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0E40 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0E40 × 0308 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0E40 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0E40 × 0308 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0E40 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0E40 × 0308 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0E40 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0E40 × 0308 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0E40 × 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0E40 × 0308 ÷ 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0E40 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0E40 × 0308 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0E40 × 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0E40 × 0308 ÷ 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0E40 × 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0E40 × 0308 ÷ 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0E40 × 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0E40 × 0308 ÷ 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0E40 × AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0E40 × 0308 ÷ AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0E40 × AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0E40 × 0308 ÷ AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (CR) ÷ [0.3] +÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (LF) ÷ [0.3] +÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0903 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0903 × 0308 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (CR) ÷ [0.3] +÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (LF) ÷ [0.3] +÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1100 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 1100 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (CR) ÷ [0.3] +÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (LF) ÷ [0.3] +÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 1160 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 1160 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (CR) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (LF) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 11A8 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (CR) ÷ [0.3] +÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (LF) ÷ [0.3] +÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ AC00 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ AC00 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (CR) ÷ [0.3] +÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (LF) ÷ [0.3] +÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ AC01 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ AC01 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] +÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +# Lines: 288 diff --git a/basis/unicode/breaks/breaks-tests.factor b/basis/unicode/breaks/breaks-tests.factor index 77ba0e82fa..d6947756c6 100644 --- a/basis/unicode/breaks/breaks-tests.factor +++ b/basis/unicode/breaks/breaks-tests.factor @@ -1,7 +1,30 @@ -USING: tools.test unicode.breaks sequences math kernel ; +USING: tools.test unicode.breaks sequences math kernel splitting +unicode.categories io.pathnames io.encodings.utf8 io.files +strings quotations math.parser ; +IN: unicode.breaks.tests [ "\u001112\u001161\u0011abA\u000300a\r\r\n" ] [ "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse ] unit-test [ "dcba" ] [ "abcd" string-reverse ] unit-test [ 3 ] [ "\u001112\u001161\u0011abA\u000300a" dup last-grapheme head last-grapheme ] unit-test + +: grapheme-break-test ( -- filename ) + "basis/unicode/breaks/GraphemeBreakTest.txt" + resource-path ; + +: parse-test-file ( -- tests ) + grapheme-break-test utf8 file-lines + [ "#" split1 drop ] map harvest [ + "÷" split + [ "×" split [ [ blank? ] trim hex> ] map harvest >string ] map + harvest + ] map ; + +: test-graphemes ( tests -- ) + [ + [ 1quotation ] + [ concat [ >graphemes [ "" like ] map ] curry ] bi unit-test + ] each ; + +parse-test-file test-graphemes diff --git a/basis/unicode/breaks/breaks.factor b/basis/unicode/breaks/breaks.factor index 58c7a5d10e..a666b6f7db 100644 --- a/basis/unicode/breaks/breaks.factor +++ b/basis/unicode/breaks/breaks.factor @@ -7,12 +7,16 @@ io.encodings.ascii unicode.syntax unicode.data compiler.units alien.syntax sets ; IN: unicode.breaks -C-ENUM: Any L V T Extend Control CR LF graphemes ; +C-ENUM: Any L V T LV LVT Extend Control CR LF + SpacingMark Prepend graphemes ; : jamo-class ( ch -- class ) dup initial? [ drop L ] [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ; +: hangul-class ( ch -- class ) + hangul-base - HEX: 1C mod zero? LV LVT ? ; + CATEGORY: grapheme-control Zl Zp Cc Cf ; : control-class ( ch -- class ) { @@ -27,11 +31,19 @@ CATEGORY: (extend) Me Mn ; : extend? ( ch -- ? ) { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ; +: loe? ( ch -- ? ) + "Logical_Order_Exception" property? ; + +CATEGORY: spacing Mc ; + : grapheme-class ( ch -- class ) { { [ dup jamo? ] [ jamo-class ] } + { [ dup hangul? ] [ hangul-class ] } { [ dup grapheme-control? ] [ control-class ] } - { [ extend? ] [ Extend ] } + { [ dup extend? ] [ drop Extend ] } + { [ dup spacing? ] [ drop SpacingMark ] } + { [ loe? ] [ Prepend ] } [ Any ] } cond ; @@ -61,10 +73,14 @@ SYMBOL: table : make-grapheme-table ( -- ) CR LF connect Control CR LF 3array graphemes break-around - L L V 2array connect-before + L L V LV LVT 4array connect-before V V T 2array connect-before + LV V T 2array connect-before T T connect - graphemes Extend connect-after ; + LVT T connect + graphemes Extend connect-after + graphemes SpacingMark connect-after + Prepend graphemes connect-before ; VALUE: grapheme-table