unicode: Minor cleanups.
This fixes/breaks some of the tests. Why? { 3953 3958 3960 3968 3969 } [ canonical-map delete-at ] eachunicode-12.1.0
parent
566dd69f7c
commit
88d8d87061
|
@ -32,14 +32,7 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
|
||||||
|
|
||||||
"vocab:unicode/UCA/allkeys.txt" parse-ducet ducet set-global
|
"vocab:unicode/UCA/allkeys.txt" parse-ducet ducet set-global
|
||||||
|
|
||||||
! Fix up table for long contractions
|
! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
|
||||||
: help-one ( assoc key -- )
|
|
||||||
! Need to be more general? Not for DUCET, apparently
|
|
||||||
2 head 2dup swap key? [ 2drop ] [
|
|
||||||
[ [ 1string of ] with { } map-as concat ]
|
|
||||||
[ swap set-at ] 2bi
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: fixup-ducet ( -- )
|
: fixup-ducet ( -- )
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
|
@ -195,7 +188,8 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
|
||||||
}
|
}
|
||||||
} ducet get-global '[ swap >string _ set-at ] assoc-each ;
|
} ducet get-global '[ swap >string _ set-at ] assoc-each ;
|
||||||
|
|
||||||
! Add a few missing ducet values
|
! Add a few missing ducet values for Tibetan
|
||||||
|
! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
|
||||||
fixup-ducet
|
fixup-ducet
|
||||||
|
|
||||||
: tangut-block? ( char -- ? )
|
: tangut-block? ( char -- ? )
|
||||||
|
@ -248,6 +242,7 @@ fixup-ducet
|
||||||
: building-last ( -- char )
|
: building-last ( -- char )
|
||||||
building get [ 0 ] [ last last ] if-empty ;
|
building get [ 0 ] [ last last ] if-empty ;
|
||||||
|
|
||||||
|
! https://www.unicode.org/reports/tr10/tr10-41.html#Collation_Graphemes
|
||||||
: blocked? ( char -- ? )
|
: blocked? ( char -- ? )
|
||||||
combining-class dup { 0 f } member?
|
combining-class dup { 0 f } member?
|
||||||
[ drop building-last non-starter? ]
|
[ drop building-last non-starter? ]
|
||||||
|
|
|
@ -53,7 +53,7 @@ CONSTANT: categories {
|
||||||
MEMO: categories-map ( -- hashtable )
|
MEMO: categories-map ( -- hashtable )
|
||||||
categories H{ } zip-index-as ;
|
categories H{ } zip-index-as ;
|
||||||
|
|
||||||
CONSTANT: num-chars 0x2FA1E
|
CONSTANT: NUM-CHARS 0x2FA1E
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ PRIVATE>
|
||||||
|
|
||||||
! Loading data from UnicodeData.txt
|
! Loading data from UnicodeData.txt
|
||||||
|
|
||||||
: load-data ( -- data )
|
: load-unicode-data ( -- data )
|
||||||
"vocab:unicode/UCD/UnicodeData.txt" load-data-file ;
|
"vocab:unicode/UCD/UnicodeData.txt" load-data-file ;
|
||||||
|
|
||||||
: (process-data) ( index data -- newdata )
|
: (process-data) ( index data -- newdata )
|
||||||
|
@ -146,7 +146,7 @@ PRIVATE>
|
||||||
] assoc-each table ;
|
] assoc-each table ;
|
||||||
|
|
||||||
:: process-category ( data -- category-listing )
|
:: process-category ( data -- category-listing )
|
||||||
num-chars <byte-array> :> table
|
NUM-CHARS <byte-array> :> table
|
||||||
2 data (process-data) [| char cat |
|
2 data (process-data) [| char cat |
|
||||||
cat categories-map at char table ?set-nth
|
cat categories-map at char table ?set-nth
|
||||||
] assoc-each table fill-ranges ;
|
] assoc-each table fill-ranges ;
|
||||||
|
@ -194,7 +194,7 @@ C: <code-point> code-point
|
||||||
[ length 5 = ] filter
|
[ length 5 = ] filter
|
||||||
[ [ set-code-point ] each ] H{ } make ;
|
[ [ set-code-point ] each ] H{ } make ;
|
||||||
|
|
||||||
load-data {
|
load-unicode-data {
|
||||||
[ process-names name-map swap assoc-union! drop ]
|
[ process-names name-map swap assoc-union! drop ]
|
||||||
[ 13 swap process-data simple-lower swap assoc-union! drop ]
|
[ 13 swap process-data simple-lower swap assoc-union! drop ]
|
||||||
[ 12 swap process-data simple-upper swap assoc-union! drop ]
|
[ 12 swap process-data simple-upper swap assoc-union! drop ]
|
||||||
|
|
Loading…
Reference in New Issue