unicode: Minor cleanups.

This fixes/breaks some of the tests. Why?
{ 3953 3958 3960 3968 3969 } [ canonical-map delete-at ] each
unicode-12.1.0
Doug Coleman 2019-07-28 15:51:05 -05:00
parent 566dd69f7c
commit 88d8d87061
2 changed files with 8 additions and 13 deletions

View File

@ -32,14 +32,7 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
"vocab:unicode/UCA/allkeys.txt" parse-ducet ducet set-global "vocab:unicode/UCA/allkeys.txt" parse-ducet ducet set-global
! Fix up table for long contractions ! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
: help-one ( assoc key -- )
! Need to be more general? Not for DUCET, apparently
2 head 2dup swap key? [ 2drop ] [
[ [ 1string of ] with { } map-as concat ]
[ swap set-at ] 2bi
] if ;
: fixup-ducet ( -- ) : fixup-ducet ( -- )
{ {
{ {
@ -195,7 +188,8 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
} }
} ducet get-global '[ swap >string _ set-at ] assoc-each ; } ducet get-global '[ swap >string _ set-at ] assoc-each ;
! Add a few missing ducet values ! Add a few missing ducet values for Tibetan
! https://www.unicode.org/reports/tr10/tr10-41.html#Well_Formed_DUCET
fixup-ducet fixup-ducet
: tangut-block? ( char -- ? ) : tangut-block? ( char -- ? )
@ -248,6 +242,7 @@ fixup-ducet
: building-last ( -- char ) : building-last ( -- char )
building get [ 0 ] [ last last ] if-empty ; building get [ 0 ] [ last last ] if-empty ;
! https://www.unicode.org/reports/tr10/tr10-41.html#Collation_Graphemes
: blocked? ( char -- ? ) : blocked? ( char -- ? )
combining-class dup { 0 f } member? combining-class dup { 0 f } member?
[ drop building-last non-starter? ] [ drop building-last non-starter? ]

View File

@ -53,7 +53,7 @@ CONSTANT: categories {
MEMO: categories-map ( -- hashtable ) MEMO: categories-map ( -- hashtable )
categories H{ } zip-index-as ; categories H{ } zip-index-as ;
CONSTANT: num-chars 0x2FA1E CONSTANT: NUM-CHARS 0x2FA1E
PRIVATE> PRIVATE>
@ -76,7 +76,7 @@ PRIVATE>
! Loading data from UnicodeData.txt ! Loading data from UnicodeData.txt
: load-data ( -- data ) : load-unicode-data ( -- data )
"vocab:unicode/UCD/UnicodeData.txt" load-data-file ; "vocab:unicode/UCD/UnicodeData.txt" load-data-file ;
: (process-data) ( index data -- newdata ) : (process-data) ( index data -- newdata )
@ -146,7 +146,7 @@ PRIVATE>
] assoc-each table ; ] assoc-each table ;
:: process-category ( data -- category-listing ) :: process-category ( data -- category-listing )
num-chars <byte-array> :> table NUM-CHARS <byte-array> :> table
2 data (process-data) [| char cat | 2 data (process-data) [| char cat |
cat categories-map at char table ?set-nth cat categories-map at char table ?set-nth
] assoc-each table fill-ranges ; ] assoc-each table fill-ranges ;
@ -194,7 +194,7 @@ C: <code-point> code-point
[ length 5 = ] filter [ length 5 = ] filter
[ [ set-code-point ] each ] H{ } make ; [ [ set-code-point ] each ] H{ } make ;
load-data { load-unicode-data {
[ process-names name-map swap assoc-union! drop ] [ process-names name-map swap assoc-union! drop ]
[ 13 swap process-data simple-lower swap assoc-union! drop ] [ 13 swap process-data simple-lower swap assoc-union! drop ]
[ 12 swap process-data simple-upper swap assoc-union! drop ] [ 12 swap process-data simple-upper swap assoc-union! drop ]