unicode.collation: illegal? is now gone, do AAAA BBBB for every code point
remove some tests that pass now
clean-linux-x86-64
parent
1b790ee6b6
commit
a9928b0dc0
|
@ -61,48 +61,6 @@ IN: unicode.collation.tests
|
|||
{ +lt+ } [ { 111355 98 } { 19968 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 40943 98 } { 64014 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 191456 98 } { 888 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 19894 98 } { 55296 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 55296 98 } { 55297 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 55297 98 } { 55298 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 55298 98 } { 55299 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 55299 98 } { 56320 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 56320 98 } { 57343 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 63743 98 } { 64976 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 64976 98 } { 64977 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 64977 98 } { 64978 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 64978 98 } { 64979 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 65520 98 } { 65534 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 65534 98 } { 65535 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 65535 98 } { 131070 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 131070 98 } { 131071 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 191457 98 } { 196606 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 196606 98 } { 196607 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 196607 98 } { 262142 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 262142 98 } { 262143 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 262143 98 } { 327678 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 327678 98 } { 327679 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 327679 98 } { 393214 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 393214 98 } { 393215 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 393215 98 } { 458750 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 458750 98 } { 458751 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 458751 98 } { 524286 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 524286 98 } { 524287 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 524287 98 } { 589822 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 589822 98 } { 589823 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 589823 98 } { 655358 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 655358 98 } { 655359 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 655359 98 } { 720894 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 720894 98 } { 720895 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 720895 98 } { 786430 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 786430 98 } { 786431 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 786432 98 } { 851966 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 851966 98 } { 851967 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 851968 98 } { 917502 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 917502 98 } { 917503 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 917509 98 } { 983038 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 983038 98 } { 983039 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 1114109 98 } { 1114110 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
{ +lt+ } [ { 1114110 98 } { 1114111 33 } [ >string ] bi@ string<=> ] unit-test
|
||||
|
||||
|
||||
{ { 12748 12741 0 32 74 32 0 2 2 2 0 65535 65535 65535 } }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators
|
||||
combinators.short-circuit combinators.smart kernel locals make
|
||||
combinators.short-circuit combinators.smart fry kernel locals make
|
||||
math math.order math.parser namespaces sequences
|
||||
simple-flat-file splitting strings unicode.data ;
|
||||
IN: unicode.collation
|
||||
|
@ -40,10 +40,164 @@ TUPLE: weight-levels primary secondary tertiary ignorable? ;
|
|||
[ swap set-at ] 2bi
|
||||
] if ;
|
||||
|
||||
: insert-helpers ( assoc -- )
|
||||
dup keys [ length 3 >= ] filter [ help-one ] with each ;
|
||||
: fixup-ducet ( -- )
|
||||
{
|
||||
{
|
||||
{ 0x0FB2 0x0F71 } ! CE(0FB2) CE(0F71)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB3 0x0F71 } ! CE(0FB3) CE(0F71)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12720 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ducet get-global insert-helpers
|
||||
! FIXME: WRONG WEIGHTS
|
||||
{
|
||||
{ 0x0FB2 0x0F71 0x0F72 } ! CE(0FB2) CE(0F71 0F72)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB2 0x0F73 } ! CE(0FB2) CE(0F71 0F72)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB2 0x0F71 0x0F74 } ! CE(0FB2) CE(0F71 0F74)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB2 0x0F75 } ! CE(0FB2) CE(0F71 0F74)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB3 0x0F71 0x0F72 } ! CE(0FB3) CE(0F71 0F72)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB3 0x0F73 } ! CE(0FB3) CE(0F71 0F72)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB3 0x0F71 0x0F74 } ! CE(0FB3) CE(0F71 0F74)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
{ 0x0FB3 0x0F75 } ! CE(0FB3) CE(0F71 0F74)
|
||||
{
|
||||
T{ weight-levels
|
||||
{ primary 12719 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
T{ weight-levels
|
||||
{ primary 12741 }
|
||||
{ secondary 32 }
|
||||
{ tertiary 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
} ducet get-global '[ swap >string _ set-at ] assoc-each ;
|
||||
|
||||
! Add a few missing ducet values
|
||||
fixup-ducet
|
||||
|
||||
: tangut-block? ( char -- ? )
|
||||
! Tangut Block, Tangut Components Block
|
||||
|
@ -73,22 +227,12 @@ ducet get-global insert-helpers
|
|||
: BBBB ( char -- weight-levels )
|
||||
0x7FFF bitand 0x8000 bitor 0 0 <weight-levels> ; inline
|
||||
|
||||
: illegal? ( char -- ? )
|
||||
{
|
||||
[ "Noncharacter_Code_Point" property? ]
|
||||
[ category "Cs" = ]
|
||||
} 1|| ;
|
||||
|
||||
: derive-weight ( 1string -- weight-levels-pair )
|
||||
first
|
||||
dup tangut-block? [
|
||||
[ tangut-AAAA ] [ tangut-BBBB ] bi 2array
|
||||
] [
|
||||
dup illegal? [
|
||||
drop { }
|
||||
] [
|
||||
[ AAAA ] [ BBBB ] bi 2array
|
||||
] if
|
||||
[ AAAA ] [ BBBB ] bi 2array
|
||||
] if ;
|
||||
|
||||
: building-last ( -- char )
|
||||
|
|
Loading…
Reference in New Issue