Unicode collation done, except for tailoring. Normalization test suite

parent 5646d067e4
commit 8ac9d9d9b2

@@ -10,16 +10,8 @@ IN: unicode.collation.tests

: test-two ( str1 str2 -- )
    [ +lt+ ] -rot [ string<=> ] 2curry unit-test ;

: find-failure
    parse-test dup 2 <clumps>
    [ string<=> +lt+ = not ] assoc-find drop ;

: (find-failure)
    dup 2 <clumps>
    [ string<=> +lt+ = not ] assoc-find drop ;

: failures
    parse-test dup 2 <clumps>
    [ string<=> +lt+ = not ] assoc-filter dup assoc-size ;

[ 7 ] [ failures 2nip ] unit-test
parse-test 2 <clumps> [ test-two ] assoc-each
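
test-two 2curries the string pair onto string<=> and asserts the result
is +lt+, so each adjacent pair of lines from the conformance data
becomes one unit test. A minimal sketch of the single test it generates
for one pair (the strings here are illustrative, not from the data file):

    ! Asserts that "abc" collates strictly before "abd".
    [ +lt+ ] [ "abc" "abd" string<=> ] unit-test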

@@ -76,28 +76,31 @@ ascii <file-reader> parse-ducet \ ducet set-value

        [ drop ] [ 1string , ] if
    ] if ;

! The terminator method for Hangul syllables
: terminator 1 0 0 f weight boa ;

MACRO: const ( seq -- seq )
    [ dup word? [ execute ] when ] deep-map 1quotation ;
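
const walks a literal sequence at macro-expansion time, executing any
words it finds, so the table quoted below is computed once at compile
time. A hedged sketch of the expansion, assuming L, V and T are words
pushing small integer grapheme classes (say 0, 1 and 2):

    ! { { T L } { V L } { V T } } const
    ! would expand to the literal  { { 2 0 } { 1 0 } { 1 2 } }
    ! so member? compares against precomputed data at run time.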

! : char, ( char -- )
!     [
!         building get peek [ first ] bi@ dup jamo? [
!             over jamo? [
!                 [ grapheme-class ] bi@ swap 2array
!                 { { T L } { V L } { V T } } const
!                 member? [ terminator , ] when
!             ] [ 2drop terminator , ] if
!         ] [ 2drop ] if
!     ] [ , ] bi ;
: char, ( char -- )
    [
        building get peek [ first ] bi@ dup jamo? [
            over jamo? [
                [ grapheme-class ] bi@ swap 2array
                { { T L } { V L } { V T } } const
                member? [ terminator , ] when
            ] [ 2drop terminator , ] if
        ] [ 2drop ] if
    ] [ , ] bi ;
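
char, compares the grapheme class of the previous jamo against that of
the current one and, when the ordered pair { previous current } appears
in the const table, appends a terminator weight between them; a jamo
followed by a non-jamo also gets a terminator. This is the UCA
terminator-weight device for keeping Hangul syllables of different
lengths correctly ordered.

    ! e.g. previous class T, current class L (one syllable's trailing
    ! consonant followed by the next syllable's leading consonant):
    ! { T L } is in the table, so a terminator weight is emitted.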

! : insert-terminators ( graphemes -- graphemes )
: insert-terminators ( graphemes -- graphemes )
!     Insert a terminator between hangul syllables
!     [ unclip , [ char, ] each ] { } make ;
    [ unclip , [ char, ] each ] { } make ;
! The above code for the terminator method is not used
! The test suite passes without it

: string>graphemes ( string -- graphemes )
    [ [ add ] each ] { } make ; ! insert-terminators ;
    [ [ add ] each ] { } make ;

: graphemes>weights ( graphemes -- weights )
    [

File diff suppressed because it is too large

@@ -1,4 +1,7 @@

USING: unicode.normalize kernel tools.test sequences ;
USING: unicode.normalize kernel tools.test sequences
unicode.data io.encodings.utf8 io.files splitting math.parser
locals math quotations assocs combinators ;
IN: unicode.normalize.tests

[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
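
The string-append test exercises canonical reordering on concatenation:
U+0323 COMBINING DOT BELOW has canonical combining class 220 and U+0302
COMBINING CIRCUMFLEX has class 230, so renormalizing the joined string
must move the dot below in front of the circumflex:

    ! ccc(U+0323) = 220 < ccc(U+0302) = 230
    ! "ab\u000302" "\u000323cd" string-append => "ab\u000323\u000302cd"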

@@ -16,3 +19,26 @@ USING: unicode.normalize kernel tools.test sequences ;

[ f ] [ 0 hangul? ] unit-test
[ "\u001112\u001161\u0011ab" ] [ "\u00d55c" nfd ] unit-test
[ "\u00d55c" ] [ "\u001112\u001161\u0011ab" nfc ] unit-test

: parse-test ( -- tests )
    "resource:extra/unicode/normalize/NormalizationTest.txt"
    utf8 file-lines filter-comments
    [ ";" split 5 head [ " " split [ hex> ] "" map-as ] map ] map ;
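
For reference, each line of NormalizationTest.txt carries five
semicolon-separated columns c1-c5 (source, NFC, NFD, NFKC, NFKD), each
a space-separated run of hex code points; parse-test maps every column
to an actual string. A line of the form

    1E0A;1E0A;0044 0307;1E0A;0044 0307;

thus parses to five strings: U+1E0A (D with dot above), U+1E0A,
U+0044 U+0307, U+1E0A, U+0044 U+0307.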

:: assert= ( test spec quot -- )
    spec [
        [
            [ 1- test nth ] bi@
            [ 1quotation ] [ quot curry ] bi* unit-test
        ] with each
    ] assoc-each ;

: run-line ( test -- )
    {
        [ { { 2 { 1 2 3 } } { 4 { 4 5 } } } [ nfc ] assert= ]
        [ { { 3 { 1 2 3 } } { 5 { 4 5 } } } [ nfd ] assert= ]
        [ { { 4 { 1 2 3 4 5 } } } [ nfkc ] assert= ]
        [ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
    } cleave ;
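
The spec assocs handed to assert= encode the UAX #15 conformance
invariants, read as "the key column equals quot applied to each column
listed in the value" (columns are 1-indexed):

    ! { 2 { 1 2 3 } } [ nfc ] : c2 = nfc(c1) = nfc(c2) = nfc(c3)
    ! { 4 { 4 5 } }   [ nfc ] : c4 = nfc(c4) = nfc(c5)
    ! and likewise for the nfd, nfkc and nfkd rows.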

! parse-test [ run-line ] each

@@ -27,7 +27,7 @@ IN: unicode.normalize

    hangul-base - final-count /mod final-base +
    >r medial-count /mod medial-base +
    >r initial-base + r> r>
    dup zero? [ drop 2array ] [ 3array ] if ;
    dup final-base = [ drop 2array ] [ 3array ] if ;

: jamo>hangul ( initial medial final -- hangul )
    >r >r initial-base - medial-count *
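
The adjacent pair of lines is the fix: since final-base has already
been added to the remainder, a syllable with no trailing consonant
leaves final-base on the stack rather than zero, so the old zero? check
never fired and every syllable decomposed to three jamo. A worked
example for U+D55C (HANGUL SYLLABLE HAN), using the standard Unicode
constants hangul-base HEX: AC00, final-count 28, final-base HEX: 11A7,
medial-count 21, medial-base HEX: 1161, initial-base HEX: 1100:

    ! HEX: D55C HEX: AC00 -  => 10588
    ! 10588 28 /mod          => 378 4    ! 4 HEX: 11A7 + => HEX: 11AB
    ! 378 21 /mod            => 18 0     ! 0 HEX: 1161 + => HEX: 1161
    ! 18 HEX: 1100 +         => HEX: 1112
    ! final HEX: 11AB is not final-base, so all three jamo are kept,
    ! matching the test above: "\u00d55c" nfd => "\u001112\u001161\u0011ab"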