Unicode collation done, except for tailoring. Normalization test suite
parent
5646d067e4
commit
8ac9d9d9b2
|
@ -10,16 +10,8 @@ IN: unicode.collation.tests
|
||||||
: test-two ( str1 str2 -- )
|
: test-two ( str1 str2 -- )
|
||||||
[ +lt+ ] -rot [ string<=> ] 2curry unit-test ;
|
[ +lt+ ] -rot [ string<=> ] 2curry unit-test ;
|
||||||
|
|
||||||
: find-failure
|
|
||||||
parse-test dup 2 <clumps>
|
|
||||||
[ string<=> +lt+ = not ] assoc-find drop ;
|
|
||||||
|
|
||||||
: (find-failure)
|
|
||||||
dup 2 <clumps>
|
|
||||||
[ string<=> +lt+ = not ] assoc-find drop ;
|
|
||||||
|
|
||||||
: failures
|
: failures
|
||||||
parse-test dup 2 <clumps>
|
parse-test dup 2 <clumps>
|
||||||
[ string<=> +lt+ = not ] assoc-filter dup assoc-size ;
|
[ string<=> +lt+ = not ] assoc-filter dup assoc-size ;
|
||||||
|
|
||||||
[ 7 ] [ failures 2nip ] unit-test
|
parse-test 2 <clumps> [ test-two ] assoc-each
|
||||||
|
|
|
@ -76,28 +76,31 @@ ascii <file-reader> parse-ducet \ ducet set-value
|
||||||
[ drop ] [ 1string , ] if
|
[ drop ] [ 1string , ] if
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
|
! The terminator method for Hangul syllables
|
||||||
: terminator 1 0 0 f weight boa ;
|
: terminator 1 0 0 f weight boa ;
|
||||||
|
|
||||||
MACRO: const ( seq -- seq )
|
MACRO: const ( seq -- seq )
|
||||||
[ dup word? [ execute ] when ] deep-map 1quotation ;
|
[ dup word? [ execute ] when ] deep-map 1quotation ;
|
||||||
|
|
||||||
! : char, ( char -- )
|
: char, ( char -- )
|
||||||
! [
|
[
|
||||||
! building get peek [ first ] bi@ dup jamo? [
|
building get peek [ first ] bi@ dup jamo? [
|
||||||
! over jamo? [
|
over jamo? [
|
||||||
! [ grapheme-class ] bi@ swap 2array
|
[ grapheme-class ] bi@ swap 2array
|
||||||
! { { T L } { V L } { V T } } const
|
{ { T L } { V L } { V T } } const
|
||||||
! member? [ terminator , ] when
|
member? [ terminator , ] when
|
||||||
! ] [ 2drop terminator , ] if
|
] [ 2drop terminator , ] if
|
||||||
! ] [ 2drop ] if
|
] [ 2drop ] if
|
||||||
! ] [ , ] bi ;
|
] [ , ] bi ;
|
||||||
|
|
||||||
! : insert-terminators ( graphemes -- graphemes )
|
: insert-terminators ( graphemes -- graphemes )
|
||||||
! Insert a terminator between hangul syllables
|
! Insert a terminator between hangul syllables
|
||||||
! [ unclip , [ char, ] each ] { } make ;
|
[ unclip , [ char, ] each ] { } make ;
|
||||||
|
! The above code for the terminator method is not used
|
||||||
|
! The test suite passes without it
|
||||||
|
|
||||||
: string>graphemes ( string -- graphemes )
|
: string>graphemes ( string -- graphemes )
|
||||||
[ [ add ] each ] { } make ; ! insert-terminators ;
|
[ [ add ] each ] { } make ;
|
||||||
|
|
||||||
: graphemes>weights ( graphemes -- weights )
|
: graphemes>weights ( graphemes -- weights )
|
||||||
[
|
[
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,7 @@
|
||||||
USING: unicode.normalize kernel tools.test sequences ;
|
USING: unicode.normalize kernel tools.test sequences
|
||||||
|
unicode.data io.encodings.utf8 io.files splitting math.parser
|
||||||
|
locals math quotations assocs combinators ;
|
||||||
|
IN: unicode.normalize.tests
|
||||||
|
|
||||||
[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
|
[ "ab\u000323\u000302cd" ] [ "ab\u000302" "\u000323cd" string-append ] unit-test
|
||||||
|
|
||||||
|
@ -16,3 +19,26 @@ USING: unicode.normalize kernel tools.test sequences ;
|
||||||
[ f ] [ 0 hangul? ] unit-test
|
[ f ] [ 0 hangul? ] unit-test
|
||||||
[ "\u001112\u001161\u0011ab" ] [ "\u00d55c" nfd ] unit-test
|
[ "\u001112\u001161\u0011ab" ] [ "\u00d55c" nfd ] unit-test
|
||||||
[ "\u00d55c" ] [ "\u001112\u001161\u0011ab" nfc ] unit-test
|
[ "\u00d55c" ] [ "\u001112\u001161\u0011ab" nfc ] unit-test
|
||||||
|
|
||||||
|
: parse-test ( -- tests )
|
||||||
|
"resource:extra/unicode/normalize/NormalizationTest.txt"
|
||||||
|
utf8 file-lines filter-comments
|
||||||
|
[ ";" split 5 head [ " " split [ hex> ] "" map-as ] map ] map ;
|
||||||
|
|
||||||
|
:: assert= ( test spec quot -- )
|
||||||
|
spec [
|
||||||
|
[
|
||||||
|
[ 1- test nth ] bi@
|
||||||
|
[ 1quotation ] [ quot curry ] bi* unit-test
|
||||||
|
] with each
|
||||||
|
] assoc-each ;
|
||||||
|
|
||||||
|
: run-line ( test -- )
|
||||||
|
{
|
||||||
|
[ { { 2 { 1 2 3 } } { 4 { 4 5 } } } [ nfc ] assert= ]
|
||||||
|
[ { { 3 { 1 2 3 } } { 5 { 4 5 } } } [ nfd ] assert= ]
|
||||||
|
[ { { 4 { 1 2 3 4 5 } } } [ nfkc ] assert= ]
|
||||||
|
[ { { 5 { 1 2 3 4 5 } } } [ nfkd ] assert= ]
|
||||||
|
} cleave ;
|
||||||
|
|
||||||
|
! parse-test [ run-line ] each
|
||||||
|
|
|
@ -27,7 +27,7 @@ IN: unicode.normalize
|
||||||
hangul-base - final-count /mod final-base +
|
hangul-base - final-count /mod final-base +
|
||||||
>r medial-count /mod medial-base +
|
>r medial-count /mod medial-base +
|
||||||
>r initial-base + r> r>
|
>r initial-base + r> r>
|
||||||
dup zero? [ drop 2array ] [ 3array ] if ;
|
dup final-base = [ drop 2array ] [ 3array ] if ;
|
||||||
|
|
||||||
: jamo>hangul ( initial medial final -- hangul )
|
: jamo>hangul ( initial medial final -- hangul )
|
||||||
>r >r initial-base - medial-count *
|
>r >r initial-base - medial-count *
|
||||||
|
|
Loading…
Reference in New Issue