! factor/basis/unicode/breaks/breaks-tests.factor

USING: tools.test unicode.breaks sequences math kernel splitting
unicode.categories io.pathnames io.encodings.utf8 io.files
strings quotations math.parser locals ;
IN: unicode.breaks.tests

! string-reverse is expected to reverse the order of the units below while
! leaving each unit's internal order alone: the CR LF pair, "A" followed by
! U+0300, and the run U+1112 U+1161 U+11AB all appear unchanged in the
! expected value, only at mirrored positions.
{ "\u001112\u001161\u0011abA\u000300a\r\r\n" }
[ "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse ] unit-test
! A string of four plain letters is simply reversed character by character.
{ "dcba" } [ "abcd" string-reverse ] unit-test
! Apply last-grapheme to the six-code-point string, keep that many leading
! elements with head, then apply last-grapheme to the shortened string;
! the expected final result is 3 (the index of "A").
{ 3 } [ "\u001112\u001161\u0011abA\u000300a"
        dup last-grapheme head last-grapheme ] unit-test

! Starting at index 2 of "hello": first-grapheme-from is expected to yield 3
! and last-grapheme-from 1, i.e. the neighbouring index on either side.
{ 3 } [ 2 "hello" first-grapheme-from ] unit-test
{ 1 } [ 2 "hello" last-grapheme-from ] unit-test

! Word-boundary lookups in "what am I saying" (16 characters).
! From index 2, inside "what": 4 is expected going forward (the index just
! past "what") and 0 going backward (the start of the string).
{ 4 } [ 2 "what am I saying" first-word-from ] unit-test
{ 0 } [ 2 "what am I saying" last-word-from ] unit-test
! From index 11, inside "saying": 16 is expected going forward (the string
! length) and 10 going backward (the index where "saying" begins).
{ 16 } [ 11 "what am I saying" first-word-from ] unit-test
{ 10 } [ 11 "what am I saying" last-word-from ] unit-test

! Path (with the vocab: prefix) of the GraphemeBreakTest.txt data file
! that this test file feeds to parse-test-file.
CONSTANT: grapheme-break-test
    "vocab:unicode/breaks/GraphemeBreakTest.txt"

! Path (with the vocab: prefix) of the WordBreakTest.txt data file
! that this test file feeds to parse-test-file.
CONSTANT: word-break-test
    "vocab:unicode/breaks/WordBreakTest.txt"

! Keeps only the text before the first "#" of a line.
: strip-comment ( line -- data )
    "#" split1 drop ;

! Converts one "×"-separated run of hexadecimal code points into a string.
! Each field is trimmed of blanks and parsed with hex>; fields that yield
! f or 0 are dropped before the string is built.
: segment>string ( str -- string )
    "×" split
    [ [ blank? ] trim hex> ] map
    [ { f 0 } member? ] reject
    >string ;

! Splits one data line at every "÷", converts each piece with
! segment>string, and discards pieces that end up empty.
: line>segments ( line -- segments )
    "÷" split [ segment>string ] map harvest ;

! Reads a UTF-8 break-test data file and returns one sequence of strings
! per non-empty (after comment removal) line.
: parse-test-file ( file-name -- tests )
    utf8 file-lines
    [ strip-comment ] map harvest
    [ line>segments ] map ;

! For every element of tests (a sequence of strings) registers one
! unit-test: the element itself is the expected value, and the tested
! quotation pushes the concatenation of its strings, calls quot on it,
! and converts each resulting piece with "" like.
:: test ( tests quot -- )
    tests [| expected |
        expected concat :> input
        expected 1quotation
        [ input quot call [ "" like ] map ]
        unit-test
    ] each ;

! Checks every parsed test case against >graphemes.
! The body used to be a copy of `test` with >graphemes hard-coded; it now
! delegates to `test` so the comparison logic is defined only once.
: grapheme-test ( tests -- )
    [ >graphemes ] test ;

! Run every case from the two data files through `test`: the pieces of each
! case are concatenated, split again by the given quotation, and the result
! is compared with the original pieces.
grapheme-break-test parse-test-file [ >graphemes ] test
word-break-test parse-test-file [ >words ] test

! word-break-at? queried at each index 0..5 of "as df": expected t at 0, 2,
! 3 and 5 (both ends and either side of the space), f at 1 and 4.
{ { t f t t f t } } [ 6 iota [ "as df" word-break-at? ] map ] unit-test
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								USING: tools.test unicode.breaks sequences math kernel splitting
 								unicode.categories io.pathnames io.encodings.utf8 io.files
-												Word breaks

											
										
										
											2009-01-07 13:23:07 -05:00
+								strings quotations math.parser locals ;
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								IN: unicode.breaks.tests
-												Initial import

											
										
										
											2007-09-20 18:09:08 -04:00
-												factor: second stab at [ ] [ ] unit-test -> { } [ ] unit-test

											
										
										
											2015-07-03 12:39:59 -04:00
+								{ "\u001112\u001161\u0011abA\u000300a\r\r\n" }
-												New \u...... syntax; io.utf8 and io.utf16 can actually use strings now

											
										
										
											2008-02-01 16:00:02 -05:00
+								[ "\r\n\raA\u000300\u001112\u001161\u0011ab" string-reverse ] unit-test
-												factor: second stab at [ ] [ ] unit-test -> { } [ ] unit-test

											
										
										
											2015-07-03 12:39:59 -04:00
+								{ "dcba" } [ "abcd" string-reverse ] unit-test
 								{ 3 } [ "\u001112\u001161\u0011abA\u000300a"
-												Unicode breaks fix and deletion of repetition in syntax

											
										
										
											2008-01-28 00:54:38 -05:00
+								        dup last-grapheme head last-grapheme ] unit-test
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
-												factor: second stab at [ ] [ ] unit-test -> { } [ ] unit-test

											
										
										
											2015-07-03 12:39:59 -04:00
+								{ 3 } [ 2 "hello" first-grapheme-from ] unit-test
 								{ 1 } [ 2 "hello" last-grapheme-from ] unit-test
-												Adding functionality to unicode breaks API for future UI changes

											
										
										
											2009-03-31 22:48:43 -04:00
-												factor: second stab at [ ] [ ] unit-test -> { } [ ] unit-test

											
										
										
											2015-07-03 12:39:59 -04:00
+								{ 4 } [ 2 "what am I saying" first-word-from ] unit-test
 								{ 0 } [ 2 "what am I saying" last-word-from ] unit-test
 								{ 16 } [ 11 "what am I saying" first-word-from ] unit-test
 								{ 10 } [ 11 "what am I saying" last-word-from ] unit-test
-												More features in the unicode.breaks API, with documentation

											
										
										
											2009-07-03 22:22:46 -04:00
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								: grapheme-break-test ( -- filename )
-												Add vocab: for vocab-relative paths
											
										
										
											2009-02-15 20:53:21 -05:00
+								    "vocab:unicode/breaks/GraphemeBreakTest.txt" ;
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
-												Word breaks

											
										
										
											2009-01-07 13:23:07 -05:00
+								: word-break-test ( -- filename )
-												Add vocab: for vocab-relative paths
											
										
										
											2009-02-15 20:53:21 -05:00
+								    "vocab:unicode/breaks/WordBreakTest.txt" ;
-												Word breaks

											
										
										
											2009-01-07 13:23:07 -05:00
 								: parse-test-file ( file-name -- tests )
 								    utf8 file-lines
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								    [ "#" split1 drop ] map harvest [
 								        "÷" split
-												Removing integers-as-sequences

											
										
										
											2010-01-14 10:10:13 -05:00
+								        [
 								            "×" split
 								            [ [ blank? ] trim hex> ] map
-												use reject instead of [ ... not ] filter.

											
										
										
											2015-05-12 21:50:34 -04:00
+								            [ { f 0 } member? ] reject
-												Removing integers-as-sequences

											
										
										
											2010-01-14 10:10:13 -05:00
+								            >string
 								        ] map
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								        harvest
 								    ] map ;
-												Word breaks

											
										
										
											2009-01-07 13:23:07 -05:00
+								:: test ( tests quot -- )
 								    tests [
 								        [ 1quotation ]
 								        [ concat [ quot call [ "" like ] map ] curry ] bi unit-test
 								    ] each ;
-												Fixing some unit test failures

											
										
										
											2009-04-22 08:05:00 -04:00
+								: grapheme-test ( tests -- )
-												unicode.breaks tests and bug fixes

											
										
										
											2009-01-06 11:19:19 -05:00
+								    [
 								        [ 1quotation ]
 								        [ concat [ >graphemes [ "" like ] map ] curry ] bi unit-test
 								    ] each ;
-												Word breaks

											
										
										
											2009-01-07 13:23:07 -05:00
+								grapheme-break-test parse-test-file [ >graphemes ] test
-												Unicode word breaks

											
										
										
											2009-01-07 16:08:08 -05:00
+								word-break-test parse-test-file [ >words ] test
-												Adding word breaks to regexp

											
										
										
											2009-03-11 16:51:54 -04:00
-												factor: second stab at [ ] [ ] unit-test -> { } [ ] unit-test

											
										
										
											2015-07-03 12:39:59 -04:00
+								{ { t f t t f t } } [ 6 iota [ "as df" word-break-at? ] map ] unit-test