Unicode.case supports Lithuanian properly (hopefully)

2009-02-06 00:42:46 -06:00 · 2009-02-06 00:42:46 -06:00 · 28e644209c
parent e0a60ac429
commit 28e644209c
2 changed files with 34 additions and 12 deletions
--- a/basis/unicode/case/case-tests.factor
+++ b/basis/unicode/case/case-tests.factor
@@ -1,4 +1,7 @@
-USING: unicode.case tools.test namespaces ;
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: unicode.case unicode.case.private tools.test namespaces strings unicode.normalize ;
 IN: unicode.case.tests
 \ >upper must-infer
 \ >lower must-infer
@@ -9,12 +12,21 @@ USING: unicode.case tools.test namespaces ;
 [ "\u0003C3a\u0003C2 \u0003C3\u0003C2 \u0003C3a\u0003C2" ] [ "\u0003A3A\u0003A3 \u0003A3\u0003A3 \u0003A3A\u0003A3" >lower ] unit-test
 [ t ] [ "hello how are you?" lower? ] unit-test
 [
    [ f ] [ i-dot? ] unit-test
    [ f ] [ lt? ] unit-test
    "tr" locale set
    [ t ] [ i-dot? ] unit-test
    [ f ] [ lt? ] unit-test
    [ "i\u000131i \u000131jj" ] [ "i\u000131I\u000307 IJj" >lower ] unit-test
    [ "I\u000307\u000131i Ijj" ] [ "i\u000131I\u000307 IJj" >title ] unit-test
    [ "I\u000307II\u000307 IJJ" ] [ "i\u000131I\u000307 IJj" >upper ] unit-test
    "lt" locale set
-    ! Lithuanian casing tests
+    [ f ] [ i-dot? ] unit-test
    [ t ] [ lt? ] unit-test
    [ "i\u000307\u000300" ] [ HEX: CC 1string nfd >lower ] unit-test
    [ "\u00012f\u000307" ] [ HEX: 12E 1string nfd >lower nfc ] unit-test
    [ "I\u000300" ] [ "i\u000307\u000300" >upper ] unit-test
 !    [ "I\u000300" ] [ "i\u000307\u000300" >title ] unit-test
 ] with-scope
 [ t ] [ "asdf" lower? ] unit-test
--- a/basis/unicode/case/case.factor
+++ b/basis/unicode/case/case.factor
@@ -1,8 +1,8 @@
-! Copyright (C) 2008 Daniel Ehrenberg.
+! Copyright (C) 2008, 2009 Daniel Ehrenberg.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: unicode.data sequences namespaces
 sbufs make unicode.syntax unicode.normalize math hints
-unicode.categories combinators unicode.syntax assocs
+unicode.categories combinators unicode.syntax assocs combinators.short-circuit
 strings splitting kernel accessors unicode.breaks fry locals ;
 QUALIFIED: ascii
 IN: unicode.case
@@ -26,6 +26,9 @@ SYMBOL: locale ! Just casing locale, or overall?
 : i-dot? ( -- ? )
    locale get { "tr" "az" } member? ;
 : lt? ( -- ? )
    locale get "lt" = ;
 : lithuanian? ( -- ? ) locale get "lt" = ;
 : dot-over ( -- ch ) HEX: 307 ;
@@ -37,18 +40,21 @@ SYMBOL: locale ! Just casing locale, or overall?
 : mark-above? ( ch -- ? )
    combining-class 230 = ;
-: with-rest ( seq quot: ( seq -- seq ) -- seq )
+:: with-rest ( seq quot: ( seq -- seq ) -- seq )
-    [ unclip ] dip swap slip prefix ; inline
+    seq unclip quot dip prefix ; inline
 : add-dots ( seq -- seq )
-    [ [ "" ] [
+    [ [ { } ] [
-        dup first mark-above?
+        [
            dup first
            { [ mark-above? ] [ CHAR: combining-ogonek = ] } 1||
            [ CHAR: combining-dot-above prefix ] when
        ] map
    ] if-empty ] with-rest ; inline
 : lithuanian>lower ( string -- lower )
-    "i" split add-dots "i" join
+    "I" split add-dots "I" join
-    "j" split add-dots "i" join ; inline
+    "J" split add-dots "J" join ; inline
 : turk>upper ( string -- upper-i )
    "i" "I\u000307" replace ; inline
@@ -88,13 +94,16 @@ SYMBOL: locale ! Just casing locale, or overall?
 PRIVATE>
 : >lower ( string -- lower )
-    i-dot? [ turk>lower ] when final-sigma
+    i-dot? [ turk>lower ] when
+    lt? [ lithuanian>lower ] when
+    final-sigma
    [ lower>> ] [ ch>lower ] map-case ;
 HINTS: >lower string ;
 : >upper ( string -- upper )
    i-dot? [ turk>upper ] when
    lt? [ lithuanian>upper ] when
    [ upper>> ] [ ch>upper ] map-case ;
 HINTS: >upper string ;
@@ -103,6 +112,7 @@ HINTS: >upper string ;
 : (>title) ( string -- title )
    i-dot? [ turk>upper ] when
    lt? [ lithuanian>upper ] when
    [ title>> ] [ ch>title ] map-case ; inline
 : title-word ( string -- title )