113 lines
2.9 KiB
Factor
Executable File
113 lines
2.9 KiB
Factor
Executable File
USING: kernel unicode.data sequences sequences.next namespaces
|
|
assocs.lib unicode.normalize math unicode.categories combinators
|
|
assocs strings splitting ;
|
|
IN: unicode.case
|
|
|
|
: ch>lower ( ch -- lower ) simple-lower at-default ;
|
|
: ch>upper ( ch -- upper ) simple-upper at-default ;
|
|
: ch>title ( ch -- title ) simple-title at-default ;
|
|
|
|
SYMBOL: locale ! Just casing locale, or overall?
|
|
|
|
: i-dot? ( -- ? )
|
|
locale get { "tr" "az" } member? ;
|
|
|
|
: lithuanian? ( -- ? ) locale get "lt" = ;
|
|
|
|
: dot-over ( -- ch ) HEX: 307 ;
|
|
|
|
: lithuanian-ch>upper ( ? next ch -- ? )
|
|
rot [ 2drop f ]
|
|
[ swap dot-over = over "ij" member? and swap , ] if ;
|
|
|
|
: lithuanian>upper ( string -- lower )
|
|
[ f swap [ lithuanian-ch>upper ] each-next drop ] "" make ;
|
|
|
|
: mark-above? ( ch -- ? )
|
|
combining-class 230 = ;
|
|
|
|
: lithuanian-ch>lower ( next ch -- )
|
|
! This fails to add a dot above in certain edge cases
|
|
! where there is a non-above combining mark before an above one
|
|
! in Lithuanian
|
|
dup , "IJ" member? swap mark-above? and [ dot-over , ] when ;
|
|
|
|
: lithuanian>lower ( string -- lower )
|
|
[ [ lithuanian-ch>lower ] each-next ] "" make ;
|
|
|
|
: turk-ch>upper ( ch -- )
|
|
dup CHAR: i =
|
|
[ drop CHAR: I , dot-over , ] [ , ] if ;
|
|
|
|
: turk>upper ( string -- upper-i )
|
|
[ [ turk-ch>upper ] each ] "" make ;
|
|
|
|
: turk-ch>lower ( ? next ch -- ? )
|
|
{
|
|
{ [ rot ] [ 2drop f ] }
|
|
{ [ dup CHAR: I = ] [
|
|
drop dot-over =
|
|
dup CHAR: i HEX: 131 ? ,
|
|
] }
|
|
{ [ t ] [ , drop f ] }
|
|
} cond ;
|
|
|
|
: turk>lower ( string -- lower-i )
|
|
[ f swap [ turk-ch>lower ] each-next drop ] "" make ;
|
|
|
|
: word-boundary ( prev char -- new ? )
|
|
dup non-starter? [ drop dup ] when
|
|
swap uncased? ;
|
|
|
|
: sigma-map ( string -- string )
|
|
[
|
|
swap [ uncased? ] keep not or
|
|
[ drop HEX: 3C2 ] when
|
|
] map-next ;
|
|
|
|
: final-sigma ( string -- string )
|
|
HEX: 3A3 over member? [ sigma-map ] when ;
|
|
|
|
: map-case ( string string-quot char-quot -- case )
|
|
[
|
|
rot [
|
|
-rot [
|
|
rot dup special-casing at
|
|
[ -rot drop call % ]
|
|
[ -rot nip call , ] ?if
|
|
] 2keep
|
|
] each 2drop
|
|
] "" make ; inline
|
|
|
|
: >lower ( string -- lower )
|
|
i-dot? [ turk>lower ] when
|
|
final-sigma [ code-point-lower ] [ ch>lower ] map-case ;
|
|
|
|
: >upper ( string -- upper )
|
|
i-dot? [ turk>upper ] when
|
|
[ code-point-upper ] [ ch>upper ] map-case ;
|
|
|
|
: >title ( string -- title )
|
|
final-sigma
|
|
CHAR: \s swap
|
|
[ tuck word-boundary swapd
|
|
[ code-point-title ] [ code-point-lower ] if ]
|
|
[ tuck word-boundary swapd
|
|
[ ch>title ] [ ch>lower ] if ]
|
|
map-case nip ;
|
|
|
|
: >case-fold ( string -- fold )
|
|
>upper >lower ;
|
|
|
|
: insensitive= ( str1 str2 -- ? )
|
|
[ >case-fold ] 2apply = ;
|
|
|
|
: lower? ( string -- ? )
|
|
dup >lower = ;
|
|
: upper? ( string -- ? )
|
|
dup >lower = ;
|
|
: title? ( string -- ? )
|
|
dup >title = ;
|
|
: case-fold? ( string -- ? )
|
|
dup >case-fold = ;
|