2008-09-10 23:11:40 -04:00
|
|
|
USING: combinators.short-circuit unicode.categories kernel math
|
|
|
|
combinators splitting sequences math.parser io.files io assocs
|
|
|
|
arrays namespaces make math.ranges unicode.normalize values
|
|
|
|
io.encodings.ascii unicode.syntax unicode.data compiler.units
|
|
|
|
alien.syntax sets ;
|
2007-09-20 18:09:08 -04:00
|
|
|
IN: unicode.breaks
|
|
|
|
|
2008-01-27 19:44:26 -05:00
|
|
|
C-ENUM: Any L V T Extend Control CR LF graphemes ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: jamo-class ( ch -- class )
|
|
|
|
dup initial? [ drop L ]
|
|
|
|
[ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
|
|
|
|
|
|
|
|
CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
|
|
|
: control-class ( ch -- class )
|
|
|
|
{
|
|
|
|
{ CHAR: \r [ CR ] }
|
|
|
|
{ CHAR: \n [ LF ] }
|
|
|
|
{ HEX: 200C [ Extend ] }
|
|
|
|
{ HEX: 200D [ Extend ] }
|
|
|
|
[ drop Control ]
|
|
|
|
} case ;
|
|
|
|
|
|
|
|
CATEGORY: (extend) Me Mn ;
|
|
|
|
: extend? ( ch -- ? )
|
2008-06-13 02:51:46 -04:00
|
|
|
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: grapheme-class ( ch -- class )
|
|
|
|
{
|
|
|
|
{ [ dup jamo? ] [ jamo-class ] }
|
|
|
|
{ [ dup grapheme-control? ] [ control-class ] }
|
|
|
|
{ [ extend? ] [ Extend ] }
|
2008-04-11 13:54:51 -04:00
|
|
|
[ Any ]
|
2007-09-20 18:09:08 -04:00
|
|
|
} cond ;
|
|
|
|
|
|
|
|
: init-grapheme-table ( -- table )
|
2008-06-13 02:51:46 -04:00
|
|
|
graphemes [ graphemes f <array> ] replicate ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
SYMBOL: table
|
|
|
|
|
|
|
|
: finish-table ( -- table )
|
|
|
|
table get [ [ 1 = ] map ] map ;
|
|
|
|
|
|
|
|
: set-table ( class1 class2 val -- )
|
|
|
|
-rot table get nth [ swap or ] change-nth ;
|
|
|
|
|
|
|
|
: connect ( class1 class2 -- ) 1 set-table ;
|
|
|
|
: disconnect ( class1 class2 -- ) 0 set-table ;
|
|
|
|
|
|
|
|
: connect-before ( class classes -- )
|
2008-01-09 17:36:30 -05:00
|
|
|
[ connect ] with each ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: connect-after ( classes class -- )
|
|
|
|
[ connect ] curry each ;
|
|
|
|
|
|
|
|
: break-around ( classes1 classes2 -- )
|
2008-01-09 17:36:30 -05:00
|
|
|
[ [ 2dup disconnect swap disconnect ] with each ] curry each ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: make-grapheme-table ( -- )
|
|
|
|
CR LF connect
|
2008-01-27 19:44:26 -05:00
|
|
|
Control CR LF 3array graphemes break-around
|
|
|
|
L L V 2array connect-before
|
|
|
|
V V T 2array connect-before
|
2007-09-20 18:09:08 -04:00
|
|
|
T T connect
|
|
|
|
graphemes Extend connect-after ;
|
|
|
|
|
2008-01-29 14:33:14 -05:00
|
|
|
VALUE: grapheme-table
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: grapheme-break? ( class1 class2 -- ? )
|
|
|
|
grapheme-table nth nth not ;
|
|
|
|
|
|
|
|
: chars ( i str n -- str[i] str[i+n] )
|
2008-03-29 21:36:58 -04:00
|
|
|
swap >r dupd + r> [ ?nth ] curry bi@ ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-01-28 00:54:38 -05:00
|
|
|
: find-index ( seq quot -- i ) find drop ; inline
|
|
|
|
: find-last-index ( seq quot -- i ) find-last drop ; inline
|
|
|
|
|
|
|
|
: first-grapheme ( str -- i )
|
|
|
|
unclip-slice grapheme-class over
|
|
|
|
[ grapheme-class tuck grapheme-break? ] find-index
|
|
|
|
nip swap length or 1+ ;
|
|
|
|
|
|
|
|
: (>graphemes) ( str -- )
|
2008-09-06 20:13:59 -04:00
|
|
|
[
|
2008-01-28 00:54:38 -05:00
|
|
|
dup first-grapheme cut-slice
|
|
|
|
swap , (>graphemes)
|
2008-09-06 20:13:59 -04:00
|
|
|
] unless-empty ;
|
2008-01-28 00:54:38 -05:00
|
|
|
|
2007-09-20 18:09:08 -04:00
|
|
|
: >graphemes ( str -- graphemes )
|
2008-01-28 00:54:38 -05:00
|
|
|
[ (>graphemes) ] { } make ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
: string-reverse ( str -- rts )
|
|
|
|
>graphemes reverse concat ;
|
|
|
|
|
2008-01-28 00:54:38 -05:00
|
|
|
: last-grapheme ( str -- i )
|
|
|
|
unclip-last-slice grapheme-class swap
|
2008-04-13 04:52:40 -04:00
|
|
|
[ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
|
2008-01-10 22:03:34 -05:00
|
|
|
|
2008-05-24 18:34:01 -04:00
|
|
|
init-grapheme-table table
|
|
|
|
[ make-grapheme-table finish-table ] with-variable
|
2008-09-28 01:40:41 -04:00
|
|
|
to: grapheme-table
|