2008-10-05 19:36:56 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2008 Daniel Ehrenberg.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-10 23:11:40 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								USING: combinators.short-circuit unicode.categories kernel math
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								combinators splitting sequences math.parser io.files io assocs
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								arrays namespaces make math.ranges unicode.normalize values
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								io.encodings.ascii unicode.syntax unicode.data compiler.units
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								alien.syntax sets ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: unicode.breaks
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-27 19:44:26 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								C-ENUM: Any L V T Extend Control CR LF graphemes ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: jamo-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup initial? [ drop L ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CATEGORY: grapheme-control Zl Zp Cc Cf ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: control-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: \r [ CR ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: \n [ LF ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { HEX: 200C [ Extend ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { HEX: 200D [ Extend ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ drop Control ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CATEGORY: (extend) Me Mn ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: extend? ( ch -- ? )
							 | 
						
					
						
							
								
									
										
										
										
											2008-06-13 02:51:46 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: grapheme-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup jamo? ] [ jamo-class ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup grapheme-control? ] [ control-class ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ extend? ] [ Extend ] }
							 | 
						
					
						
							
								
									
										
										
										
											2008-04-11 13:54:51 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ Any ]
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } cond ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: init-grapheme-table ( -- table )
							 | 
						
					
						
							
								
									
										
										
										
											2008-06-13 02:51:46 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    graphemes [ graphemes f <array> ] replicate ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: finish-table ( -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    table get [ [ 1 = ] map ] map ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: set-table ( class1 class2 val -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    -rot table get nth [ swap or ] change-nth ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: connect ( class1 class2 -- ) 1 set-table ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: disconnect ( class1 class2 -- ) 0 set-table ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: connect-before ( class classes -- )
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-09 17:36:30 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ connect ] with each ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: connect-after ( classes class -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ connect ] curry each ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: break-around ( classes1 classes2 -- )
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-09 17:36:30 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ 2dup disconnect swap disconnect ] with each ] curry each ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: make-grapheme-table ( -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    CR LF connect
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-27 19:44:26 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Control CR LF 3array graphemes break-around
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    L L V 2array connect-before
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    V V T 2array connect-before
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    T T connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    graphemes Extend connect-after ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-29 14:33:14 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								VALUE: grapheme-table
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: grapheme-break? ( class1 class2 -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    grapheme-table nth nth not ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: chars ( i str n -- str[i] str[i+n] )
							 | 
						
					
						
							
								
									
										
										
										
											2008-03-29 21:36:58 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap >r dupd + r> [ ?nth ] curry bi@ ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: find-index ( seq quot -- i ) find drop ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: find-last-index ( seq quot -- i ) find-last drop ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: first-grapheme ( str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    unclip-slice grapheme-class over
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ grapheme-class tuck grapheme-break? ] find-index
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    nip swap length or 1+ ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: (>graphemes) ( str -- )
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-06 20:13:59 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        dup first-grapheme cut-slice
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        swap , (>graphemes)
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-06 20:13:59 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] unless-empty ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: >graphemes ( str -- graphemes )
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ (>graphemes) ] { } make ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: string-reverse ( str -- rts )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    >graphemes reverse concat ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: last-grapheme ( str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    unclip-last-slice grapheme-class swap
							 | 
						
					
						
							
								
									
										
										
										
											2008-04-13 04:52:40 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ grapheme-class dup rot grapheme-break? ] find-last-index ?1+ nip ;
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-10 22:03:34 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-05-24 18:34:01 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								init-grapheme-table table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								[ make-grapheme-table finish-table ] with-variable
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-28 01:40:41 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								to: grapheme-table
							 |