2008-10-05 19:36:56 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2008 Daniel Ehrenberg.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-10 23:11:40 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								USING: combinators.short-circuit unicode.categories kernel math
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								combinators splitting sequences math.parser io.files io assocs
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-18 22:04:36 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								arrays namespaces make math.ranges unicode.normalize
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								unicode.normalize.private values io.encodings.ascii
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 02:11:45 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								unicode.data compiler.units fry unicode.categories.syntax
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:53:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								alien.syntax sets accessors interval-maps memoize locals words
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								simple-flat-file ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: unicode.breaks
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<PRIVATE
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								! Grapheme breaks
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-06 11:19:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								C-ENUM: Any L V T LV LVT Extend Control CR LF
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    SpacingMark Prepend graphemes ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: jamo-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup initial? [ drop L ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ dup medial? [ drop V ] [ final? T Any ? ] if ] if ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-06 11:19:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: hangul-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    hangul-base - HEX: 1C mod zero? LV LVT ? ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CATEGORY: grapheme-control Zl Zp Cc Cf ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: control-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: \r [ CR ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { CHAR: \n [ LF ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { HEX: 200C [ Extend ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { HEX: 200D [ Extend ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ drop Control ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-21 02:11:45 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								CATEGORY: extend
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Me Mn |
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "Other_Grapheme_Extend" property? ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-06 11:19:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: loe? ( ch -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "Logical_Order_Exception" property? ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								CATEGORY: spacing Mc ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: grapheme-class ( ch -- class )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup jamo? ] [ jamo-class ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-06 11:19:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ dup hangul? ] [ hangul-class ] }
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup grapheme-control? ] [ control-class ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-06 11:19:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { [ dup extend? ] [ drop Extend ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ dup spacing? ] [ drop SpacingMark ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { [ loe? ] [ Prepend ] }
							 | 
						
					
						
							
								
									
										
										
										
											2008-04-11 13:54:51 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ Any ]
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } cond ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: init-table ( size -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup [ f <array> ] curry replicate ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: finish-table ( -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    table get [ [ 1 = ] map ] map ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-26 22:27:45 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: eval-seq ( seq -- seq ) [ ?execute ] map ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: (set-table) ( class1 class2 val -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 16:43:42 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ table get nth ] dip '[ _ or ] change-nth ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: set-table ( classes1 classes2 val -- )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ eval-seq ] bi@ ] dip
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ (set-table) ] curry with each ] 2curry each ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: connect ( class1 class2 -- ) 1 set-table ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: disconnect ( class1 class2 -- ) 0 set-table ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: break-around ( classes1 classes2 -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ disconnect ] [ swap disconnect ] 2bi ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: make-grapheme-table ( -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    { CR } { LF } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { Control CR LF } graphemes disconnect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    graphemes { Control CR LF } disconnect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { L } { L V LV LVT } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { LV V } { V T } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { LVT T } { T } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    graphemes { Extend } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    graphemes { SpacingMark } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { Prepend } graphemes connect ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-29 14:33:14 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								VALUE: grapheme-table
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: grapheme-break? ( class1 class2 -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    grapheme-table nth nth not ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: chars ( i str n -- str[i] str[i+n] )
							 | 
						
					
						
							
								
									
										
										
										
											2008-12-04 11:19:18 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    swap [ dupd + ] dip [ ?nth ] curry bi@ ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								PRIVATE>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: first-grapheme ( str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    unclip-slice grapheme-class over
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-23 19:20:47 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ grapheme-class [ nip ] [ grapheme-break? ] 2bi ] find drop
							 | 
						
					
						
							
								
									
										
										
										
											2008-01-28 00:54:38 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    nip swap length or 1+ ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-31 22:48:43 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: first-grapheme-from ( start str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    over tail-slice first-grapheme + ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: last-grapheme ( str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    unclip-last-slice grapheme-class swap
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ grapheme-class dup rot grapheme-break? ] find-last drop ?1+ nip ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: last-grapheme-from ( end str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap head-slice last-grapheme ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<PRIVATE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-08 00:54:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: >pieces ( str quot: ( str -- i ) -- graphemes )
							 | 
						
					
						
							
								
									
										
										
										
											2009-02-28 16:31:34 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ dup empty? not ] swap '[ dup @ cut-slice swap ] produce nip ; inline
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								PRIVATE>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: >graphemes ( str -- graphemes )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    [ first-grapheme ] >pieces ;
							 | 
						
					
						
							
								
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: string-reverse ( str -- rts )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    >graphemes reverse concat ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<PRIVATE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								graphemes init-table table
							 | 
						
					
						
							
								
									
										
										
										
											2008-05-24 18:34:01 -04:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								[ make-grapheme-table finish-table ] with-variable
							 | 
						
					
						
							
								
									
										
										
										
											2008-09-28 01:40:41 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								to: grapheme-table
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! Word breaks
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								VALUE: word-break-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-20 20:53:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								to: word-break-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								wMidNum wMidNumLet wNumeric wExtendNumLet words ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: word-break-classes ( -- table ) ! Is there a way to avoid this?
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    H{
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { "Other" 0 } { "CR" 1 } { "LF" 2 } { "Newline" 3 }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "Extend" 4 } { "Format" 5 } { "Katakana" 6 }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "ALetter" 7 } { "MidLetter" 8 }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "MidNum" 9 } { "MidNumLet" 10 } { "Numeric" 11 }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { "ExtendNumLet" 12 }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: word-break-prop ( char -- word-break-prop )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    word-break-table interval-at
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    word-break-classes at [ wOther ] unless* ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: check-letter-before
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: check-letter-after
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: check-number-before
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								SYMBOL: check-number-after
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: make-word-table ( -- )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    { wCR } { wLF } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wNewline wCR wLF } words disconnect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    words { wNewline wCR wLF } disconnect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wALetter } { wMidLetter wMidNumLet } check-letter-after set-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wMidLetter wMidNumLet } { wALetter } check-letter-before set-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wNumeric wALetter } { wNumeric wALetter } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wNumeric } { wMidNum wMidNumLet } check-number-after set-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wMidNum wMidNumLet } { wNumeric } check-number-before set-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wKatakana } { wKatakana } connect
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    { wALetter wNumeric wKatakana wExtendNumLet } { wExtendNumLet }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ connect ] [ swap connect ] 2bi ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								VALUE: word-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: finish-word-table ( -- table )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    table get [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ { { 0 [ f ] } { 1 [ t ] } [ ] } case ] map
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] map ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								words init-table table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								[ make-word-table finish-word-table ] with-variable
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								to: word-table
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: word-table-nth ( class1 class2 -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    word-table nth nth ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: property-not= ( str i property -- ? )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-08 23:23:39 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    i [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        i str ?nth [ word-break-prop property = not ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ f ] if*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] [ t ] if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: format/extended? ( ch -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    word-break-prop { 4 5 } member? ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: (walk-up) ( str i -- j )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap [ format/extended? not ] find-from drop ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: walk-up ( str i -- j )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dupd 1+ (walk-up) [ 1+ (walk-up) ] [ drop f ] if* ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: (walk-down) ( str i -- j )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    swap [ format/extended? not ] find-last-from drop ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: walk-down ( str i -- j )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dupd (walk-down) [ 1- (walk-down) ] [ drop f ] if* ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 17:46:57 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: word-break? ( str i table-entry -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { t [ 2drop f ] }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        { f [ 2drop t ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { check-letter-after
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            [ dupd walk-up wALetter property-not= ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { check-letter-before
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            [ dupd walk-down wALetter property-not= ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { check-number-after
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            [ dupd walk-up wNumeric property-not= ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        { check-number-before
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            [ dupd walk-down wNumeric property-not= ] }
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-11 20:41:48 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    } case ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 16:08:08 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:: word-break-next ( old-class new-char i str -- next-class ? )
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-15 17:46:57 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    new-char format/extended?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ old-class dup { 1 2 3 } member? ] [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        new-char word-break-prop old-class over word-table-nth
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ str i ] dip word-break?
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-11 20:41:48 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    ] if ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								PRIVATE>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-09 16:53:35 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: first-word ( str -- i )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ unclip-slice word-break-prop over <enum> ] keep
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    '[ swap _ word-break-next ] assoc-find 2drop
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-11 20:41:48 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    nip swap length or 1+ ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-01-07 13:23:07 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: >words ( str -- words )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ first-word ] >pieces ;
							 | 
						
					
						
							
								
									
										
										
										
											2009-03-11 16:51:54 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								<PRIVATE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: nth-next ( i str -- str[i-1] str[i] )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ [ 1- ] keep ] dip '[ _ nth ] bi@ ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								PRIVATE>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: word-break-at? ( i str -- ? )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ drop zero? ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [ length = ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            [ nth-next [ word-break-prop ] dip ] 2keep
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            word-break-next nip
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } 2|| ;
							 |