| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | USING: accessors arrays assocs combinators.short-circuit fry | 
					
						
							|  |  |  | hints interval-maps kernel math math.order sequences sorting | 
					
						
							|  |  |  | strings unicode.breaks.private unicode.case.private | 
					
						
							|  |  |  | unicode.categories unicode.collation unicode.collation.private | 
					
						
							|  |  |  | unicode.data unicode.data.private unicode.normalize.private | 
					
						
							| 
									
										
										
										
											2019-07-05 10:32:49 -04:00
										 |  |  | unicode.script locals math.ranges ;
 | 
					
						
							| 
									
										
										
										
											2009-01-07 18:59:01 -05:00
										 |  |  | IN: unicode | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: blank Zs Zl Zp | "\r\n\t" member? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: letter Ll | "Other_Lowercase" property? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: digit Nd Nl No ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: control Cc ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY-NOT: character Cn ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CATEGORY: math Sm | "Other_Math" property? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : script-of ( char -- script )
 | 
					
						
							|  |  |  |     script-table interval-at ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : name>char ( name -- char ) name-map at ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : char>name ( char -- name ) name-map value-at ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : ch>lower ( ch -- lower ) simple-lower ?at drop ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : ch>upper ( ch -- upper ) simple-upper ?at drop ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : ch>title ( ch -- title ) simple-title ?at drop ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-05 10:32:49 -04:00
										 |  |  | :: first-grapheme ( entire-str start -- i )
 | 
					
						
							|  |  |  |     start :> pos! | 
					
						
							|  |  |  |     entire-str length :> str-len | 
					
						
							|  |  |  |     0 pos 1 + entire-str <slice> grapheme-class | 
					
						
							|  |  |  |     pos 1 + str-len 1 - min pos! | 
					
						
							|  |  |  |     pos str-len 1 - [a,b] [ | 
					
						
							|  |  |  |         1 + 0 swap entire-str <slice> grapheme-class | 
					
						
							|  |  |  |         dup rot swap grapheme-break? | 
					
						
							|  |  |  |     ] find drop nip
 | 
					
						
							|  |  |  |     [ 1 + ] [ str-len start - ] if* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | :: first-grapheme-from ( start str -- i )
 | 
					
						
							|  |  |  |     str start first-grapheme start + ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | :: last-grapheme ( str -- i )
 | 
					
						
							|  |  |  |     str length 1 - :> pos! | 
					
						
							|  |  |  |     pos 0 = [ 0 ] [ | 
					
						
							|  |  |  |         str grapheme-class | 
					
						
							|  |  |  |         pos 1 - 0 max pos! | 
					
						
							|  |  |  |         0 pos [a,b] [ | 
					
						
							|  |  |  |             0 swap 1 + str <slice> grapheme-class | 
					
						
							|  |  |  |             dup rot grapheme-break? | 
					
						
							|  |  |  |         ] find-last drop ?1+ nip
 | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : last-grapheme-from ( end str -- i )
 | 
					
						
							| 
									
										
										
										
											2019-07-05 10:32:49 -04:00
										 |  |  |      swap head-slice last-grapheme ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >pieces ( str quot: ( str -- i ) -- graphemes )
 | 
					
						
							|  |  |  |     [ dup empty? not ] swap '[ dup @ cut-slice swap ] produce nip ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-05 10:32:49 -04:00
										 |  |  | :: >graphemes ( str -- graphemes )
 | 
					
						
							|  |  |  |     str length :> str-len | 
					
						
							|  |  |  |     0 :> pos! 0 :> old-pos! | 
					
						
							|  |  |  |     [ f ! dummy | 
					
						
							|  |  |  |       pos old-pos! old-pos str-len < [ | 
					
						
							|  |  |  |           str pos first-grapheme pos + pos! pos str-len <=
 | 
					
						
							|  |  |  |       ] [ f ] if ] | 
					
						
							|  |  |  |     [ drop old-pos pos str <slice> ] produce nip ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : count-graphemes ( str -- n ) >graphemes length ; inline
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : string-reverse ( str -- rts )
 | 
					
						
							|  |  |  |     >graphemes reverse! concat ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : first-word ( str -- i )
 | 
					
						
							|  |  |  |     [ [ length ] [ first word-break-prop ] bi ] keep
 | 
					
						
							|  |  |  |     1 swap dup '[ _ word-break-next ] find-index-from
 | 
					
						
							|  |  |  |     drop nip swap or ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >words ( str -- words )
 | 
					
						
							|  |  |  |     [ first-word ] >pieces ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : nth-next ( i str -- str[i-1] str[i] )
 | 
					
						
							|  |  |  |     [ [ 1 - ] keep ] dip '[ _ nth ] bi@ ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : word-break-at? ( i str -- ? )
 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         [ drop zero? ] | 
					
						
							|  |  |  |         [ length = ] | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             [ nth-next [ word-break-prop ] dip ] 2keep
 | 
					
						
							|  |  |  |             word-break-next nip
 | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     } 2|| ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : first-word-from ( start str -- i )
 | 
					
						
							|  |  |  |     over tail-slice first-word + ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : last-word ( str -- i )
 | 
					
						
							| 
									
										
										
										
											2017-06-01 17:59:35 -04:00
										 |  |  |     [ length <iota> ] keep '[ _ word-break-at? ] find-last drop 0 or ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : last-word-from ( end str -- i )
 | 
					
						
							|  |  |  |     swap head-slice last-word ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >lower ( string -- lower )
 | 
					
						
							|  |  |  |     locale>lower final-sigma | 
					
						
							|  |  |  |     [ lower>> ] [ ch>lower ] map-case ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | HINTS: >lower string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >upper ( string -- upper )
 | 
					
						
							|  |  |  |     locale>upper | 
					
						
							|  |  |  |     [ upper>> ] [ ch>upper ] map-case ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | HINTS: >upper string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (>title) ( string -- title )
 | 
					
						
							|  |  |  |     locale>upper | 
					
						
							|  |  |  |     [ title>> ] [ ch>title ] map-case ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : capitalize ( string -- title )
 | 
					
						
							|  |  |  |     unclip-slice 1string [ >lower ] [ (>title) ] bi*
 | 
					
						
							|  |  |  |     "" prepend-as ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >title ( string -- title )
 | 
					
						
							|  |  |  |     final-sigma >words [ capitalize ] map! concat ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | HINTS: >title string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >case-fold ( string -- fold )
 | 
					
						
							|  |  |  |     >upper >lower ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 14:39:18 -04:00
										 |  |  | : lower? ( string -- ? ) dup >lower sequence= ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 14:39:18 -04:00
										 |  |  | : upper? ( string -- ? ) dup >upper sequence= ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 14:39:18 -04:00
										 |  |  | : title? ( string -- ? ) dup >title sequence= ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-10 14:39:18 -04:00
										 |  |  | : case-fold? ( string -- ? ) dup >case-fold sequence= ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : nfd ( string -- nfd )
 | 
					
						
							|  |  |  |     [ (nfd) ] with-string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : nfkd ( string -- nfkd )
 | 
					
						
							|  |  |  |     [ (nfkd) ] with-string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : string-append ( s1 s2 -- string )
 | 
					
						
							|  |  |  |     [ append ] keep
 | 
					
						
							|  |  |  |     0 over ?nth non-starter? | 
					
						
							|  |  |  |     [ length dupd reorder-back ] [ drop ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | HINTS: string-append string string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : nfc ( string -- nfc )
 | 
					
						
							|  |  |  |     [ (nfd) combine ] with-string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : nfkc ( string -- nfkc )
 | 
					
						
							|  |  |  |     [ (nfkd) combine ] with-string ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-06-22 10:47:07 -04:00
										 |  |  | : collation-key/nfd ( string -- key nfd )
 | 
					
						
							|  |  |  |     nfd [ | 
					
						
							|  |  |  |         string>graphemes graphemes>weights | 
					
						
							|  |  |  |         filter-ignorable weights>bytes | 
					
						
							|  |  |  |     ] keep ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : insensitive= ( str1 str2 levels-removed -- ? )
 | 
					
						
							|  |  |  |     [ | 
					
						
							| 
									
										
										
										
											2019-06-22 10:47:07 -04:00
										 |  |  |         [ collation-key/nfd drop ] dip
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  |         [ [ 0 = not ] trim-tail but-last ] times
 | 
					
						
							|  |  |  |     ] curry same? ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : primary= ( str1 str2 -- ? )
 | 
					
						
							|  |  |  |     3 insensitive= ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : secondary= ( str1 str2 -- ? )
 | 
					
						
							|  |  |  |     2 insensitive= ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : tertiary= ( str1 str2 -- ? )
 | 
					
						
							|  |  |  |     1 insensitive= ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : quaternary= ( str1 str2 -- ? )
 | 
					
						
							|  |  |  |     0 insensitive= ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : sort-strings ( strings -- sorted )
 | 
					
						
							| 
									
										
										
										
											2019-06-22 10:47:07 -04:00
										 |  |  |     [ collation-key/nfd 2array ] map natural-sort values ;
 | 
					
						
							| 
									
										
										
										
											2016-03-31 02:29:48 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : string<=> ( str1 str2 -- <=> )
 | 
					
						
							| 
									
										
										
										
											2019-06-22 10:47:07 -04:00
										 |  |  |     [ collation-key/nfd 2array ] compare ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 18:06:19 -04:00
										 |  |  | : upper-surrogate? ( ch -- ? ) 0xD800 0xDBFF between? ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : under-surrogate? ( ch -- ? ) 0xDC00 0xDFFF between? ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-06-22 10:47:07 -04:00
										 |  |  | CONSTANT: unicode-supported { | 
					
						
							|  |  |  |     "collation" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CONSTANT: unicode-unsupported { | 
					
						
							|  |  |  |     "bidi" | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CONSTANT: unicode-version "10.0" |