| 
									
										
										
										
											2008-05-06 21:59:37 -04:00
										 |  |  | USING: values kernel sequences assocs io.files | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | io.encodings ascii math.ranges io splitting math.parser  | 
					
						
							|  |  |  | namespaces byte-arrays locals math sets io.encodings.ascii | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  | words compiler.units arrays interval-maps ;
 | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | IN: unicode.script | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  | VALUE: script-table | 
					
						
							|  |  |  | SYMBOL: interned | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-script ( stream -- assoc )
 | 
					
						
							|  |  |  |     ! assoc is code point/range => name | 
					
						
							| 
									
										
										
										
											2008-05-14 00:36:55 -04:00
										 |  |  |     lines [ "#" split1 drop ] map harvest [ | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  |         ";" split1 [ [ blank? ] trim ] bi@
 | 
					
						
							|  |  |  |     ] H{ } map>assoc ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  | : range, ( value key -- )
 | 
					
						
							|  |  |  |     swap interned get
 | 
					
						
							|  |  |  |     [ word-name = ] with find nip 2array , ;
 | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  | : expand-ranges ( assoc -- interval-map )
 | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             CHAR: . pick member? [ | 
					
						
							|  |  |  |                 swap ".." split1 [ hex> ] bi@ 2array
 | 
					
						
							|  |  |  |             ] [ swap hex> ] if range, | 
					
						
							|  |  |  |         ] assoc-each
 | 
					
						
							|  |  |  |     ] { } make <interval-map> ;
 | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : >symbols ( strings -- symbols )
 | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         [ "unicode.script" create dup define-symbol ] map
 | 
					
						
							|  |  |  |     ] with-compilation-unit ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : process-script ( ranges -- )
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  |     dup values prune >symbols interned [ | 
					
						
							|  |  |  |         expand-ranges \ script-table set-value | 
					
						
							|  |  |  |     ] with-variable ;
 | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : load-script ( -- )
 | 
					
						
							|  |  |  |     "resource:extra/unicode/script/Scripts.txt" | 
					
						
							|  |  |  |     ascii <file-reader> parse-script process-script ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | load-script | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  | SYMBOL: Unknown | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-05 00:33:54 -04:00
										 |  |  | : script-of ( char -- script )
 | 
					
						
							| 
									
										
										
										
											2008-05-06 00:47:22 -04:00
										 |  |  |     script-table interval-at [ Unknown ] unless* ;
 |