| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | ! Copyright (C) 2009 Daniel Ehrenberg | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2009-03-18 23:55:02 -04:00
										 |  |  | USING: sequences splitting kernel math.parser io.files io.encodings.utf8 | 
					
						
							| 
									
										
										
										
											2009-03-20 20:53:54 -04:00
										 |  |  | biassocs ascii namespaces arrays make assocs interval-maps sets ;
 | 
					
						
							| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | IN: simple-flat-file | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : drop-comments ( seq -- newseq )
 | 
					
						
							| 
									
										
										
										
											2009-03-18 23:49:06 -04:00
										 |  |  |     [ "#@" split first ] map harvest ;
 | 
					
						
							| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : split-column ( line -- columns )
 | 
					
						
							| 
									
										
										
										
											2009-03-07 21:55:55 -05:00
										 |  |  |     " \t" split harvest 2 short head 2 f pad-tail ;
 | 
					
						
							| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-hex ( s -- n )
 | 
					
						
							| 
									
										
										
										
											2009-03-07 21:55:55 -05:00
										 |  |  |     dup [ | 
					
						
							|  |  |  |         "0x" ?head [ "U+" ?head [ "Missing 0x or U+" throw ] unless ] unless
 | 
					
						
							|  |  |  |         hex> | 
					
						
							|  |  |  |     ] when ;
 | 
					
						
							| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-line ( line -- code-unicode )
 | 
					
						
							|  |  |  |     split-column [ parse-hex ] map ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : process-codetable-lines ( lines -- assoc )
 | 
					
						
							|  |  |  |     drop-comments [ parse-line ] map ;  | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : flat-file>biassoc ( filename -- biassoc )
 | 
					
						
							| 
									
										
										
										
											2009-03-18 23:55:02 -04:00
										 |  |  |     utf8 file-lines process-codetable-lines >biassoc ;
 | 
					
						
							| 
									
										
										
										
											2009-03-03 00:19:06 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-18 23:49:06 -04:00
										 |  |  | : split-; ( line -- array )
 | 
					
						
							|  |  |  |     ";" split [ [ blank? ] trim ] map ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : data ( filename -- data )
 | 
					
						
							| 
									
										
										
										
											2009-03-19 01:24:09 -04:00
										 |  |  |     utf8 file-lines drop-comments [ split-; ] map ;
 | 
					
						
							| 
									
										
										
										
											2009-03-20 20:53:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: interned | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : range, ( value key -- )
 | 
					
						
							|  |  |  |     swap interned get
 | 
					
						
							|  |  |  |     [ = ] with find nip 2array , ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : expand-ranges ( assoc -- interval-map )
 | 
					
						
							|  |  |  |     [ | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             swap CHAR: . over member? [ | 
					
						
							|  |  |  |                 ".." split1 [ hex> ] bi@ 2array
 | 
					
						
							|  |  |  |             ] [ hex> ] if range, | 
					
						
							|  |  |  |         ] assoc-each
 | 
					
						
							|  |  |  |     ] { } make <interval-map> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : process-interval-file ( ranges -- table )
 | 
					
						
							|  |  |  |     dup values prune interned | 
					
						
							|  |  |  |     [ expand-ranges ] with-variable ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : load-interval-file ( filename -- table )
 | 
					
						
							|  |  |  |     data process-interval-file ;
 |