| 
									
										
										
										
											2008-09-10 21:07:00 -04:00
										 |  |  | USING: unicode.data kernel math sequences parser lexer | 
					
						
							|  |  |  | bit-arrays namespaces make sequences.private arrays quotations | 
					
						
							|  |  |  | assocs classes.predicate math.order eval ;
 | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  | IN: unicode.syntax | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! Character classes (categories) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : category# ( char -- category )
 | 
					
						
							|  |  |  |     ! There are a few characters that should be Cn | 
					
						
							|  |  |  |     ! that this gives Cf or Mn | 
					
						
							|  |  |  |     ! Cf = 26; Mn = 5; Cn = 29 | 
					
						
							|  |  |  |     ! Use a compressed array instead? | 
					
						
							|  |  |  |     dup category-map ?nth [ ] [ | 
					
						
							|  |  |  |         dup HEX: E0001 HEX: E007F between? | 
					
						
							|  |  |  |         [ drop 26 ] [ | 
					
						
							|  |  |  |             HEX: E0100 HEX: E01EF between?  5 29 ?
 | 
					
						
							|  |  |  |         ] if
 | 
					
						
							|  |  |  |     ] ?if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : category ( char -- category )
 | 
					
						
							|  |  |  |     category# categories nth ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : >category-array ( categories -- bitarray )
 | 
					
						
							| 
									
										
										
										
											2008-01-10 22:03:34 -05:00
										 |  |  |     categories [ swap member? ] with map >bit-array ;
 | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : as-string ( strings -- bit-array )
 | 
					
						
							| 
									
										
										
										
											2008-01-10 22:03:34 -05:00
										 |  |  |     concat "\"" tuck 3append eval ;
 | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : [category] ( categories -- quot )
 | 
					
						
							|  |  |  |     [ | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |         [ [ categories member? not ] filter as-string ] keep  | 
					
						
							|  |  |  |         [ categories member? ] filter >category-array | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  |         [ dup category# ] % , [ nth-unsafe [ drop t ] ] % | 
					
						
							|  |  |  |         \ member? 2array >quotation , | 
					
						
							|  |  |  |         \ if , | 
					
						
							|  |  |  |     ] [ ] make ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : define-category ( word categories -- )
 | 
					
						
							| 
									
										
										
										
											2008-03-26 19:23:19 -04:00
										 |  |  |     [category] integer swap define-predicate-class ;
 | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : CATEGORY: | 
					
						
							|  |  |  |     CREATE ";" parse-tokens define-category ; parsing | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : seq-minus ( seq1 seq2 -- diff )
 | 
					
						
							| 
									
										
										
										
											2008-04-26 00:17:08 -04:00
										 |  |  |     [ member? not ] curry filter ;
 | 
					
						
							| 
									
										
										
										
											2008-01-09 14:44:07 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : CATEGORY-NOT: | 
					
						
							|  |  |  |     CREATE ";" parse-tokens | 
					
						
							|  |  |  |     categories swap seq-minus define-category ; parsing |