| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | ! Copyright (C) 2008 Doug Coleman. | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							|  |  |  | USING: accessors arrays assocs combinators fry kernel locals | 
					
						
							| 
									
										
										
										
											2008-09-18 15:42:16 -04:00
										 |  |  | math math.order regexp.nfa regexp.transition-tables sequences | 
					
						
							|  |  |  | sets sorting vectors regexp.utils sequences.deep ;
 | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | USING: io prettyprint threads ;
 | 
					
						
							| 
									
										
										
										
											2008-09-18 15:42:16 -04:00
										 |  |  | IN: regexp.dfa | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : find-delta ( states transition regexp -- new-states )
 | 
					
						
							|  |  |  |     nfa-table>> transitions>> | 
					
						
							| 
									
										
										
										
											2008-09-18 15:42:16 -04:00
										 |  |  |     rot [ swap at at ] with with gather sift ;
 | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : (find-epsilon-closure) ( states regexp -- new-states )
 | 
					
						
							|  |  |  |     eps swap find-delta ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : find-epsilon-closure ( states regexp -- new-states )
 | 
					
						
							| 
									
										
										
										
											2008-09-12 22:56:25 -04:00
										 |  |  |     '[ dup _ (find-epsilon-closure) union ] [ length ] while-changes | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  |     natural-sort ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : find-closure ( states transition regexp -- new-states )
 | 
					
						
							|  |  |  |     [ find-delta ] 2keep nip find-epsilon-closure ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : find-start-state ( regexp -- state )
 | 
					
						
							|  |  |  |     [ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : find-transitions ( seq1 regexp -- seq2 )
 | 
					
						
							|  |  |  |     nfa-table>> transitions>> | 
					
						
							| 
									
										
										
										
											2008-09-22 15:55:17 -04:00
										 |  |  |     [ at keys ] curry gather | 
					
						
							| 
									
										
										
										
											2008-09-18 15:42:16 -04:00
										 |  |  |     eps swap remove ;
 | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : add-todo-state ( state regexp -- )
 | 
					
						
							|  |  |  |     2dup visited-states>> key? [ | 
					
						
							|  |  |  |         2drop
 | 
					
						
							|  |  |  |     ] [ | 
					
						
							|  |  |  |         [ visited-states>> conjoin ] | 
					
						
							|  |  |  |         [ new-states>> push ] 2bi
 | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : new-transitions ( regexp -- )
 | 
					
						
							|  |  |  |     dup new-states>> [ | 
					
						
							|  |  |  |         drop
 | 
					
						
							|  |  |  |     ] [ | 
					
						
							|  |  |  |         dupd pop dup pick find-transitions rot
 | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             [ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
 | 
					
						
							| 
									
										
										
										
											2008-11-23 00:01:24 -05:00
										 |  |  |             [ swapd transition make-transition ] dip
 | 
					
						
							|  |  |  |             dfa-table>> add-transition  | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  |         ] curry with each
 | 
					
						
							|  |  |  |         new-transitions | 
					
						
							|  |  |  |     ] if-empty ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : states ( hashtable -- array )
 | 
					
						
							|  |  |  |     [ keys ] | 
					
						
							|  |  |  |     [ values [ values concat ] map concat append ] bi ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : set-final-states ( regexp -- )
 | 
					
						
							|  |  |  |     dup
 | 
					
						
							|  |  |  |     [ nfa-table>> final-states>> keys ] | 
					
						
							|  |  |  |     [ dfa-table>> transitions>> states ] bi
 | 
					
						
							|  |  |  |     [ intersect empty? not ] with filter
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     swap dfa-table>> final-states>> | 
					
						
							|  |  |  |     [ conjoin ] curry each ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : set-initial-state ( regexp -- )
 | 
					
						
							|  |  |  |     dup
 | 
					
						
							|  |  |  |     [ dfa-table>> ] [ find-start-state ] bi
 | 
					
						
							|  |  |  |     [ >>start-state drop ] keep
 | 
					
						
							|  |  |  |     1vector >>new-states drop ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-28 23:08:54 -04:00
										 |  |  | : set-traversal-flags ( regexp -- )
 | 
					
						
							| 
									
										
										
										
											2008-09-21 22:45:27 -04:00
										 |  |  |     dup
 | 
					
						
							| 
									
										
										
										
											2008-08-28 23:08:54 -04:00
										 |  |  |     [ nfa-traversal-flags>> ] | 
					
						
							| 
									
										
										
										
											2008-09-21 22:45:27 -04:00
										 |  |  |     [ dfa-table>> transitions>> keys ] bi
 | 
					
						
							|  |  |  |     [ tuck [ swap at ] with map concat ] with H{ } map>assoc
 | 
					
						
							|  |  |  |     >>dfa-traversal-flags drop ;
 | 
					
						
							| 
									
										
										
										
											2008-08-28 23:08:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-26 21:24:14 -04:00
										 |  |  | : construct-dfa ( regexp -- )
 | 
					
						
							| 
									
										
										
										
											2008-09-21 22:45:27 -04:00
										 |  |  |     { | 
					
						
							|  |  |  |         [ set-initial-state ] | 
					
						
							|  |  |  |         [ new-transitions ] | 
					
						
							|  |  |  |         [ set-final-states ] | 
					
						
							|  |  |  |         [ set-traversal-flags ] | 
					
						
							|  |  |  |     } cleave ;
 |