| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | ! Copyright (C) 2009 Daniel Ehrenberg. | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2009-02-26 19:06:57 -05:00
										 |  |  | USING: regexp.classes kernel sequences regexp.negation | 
					
						
							| 
									
										
										
										
											2010-03-02 18:05:37 -05:00
										 |  |  | quotations assocs fry math locals combinators sets | 
					
						
							| 
									
										
										
										
											2009-02-26 19:06:57 -05:00
										 |  |  | accessors words compiler.units kernel.private strings | 
					
						
							| 
									
										
										
										
											2009-03-13 20:40:38 -04:00
										 |  |  | sequences.private arrays namespaces unicode.breaks | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | regexp.transition-tables combinators.short-circuit ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | IN: regexp.compiler | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | GENERIC: question>quot ( question -- quot )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | SYMBOL: shortest? | 
					
						
							|  |  |  | SYMBOL: backwards? | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | M: t question>quot drop [ 2drop t ] ;
 | 
					
						
							| 
									
										
										
										
											2009-03-11 16:51:54 -04:00
										 |  |  | M: f question>quot drop [ 2drop f ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | M: beginning-of-input question>quot | 
					
						
							|  |  |  |     drop [ drop zero? ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: end-of-input question>quot | 
					
						
							|  |  |  |     drop [ length = ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: end-of-file question>quot | 
					
						
							|  |  |  |     drop [ | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             [ length swap - 2 <= ] | 
					
						
							|  |  |  |             [ swap tail { "\n" "\r\n" "\r" "" } member? ] | 
					
						
							|  |  |  |         } 2&& | 
					
						
							|  |  |  |     ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: $ question>quot | 
					
						
							|  |  |  |     drop [ { [ length = ] [ ?nth "\r\n" member? ] } 2|| ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: ^ question>quot | 
					
						
							| 
									
										
										
										
											2009-08-13 20:21:44 -04:00
										 |  |  |     drop [ { [ drop zero? ] [ [ 1 - ] dip ?nth "\r\n" member? ] } 2|| ] ;
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-16 18:53:38 -04:00
										 |  |  | M: $unix question>quot | 
					
						
							|  |  |  |     drop [ { [ length = ] [ ?nth CHAR: \n = ] } 2|| ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: ^unix question>quot | 
					
						
							| 
									
										
										
										
											2009-08-13 20:21:44 -04:00
										 |  |  |     drop [ { [ drop zero? ] [ [ 1 - ] dip ?nth CHAR: \n = ] } 2|| ] ;
 | 
					
						
							| 
									
										
										
										
											2009-03-16 18:53:38 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-11 16:51:54 -04:00
										 |  |  | M: word-break question>quot | 
					
						
							|  |  |  |     drop [ word-break-at? ] ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-07 17:31:46 -05:00
										 |  |  | : (execution-quot) ( next-state -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     ! The conditions here are for lookaround and anchors, etc | 
					
						
							|  |  |  |     dup condition? [ | 
					
						
							|  |  |  |         [ question>> question>quot ] [ yes>> ] [ no>> ] tri
 | 
					
						
							| 
									
										
										
										
											2009-03-07 17:31:46 -05:00
										 |  |  |         [ (execution-quot) ] bi@
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |         '[ 2dup @ _ _ if ] | 
					
						
							| 
									
										
										
										
											2009-03-07 17:31:46 -05:00
										 |  |  |     ] [ '[ _ execute ] ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : execution-quot ( next-state -- quot )
 | 
					
						
							|  |  |  |     dup sequence? [ first ] when
 | 
					
						
							|  |  |  |     (execution-quot) ;
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | TUPLE: box contents ;
 | 
					
						
							|  |  |  | C: <box> box | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-04 14:22:22 -05:00
										 |  |  | : condition>quot ( condition -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     ! Conditions here are for different classes | 
					
						
							| 
									
										
										
										
											2009-03-04 14:22:22 -05:00
										 |  |  |     dup condition? [ | 
					
						
							|  |  |  |         [ question>> ] [ yes>> ] [ no>> ] tri
 | 
					
						
							|  |  |  |         [ condition>quot ] bi@
 | 
					
						
							|  |  |  |         '[ dup _ class-member? _ _ if ] | 
					
						
							|  |  |  |     ] [ | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |         contents>> | 
					
						
							|  |  |  |         [ [ 3drop ] ] [ execution-quot '[ drop @ ] ] if-empty
 | 
					
						
							| 
									
										
										
										
											2009-03-04 14:22:22 -05:00
										 |  |  |     ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-07 17:31:46 -05:00
										 |  |  | : non-literals>dispatch ( literals non-literals  -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     [ swap ] assoc-map ! we want state => predicate, and get the opposite as input | 
					
						
							| 
									
										
										
										
											2009-03-09 16:44:11 -04:00
										 |  |  |     swap keys f assoc-answers | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     table>condition [ <box> ] condition-map condition>quot ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : literals>cases ( literal-transitions -- case-body )
 | 
					
						
							|  |  |  |     [ execution-quot ] assoc-map ;
 | 
					
						
							| 
									
										
										
										
											2009-03-04 16:54:56 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | : split-literals ( transitions -- case default )
 | 
					
						
							| 
									
										
										
										
											2009-03-11 23:04:47 -04:00
										 |  |  |     { } assoc-like [ first integer? ] partition
 | 
					
						
							| 
									
										
										
										
											2009-03-07 17:31:46 -05:00
										 |  |  |     [ [ literals>cases ] keep ] dip non-literals>dispatch ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  | : advance ( index backwards? -- index+/-1 )
 | 
					
						
							|  |  |  |     -1 1 ? + >fixnum ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : check ( index string backwards? -- in-bounds? )
 | 
					
						
							|  |  |  |     [ drop -1 eq? not ] [ length < ] if ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | :: step ( last-match index str quot final? backwards? -- last-index/f )
 | 
					
						
							| 
									
										
										
										
											2009-02-26 19:06:57 -05:00
										 |  |  |     final? index last-match ?
 | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  |     index str backwards? check [ | 
					
						
							|  |  |  |         index backwards? advance str | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  |         index str nth-unsafe | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |         quot call
 | 
					
						
							| 
									
										
										
										
											2009-02-26 19:06:57 -05:00
										 |  |  |     ] when ; inline
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : transitions>quot ( transitions final-state? -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     dup shortest? get and [ 2drop [ drop nip ] ] [ | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  |         [ split-literals swap case>quot ] dip backwards? get
 | 
					
						
							|  |  |  |         '[ { fixnum string } declare _ _ _ step ] | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : word>quot ( word dfa -- quot )
 | 
					
						
							|  |  |  |     [ transitions>> at ] | 
					
						
							| 
									
										
										
										
											2010-03-02 18:05:37 -05:00
										 |  |  |     [ final-states>> in? ] 2bi
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  |     transitions>quot ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : states>code ( words dfa -- )
 | 
					
						
							| 
									
										
										
										
											2009-03-16 06:00:27 -04:00
										 |  |  |     '[ | 
					
						
							|  |  |  |         dup _ word>quot | 
					
						
							| 
									
										
										
										
											2011-10-18 16:18:42 -04:00
										 |  |  |         ( last-match index string -- ? )
 | 
					
						
							| 
									
										
										
										
											2009-03-16 06:00:27 -04:00
										 |  |  |         define-declared | 
					
						
							|  |  |  |     ] each ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | : states>words ( dfa -- words dfa )
 | 
					
						
							|  |  |  |     dup transitions>> keys [ gensym ] H{ } map>assoc
 | 
					
						
							| 
									
										
										
										
											2014-05-19 13:04:16 -04:00
										 |  |  |     [ transitions-at ] | 
					
						
							|  |  |  |     [ values ] | 
					
						
							|  |  |  |     bi swap ;  | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | : dfa>main-word ( dfa -- word )
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  |     states>words [ states>code ] keep start-state>> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  | : word-template ( quot -- quot' )
 | 
					
						
							| 
									
										
										
										
											2014-05-19 13:04:16 -04:00
										 |  |  |     '[ drop [ f ] 2dip over array-capacity? _ [ 2drop ] if ] ;
 | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2009-02-26 23:14:41 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | : dfa>word ( dfa -- quot )
 | 
					
						
							| 
									
										
										
										
											2009-03-29 23:57:13 -04:00
										 |  |  |     dfa>main-word execution-quot word-template | 
					
						
							| 
									
										
										
										
											2011-10-18 16:18:42 -04:00
										 |  |  |     ( start-index string regexp -- i/f ) define-temp ;
 | 
					
						
							| 
									
										
										
										
											2009-03-05 17:34:04 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | : dfa>shortest-word ( dfa -- word )
 | 
					
						
							|  |  |  |     t shortest? [ dfa>word ] with-variable ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 19:06:57 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | : dfa>reverse-word ( dfa -- word )
 | 
					
						
							|  |  |  |     t backwards? [ dfa>word ] with-variable ;
 | 
					
						
							| 
									
										
										
										
											2009-02-26 15:19:02 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-10 19:27:04 -04:00
										 |  |  | : dfa>reverse-shortest-word ( dfa -- word )
 | 
					
						
							|  |  |  |     t backwards? [ dfa>shortest-word ] with-variable ;
 |