From 5ecff284effd254bacd6498dc13cb7997de37d77 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Sun, 29 Mar 2009 22:57:13 -0500 Subject: [PATCH] Fixing regexp compiler's bounds checks --- basis/regexp/compiler/compiler.factor | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/basis/regexp/compiler/compiler.factor b/basis/regexp/compiler/compiler.factor index 6c7896dcca..5482734865 100644 --- a/basis/regexp/compiler/compiler.factor +++ b/basis/regexp/compiler/compiler.factor @@ -84,21 +84,24 @@ C: box { } assoc-like [ first integer? ] partition [ [ literals>cases ] keep ] dip non-literals>dispatch ; -:: step ( last-match index str quot final? direction -- last-index/f ) +: advance ( index backwards? -- index+/-1 ) + -1 1 ? + >fixnum ; inline + +: check ( index string backwards? -- in-bounds? ) + [ drop -1 eq? not ] [ length < ] if ; inline + +:: step ( last-match index str quot final? backwards? -- last-index/f ) final? index last-match ? - index str bounds-check? [ - index direction + str + index str backwards? check [ + index backwards? advance str index str nth-unsafe quot call ] when ; inline -: direction ( -- n ) - backwards? get -1 1 ? ; - : transitions>quot ( transitions final-state? -- quot ) dup shortest? get and [ 2drop [ drop nip ] ] [ - [ split-literals swap case>quot ] dip direction - '[ { array-capacity string } declare _ _ _ step ] + [ split-literals swap case>quot ] dip backwards? get + '[ { fixnum string } declare _ _ _ step ] ] if ; : word>quot ( word dfa -- quot ) @@ -122,10 +125,13 @@ C: box : dfa>main-word ( dfa -- word ) states>words [ states>code ] keep start-state>> ; +: word-template ( quot -- quot' ) + '[ drop [ f ] 2dip over array-capacity? _ [ 2drop ] if ] ; + PRIVATE> : dfa>word ( dfa -- quot ) - dfa>main-word execution-quot '[ drop [ f ] 2dip @ ] + dfa>main-word execution-quot word-template (( start-index string regexp -- i/f )) define-temp ; : dfa>shortest-word ( dfa -- word )