fix various typos; cleanup and fully document boyer-moore
							parent
							
								
									d8f813a531
								
							
						
					
					
						commit
						426d8f09b2
					
				| 
						 | 
				
			
			@ -514,7 +514,7 @@ PRIVATE>
 | 
			
		|||
: 4seq ( parser1 parser2 parser3 parser4 -- parser )
 | 
			
		||||
    4array seq ;
 | 
			
		||||
 | 
			
		||||
: seq* ( quot -- paser )
 | 
			
		||||
: seq* ( quot -- parser )
 | 
			
		||||
    { } make seq ; inline
 | 
			
		||||
 | 
			
		||||
: choice ( seq -- parser )
 | 
			
		||||
| 
						 | 
				
			
			@ -529,7 +529,7 @@ PRIVATE>
 | 
			
		|||
: 4choice ( parser1 parser2 parser3 parser4 -- parser )
 | 
			
		||||
    4array choice ;
 | 
			
		||||
 | 
			
		||||
: choice* ( quot -- paser )
 | 
			
		||||
: choice* ( quot -- parser )
 | 
			
		||||
    { } make choice ; inline
 | 
			
		||||
 | 
			
		||||
: repeat0 ( parser -- parser )
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,13 +5,22 @@ IN: boyer-moore
 | 
			
		|||
 | 
			
		||||
HELP: <boyer-moore>
 | 
			
		||||
{ $values
 | 
			
		||||
  { "pat" sequence } { "bm" boyer-moore }
 | 
			
		||||
  { "pattern" sequence } { "boyer-moore" boyer-moore }
 | 
			
		||||
}
 | 
			
		||||
{ $description
 | 
			
		||||
  "Given a pattern performs pattern preprocessing and returns "
 | 
			
		||||
  "results as an (opaque) object that is reusable across "
 | 
			
		||||
  "searches in different sequences via " { $link search-from }
 | 
			
		||||
  " generic word."
 | 
			
		||||
  "searches in different sequences via " { $link search-from } "."
 | 
			
		||||
} { $examples
 | 
			
		||||
    { $example
 | 
			
		||||
        "USING: boyer-moore prettyprint ;"
 | 
			
		||||
        "\"abc\" <boyer-moore> ."
 | 
			
		||||
        "T{ boyer-moore
 | 
			
		||||
    { pattern \"abc\" }
 | 
			
		||||
    { bad-char-table H{ { 97 0 } { 98 -1 } { 99 -2 } } }
 | 
			
		||||
    { good-suffix-table { 3 3 1 } }
 | 
			
		||||
}"
 | 
			
		||||
    }
 | 
			
		||||
} ;
 | 
			
		||||
 | 
			
		||||
HELP: search-from
 | 
			
		||||
| 
						 | 
				
			
			@ -21,12 +30,18 @@ HELP: search-from
 | 
			
		|||
  { "obj" object }
 | 
			
		||||
  { "i/f" "the index of first match or " { $link f } }
 | 
			
		||||
}
 | 
			
		||||
{ $description "Performs an attempt to find the first "
 | 
			
		||||
{ $contract "Performs an attempt to find the first "
 | 
			
		||||
  "occurrence of pattern in " { $snippet "seq" }
 | 
			
		||||
  " starting from " { $snippet "from" } " using "
 | 
			
		||||
  "Boyer-Moore search algorithm. Output is the index "
 | 
			
		||||
  "if the attempt was succeessful and " { $link f }
 | 
			
		||||
  "if the attempt was succeessful, or " { $link f }
 | 
			
		||||
  " otherwise."
 | 
			
		||||
} { $examples
 | 
			
		||||
    { $example
 | 
			
		||||
        "USING: boyer-moore prettyprint ;"
 | 
			
		||||
        "{ 1 2 7 10 20 2 7 10 } 3 { 2 7 10 } search-from ."
 | 
			
		||||
        "5"
 | 
			
		||||
    }
 | 
			
		||||
} ;
 | 
			
		||||
 | 
			
		||||
HELP: search
 | 
			
		||||
| 
						 | 
				
			
			@ -37,21 +52,27 @@ HELP: search
 | 
			
		|||
}
 | 
			
		||||
{ $description "A simpler variant of " { $link search-from }
 | 
			
		||||
  " that starts searching from the beginning of the sequence."
 | 
			
		||||
} { $examples
 | 
			
		||||
    { $example
 | 
			
		||||
        "USING: boyer-moore prettyprint ;"
 | 
			
		||||
        "\"Source string\" \"ce st\" search ."
 | 
			
		||||
        "4"
 | 
			
		||||
    }
 | 
			
		||||
} ;
 | 
			
		||||
 | 
			
		||||
ARTICLE: "boyer-moore" "The Boyer-Moore algorithm"
 | 
			
		||||
{ $heading "Summary" }
 | 
			
		||||
"The " { $vocab-link "boyer-moore" } " vocabulary "
 | 
			
		||||
"implements a Boyer-Moore string search algorithm with "
 | 
			
		||||
"so-called 'strong good suffix shift rule'. Since algorithm is "
 | 
			
		||||
"alphabet-independent it is applicable to searching in any "
 | 
			
		||||
"collection that implements " { $links "sequence-protocol" } "."
 | 
			
		||||
"implements a Boyer-Moore string search algorithm with the "
 | 
			
		||||
"so-called 'strong good suffix shift rule'. Since the algorithm is "
 | 
			
		||||
"alphabet-independent, it is applicable to searching in any "
 | 
			
		||||
"collection that implements the " { $links "sequence-protocol" } "."
 | 
			
		||||
 | 
			
		||||
{ $heading "Complexity" }
 | 
			
		||||
"Let " { $snippet "n" } " and " { $snippet "m" } " be lengths "
 | 
			
		||||
"Let " { $snippet "n" } " and " { $snippet "m" } " be the lengths "
 | 
			
		||||
"of the sequences being searched " { $emphasis "in" } " and "
 | 
			
		||||
{ $emphasis "for" } " respectively. Then searching runs in "
 | 
			
		||||
{ $snippet "O(n)" } " time in its worst case using additional "
 | 
			
		||||
{ $snippet "O(n)" } " time worst-case, using additional "
 | 
			
		||||
{ $snippet "O(m)" } " space. The preprocessing phase runs in "
 | 
			
		||||
{ $snippet "O(m)" } " time."
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,26 +21,26 @@ IN: boyer-moore
 | 
			
		|||
    [ length dup ] [ <reversed> ] bi
 | 
			
		||||
    [ (partial-suffixes) ] map-index 2nip ; inline
 | 
			
		||||
 | 
			
		||||
: <gs-table> ( seq -- table )
 | 
			
		||||
: <good-suffix-table> ( seq -- table )
 | 
			
		||||
    z-values [ partial-suffixes ] [ normal-suffixes ] bi
 | 
			
		||||
    [ [ nip ] when* ] 2map reverse! ; inline
 | 
			
		||||
 | 
			
		||||
: insert-bc-shift ( table elt len i -- table )
 | 
			
		||||
: insert-bad-char-shift ( table elt len i -- table )
 | 
			
		||||
    1 + swap - swap pick 2dup key?
 | 
			
		||||
    [ 3drop ] [ set-at ] if ; inline
 | 
			
		||||
 | 
			
		||||
: <bc-table> ( seq -- table )
 | 
			
		||||
: <bad-char-table> ( seq -- table )
 | 
			
		||||
    H{ } clone swap [ length ] keep
 | 
			
		||||
    [ insert-bc-shift ] with each-index ; inline
 | 
			
		||||
    [ insert-bad-char-shift ] with each-index ; inline
 | 
			
		||||
 | 
			
		||||
TUPLE: boyer-moore pattern bc-table gs-table ;
 | 
			
		||||
TUPLE: boyer-moore pattern bad-char-table good-suffix-table ;
 | 
			
		||||
 | 
			
		||||
: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline
 | 
			
		||||
: good-suffix-shift ( i c boyer-moore -- s ) nip good-suffix-table>> nth-unsafe ; inline
 | 
			
		||||
 | 
			
		||||
: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline
 | 
			
		||||
: bad-char-shift ( i c boyer-moore -- s ) bad-char-table>> at dup 1 ? + ; inline
 | 
			
		||||
 | 
			
		||||
: do-shift ( pos i c bm -- newpos )
 | 
			
		||||
    [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline
 | 
			
		||||
: do-shift ( pos i c boyer-moore -- newpos )
 | 
			
		||||
    [ good-suffix-shift ] [ bad-char-shift ] bi-curry 2bi max + ; inline
 | 
			
		||||
 | 
			
		||||
: match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -48,23 +48,23 @@ TUPLE: boyer-moore pattern bc-table gs-table ;
 | 
			
		|||
    len 1 - [ [ pos + s1 ] keep s2 match? not ]
 | 
			
		||||
    find-last-integer ; inline
 | 
			
		||||
 | 
			
		||||
:: (search-from) ( seq from bm -- i/f )
 | 
			
		||||
    bm pattern>>      :> pat
 | 
			
		||||
    pat length        :> plen
 | 
			
		||||
    seq length plen - :> lim
 | 
			
		||||
:: (search-from) ( seq from boyer-moore -- i/f )
 | 
			
		||||
    boyer-moore pattern>> :> pat
 | 
			
		||||
    pat length            :> plen
 | 
			
		||||
    seq length plen -     :> lim
 | 
			
		||||
    from
 | 
			
		||||
    [
 | 
			
		||||
        dup lim <=
 | 
			
		||||
        [
 | 
			
		||||
            seq pat pick plen mismatch?
 | 
			
		||||
            [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if*
 | 
			
		||||
            [ 2dup + seq nth-unsafe boyer-moore do-shift t ] [ f ] if*
 | 
			
		||||
        ] [ drop f f ] if
 | 
			
		||||
    ] loop ; inline
 | 
			
		||||
 | 
			
		||||
PRIVATE>
 | 
			
		||||
 | 
			
		||||
: <boyer-moore> ( pat -- bm )
 | 
			
		||||
    dup <reversed> [ <bc-table> ] [ <gs-table> ] bi
 | 
			
		||||
: <boyer-moore> ( pattern -- boyer-moore )
 | 
			
		||||
    dup <reversed> [ <bad-char-table> ] [ <good-suffix-table> ] bi
 | 
			
		||||
    boyer-moore boa ;
 | 
			
		||||
 | 
			
		||||
GENERIC: search-from ( seq from obj -- i/f )
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue