2006-06-11 23:38:39 -04:00
|
|
|
! Copyright (C) 2006 Slava Pestov.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
IN: help
|
|
|
|
USING: arrays graphs hashtables help io kernel math namespaces
|
|
|
|
porter-stemmer prettyprint sequences strings ;
|
|
|
|
|
2006-06-12 02:41:19 -04:00
|
|
|
! Right now this code is specific to the help. It will be
|
|
|
|
! generalized to an abstract full text search engine later.
|
|
|
|
|
2006-06-11 23:38:39 -04:00
|
|
|
: ignored-word? ( str -- ? )
|
|
|
|
{ "the" "of" "is" "to" "an" "and" "if" "in" "with" "this" "not" "are" "for" "by" "can" "be" "or" "from" "it" "does" "as" } member? ;
|
|
|
|
|
|
|
|
: tokenize ( string -- seq )
|
|
|
|
[ dup letter? swap LETTER? or not ] split*
|
|
|
|
[ >lower stem ] map
|
|
|
|
[
|
|
|
|
dup ignored-word? over length 1 = or swap empty? or not
|
|
|
|
] subset ;
|
|
|
|
|
2006-06-12 02:41:19 -04:00
|
|
|
: index-text ( score article string -- )
|
|
|
|
tokenize [ >r 2dup r> nest hash+ ] each 2drop ;
|
|
|
|
|
|
|
|
: index-article-title ( article -- )
|
2006-06-16 23:12:40 -04:00
|
|
|
3 swap dup article-title index-text ;
|
2006-06-12 02:41:19 -04:00
|
|
|
|
|
|
|
: index-article-content ( article -- )
|
|
|
|
1 swap dup [ help ] string-out index-text ;
|
|
|
|
|
|
|
|
: index-article ( article -- )
|
|
|
|
dup index-article-title index-article-content ;
|
|
|
|
|
|
|
|
SYMBOL: term-index
|
|
|
|
|
|
|
|
: discard-irrelevant ( results -- results )
|
|
|
|
#! Discard results in the low 33%
|
|
|
|
dup 0 [ second max ] reduce
|
|
|
|
swap [ first2 rot / 2array ] map-with
|
|
|
|
[ second 1/3 > ] subset ;
|
|
|
|
|
2006-06-11 23:38:39 -04:00
|
|
|
: count-occurrences ( seq -- hash )
|
|
|
|
[
|
2006-06-17 01:03:56 -04:00
|
|
|
dup [ [ drop off ] hash-each ] each
|
2006-06-12 02:41:19 -04:00
|
|
|
[ [ swap +@ ] hash-each ] each
|
2006-06-11 23:38:39 -04:00
|
|
|
] make-hash ;
|
|
|
|
|
2006-06-12 02:41:19 -04:00
|
|
|
: search-help ( phrase -- assoc )
|
|
|
|
tokenize [ term-index get hash ] map [ ] subset
|
2006-06-11 23:38:39 -04:00
|
|
|
count-occurrences hash>alist
|
2006-06-12 02:41:19 -04:00
|
|
|
[ first2 2array ] map
|
|
|
|
[ [ second ] 2apply swap - ] sort discard-irrelevant ;
|
2006-06-11 23:38:39 -04:00
|
|
|
|
2006-06-12 02:41:19 -04:00
|
|
|
: index-help ( -- )
|
|
|
|
[ all-articles [ index-article ] each ] make-hash
|
|
|
|
term-index set-global ;
|
2006-06-11 23:38:39 -04:00
|
|
|
|
|
|
|
: search-help. ( phrase -- )
|
2006-06-12 02:41:19 -04:00
|
|
|
"Search results for ``" write dup write "'':" print
|
|
|
|
search-help [
|
2006-06-14 01:47:28 -04:00
|
|
|
first <link> [ article-title ] keep write-object terpri
|
2006-06-12 02:41:19 -04:00
|
|
|
] each ;
|