factor/library/help/search.factor

93 lines
2.4 KiB
Factor
Raw Normal View History

2006-06-11 23:38:39 -04:00
! Copyright (C) 2006 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
IN: help
2006-08-02 17:08:40 -04:00
USING: arrays definitions graphs hashtables help io kernel math
namespaces porter-stemmer prettyprint sequences strings words ;
2006-06-11 23:38:39 -04:00
! Right now this code is specific to the help. It will be
! generalized to an abstract full text search engine later.
2006-06-11 23:38:39 -04:00
: ignored-word? ( str -- ? )
{ "the" "of" "is" "to" "an" "and" "if" "in" "with" "this" "not" "are" "for" "by" "can" "be" "or" "from" "it" "does" "as" } member? ;
: tokenize ( string -- seq )
[ dup letter? swap LETTER? or not ] split*
[ >lower stem ] map
[
dup ignored-word? over length 1 = or swap empty? or not
] subset ;
2006-06-17 01:18:46 -04:00
: index-text ( article string -- )
tokenize [ 1 -rot nest hash+ ] each-with ;
2006-06-22 22:36:56 -04:00
SYMBOL: term-index
: index-article ( article -- )
2006-06-22 22:36:56 -04:00
term-index get [
[ dup [ help ] string-out index-text ] bind
] [
drop
] if* ;
2006-06-22 22:36:56 -04:00
: unindex-article ( article -- )
term-index get [
[ nip remove-hash ] hash-each-with
] [
drop
] if* ;
: discard-irrelevant ( results -- results )
#! Discard results in the low 33%
dup 0 [ second max ] reduce
swap [ first2 rot / 2array ] map-with
[ second 1/3 > ] subset ;
2006-06-11 23:38:39 -04:00
: count-occurrences ( seq -- hash )
[
2006-06-17 01:03:56 -04:00
dup [ [ drop off ] hash-each ] each
[ [ swap +@ ] hash-each ] each
2006-06-11 23:38:39 -04:00
] make-hash ;
: search-help ( phrase -- assoc )
tokenize [ term-index get hash ] map [ ] subset
2006-06-11 23:38:39 -04:00
count-occurrences hash>alist
[ first2 2array ] map
[ [ second ] 2apply swap - ] sort discard-irrelevant ;
2006-06-11 23:38:39 -04:00
: index-help ( -- )
2006-06-22 22:36:56 -04:00
term-index get [
dup clear-hash
[ all-articles [ index-article ] each ] bind
] when* ;
: remove-article ( name -- )
dup articles get hash-member? [
dup unxref-article
dup unindex-article
dup articles get remove-hash
] when drop ;
2006-06-11 23:38:39 -04:00
2006-08-02 16:53:26 -04:00
: add-article ( name article -- )
over remove-article
over >r swap articles get set-hash r>
2006-06-22 22:36:56 -04:00
dup xref-article index-article ;
: remove-word-help ( word -- )
dup word-help [
dup unxref-article
dup unindex-article
] when drop ;
: set-word-help ( word content -- )
over remove-word-help
over >r "help" set-word-prop r>
dup xref-article index-article ;
2006-06-22 01:57:43 -04:00
2006-06-11 23:38:39 -04:00
: search-help. ( phrase -- )
search-help [ first ] map help-outliner ;
2006-08-02 16:53:26 -04:00
! Definition protocol
M: link forget link-name remove-article ;
M: word-link forget f "help" set-word-prop ;