From 320128c7ebb162b7990012170561eba23a0c960d Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 30 Oct 2009 02:55:54 -0500 Subject: [PATCH] move histogram to math.statistics, write a main article for math.statistics --- basis/math/statistics/statistics-docs.factor | 104 +++++++++++++++++- basis/math/statistics/statistics-tests.factor | 10 ++ basis/math/statistics/statistics.factor | 33 +++++- extra/histogram/histogram-docs.factor | 87 --------------- extra/histogram/histogram-tests.factor | 12 -- extra/histogram/histogram.factor | 29 ----- 6 files changed, 141 insertions(+), 134 deletions(-) delete mode 100755 extra/histogram/histogram-docs.factor delete mode 100755 extra/histogram/histogram-tests.factor delete mode 100755 extra/histogram/histogram.factor diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor index 1a29d611f9..dc54f4181f 100644 --- a/basis/math/statistics/statistics-docs.factor +++ b/basis/math/statistics/statistics-docs.factor @@ -1,4 +1,5 @@ -USING: help.markup help.syntax debugger ; +USING: assocs debugger hashtables help.markup help.syntax +quotations sequences ; IN: math.statistics HELP: geometric-mean @@ -58,3 +59,104 @@ HELP: var { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 } var ." "1" } { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 4 } var ." "1+2/3" } } ; + +HELP: histogram +{ $values + { "seq" sequence } + { "hashtable" hashtable } +} +{ $examples + { $example "! Count the number of times an element appears in a sequence." + "USING: prettyprint histogram ;" + "\"aaabc\" histogram ." + "H{ { 97 3 } { 98 1 } { 99 1 } }" + } +} +{ $description "Returns a hashtable where the keys are the elements of the sequence and the values are the number of times they appeared in that sequence." } ; + +HELP: histogram* +{ $values + { "hashtable" hashtable } { "seq" sequence } + { "hashtable" hashtable } +} +{ $examples + { $example "! 
Count the number of times the elements of two sequences appear." + "USING: prettyprint histogram ;" + "\"aaabc\" histogram \"aaaaaabc\" histogram* ." + "H{ { 97 9 } { 98 2 } { 99 2 } }" + } +} +{ $description "Takes an existing hashtable and uses " { $link histogram } " to continue counting the number of occurrences of each element." } ; + +HELP: sequence>assoc +{ $values + { "seq" sequence } { "quot" quotation } { "exemplar" "an exemplar assoc" } + { "assoc" assoc } +} +{ $examples + { $example "! Iterate over a sequence and increment the count at each element" + "USING: assocs prettyprint histogram ;" + "\"aaabc\" [ inc-at ] H{ } sequence>assoc ." + "H{ { 97 3 } { 98 1 } { 99 1 } }" + } +} +{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a newly created " { $snippet "assoc" } " according to the passed quotation." } ; + +HELP: sequence>assoc* +{ $values + { "assoc" assoc } { "seq" sequence } { "quot" quotation } + { "assoc" assoc } +} +{ $examples + { $example "! Iterate over a sequence and add the counts to an existing assoc" + "USING: assocs prettyprint histogram kernel ;" + "H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ inc-at ] sequence>assoc* ." + "H{ { 97 5 } { 98 2 } { 99 1 } }" + } +} +{ $description "Iterates over a sequence, allowing elements of the sequence to be added to an existing " { $snippet "assoc" } " according to the passed quotation." } ; + +HELP: sequence>hashtable +{ $values + { "seq" sequence } { "quot" quotation } + { "hashtable" hashtable } +} +{ $examples + { $example "! Count the number of times an element occurs in a sequence" + "USING: assocs prettyprint histogram ;" + "\"aaabc\" [ inc-at ] sequence>hashtable ." + "H{ { 97 3 } { 98 1 } { 99 1 } }" + } +} +{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a hashtable according to the passed quotation." 
} ; + +ARTICLE: "histogram" "Computing histograms" +"Counting elements in a sequence:" +{ $subsections + histogram + histogram* +} +"Combinators for implementing histogram:" +{ $subsections + sequence>assoc + sequence>assoc* + sequence>hashtable +} ; + +ARTICLE: "math.statistics" "Statistics" +"Computing the mean:" +{ $subsections mean geometric-mean harmonic-mean } +"Computing the median:" +{ $subsections median lower-median upper-median medians } +"Computing the mode:" +{ $subsections mode } +"Computing the standard deviation and variance:" +{ $subsections std var } +"Computing the range and minimum and maximum elements:" +{ $subsections range minmax } +"Computing the kth smallest element:" +{ $subsections kth-smallest } +"Counting the frequency of occurrence of elements:" +{ $subsection "histogram" } ; + +ABOUT: "math.statistics" diff --git a/basis/math/statistics/statistics-tests.factor b/basis/math/statistics/statistics-tests.factor index 32ebcbc6a1..0d3172f685 100644 --- a/basis/math/statistics/statistics-tests.factor +++ b/basis/math/statistics/statistics-tests.factor @@ -43,3 +43,13 @@ IN: math.statistics.tests [ 0 ] [ { 1 } var ] unit-test [ 0.0 ] [ { 1 } std ] unit-test [ 0.0 ] [ { 1 } ste ] unit-test + +[ + H{ + { 97 2 } + { 98 2 } + { 99 2 } + } +] [ + "aabbcc" histogram +] unit-test diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor index dad0970855..9c72b848ca 100644 --- a/basis/math/statistics/statistics.factor +++ b/basis/math/statistics/statistics.factor @@ -45,7 +45,8 @@ IN: math.statistics k seq nth ; inline : lower-median ( seq -- elt ) - dup dup length odd? [ midpoint@ ] [ midpoint@ 1 - ] if kth-smallest ; + [ ] [ ] [ length odd? ] tri + [ midpoint@ ] [ midpoint@ 1 - ] if kth-smallest ; : upper-median ( seq -- elt ) dup midpoint@ kth-smallest ; @@ -54,13 +55,35 @@ IN: math.statistics [ lower-median ] [ upper-median ] bi ; : median ( seq -- x ) - dup length odd? 
[ lower-median ] [ medians + 2 / ] if ; + [ ] [ length odd? ] bi [ lower-median ] [ medians + 2 / ] if ; -: frequency ( seq -- hashtable ) - H{ } clone [ '[ _ inc-at ] each ] keep ; +<PRIVATE + +: (sequence>assoc) ( seq quot assoc -- assoc ) + [ swap curry each ] keep ; inline + +PRIVATE> + +: sequence>assoc* ( assoc seq quot: ( obj assoc -- ) -- assoc ) + rot (sequence>assoc) ; inline + +: sequence>assoc ( seq quot: ( obj assoc -- ) exemplar -- assoc ) + clone (sequence>assoc) ; inline + +: sequence>hashtable ( seq quot: ( obj hashtable -- ) -- hashtable ) + H{ } sequence>assoc ; inline + +: histogram* ( hashtable seq -- hashtable ) + [ inc-at ] sequence>assoc* ; + +: histogram ( seq -- hashtable ) + [ inc-at ] sequence>hashtable ; + +: collect-values ( seq quot: ( obj hashtable -- ) -- hash ) + '[ [ dup @ ] dip push-at ] sequence>hashtable ; inline : mode ( seq -- x ) - frequency >alist + histogram >alist [ ] [ [ [ second ] bi@ > ] 2keep ? ] map-reduce first ; : minmax ( seq -- min max ) diff --git a/extra/histogram/histogram-docs.factor b/extra/histogram/histogram-docs.factor deleted file mode 100755 index fc463cabfe..0000000000 --- a/extra/histogram/histogram-docs.factor +++ /dev/null @@ -1,87 +0,0 @@ -IN: histogram -USING: help.markup help.syntax sequences hashtables quotations assocs ; - -HELP: histogram -{ $values - { "seq" sequence } - { "hashtable" hashtable } -} -{ $examples - { $example "! Count the number of times an element appears in a sequence." - "USING: prettyprint histogram ;" - "\"aaabc\" histogram ." - "H{ { 97 3 } { 98 1 } { 99 1 } }" - } -} -{ $description "Returns a hashtable where the keys are the elements of the sequence and the values are the number of times they appeared in that sequence." } ; - -HELP: histogram* -{ $values - { "hashtable" hashtable } { "seq" sequence } - { "hashtable" hashtable } -} -{ $examples - { $example "! Count the number of times the elements of two sequences appear." 
- "USING: prettyprint histogram ;" - "\"aaabc\" histogram \"aaaaaabc\" histogram* ." - "H{ { 97 9 } { 98 2 } { 99 2 } }" - } -} -{ $description "Takes an existing hashtable and uses " { $link histogram } " to continue counting the number of occurences of each element." } ; - -HELP: sequence>assoc -{ $values - { "seq" sequence } { "quot" quotation } { "exemplar" "an exemplar assoc" } - { "assoc" assoc } -} -{ $examples - { $example "! Iterate over a sequence and increment the count at each element" - "USING: assocs prettyprint histogram ;" - "\"aaabc\" [ inc-at ] H{ } sequence>assoc ." - "H{ { 97 3 } { 98 1 } { 99 1 } }" - } -} -{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a newly created " { $snippet "assoc" } " according to the passed quotation." } ; - -HELP: sequence>assoc* -{ $values - { "assoc" assoc } { "seq" sequence } { "quot" quotation } - { "assoc" assoc } -} -{ $examples - { $example "! Iterate over a sequence and add the counts to an existing assoc" - "USING: assocs prettyprint histogram kernel ;" - "H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ inc-at ] sequence>assoc* ." - "H{ { 97 5 } { 98 2 } { 99 1 } }" - } -} -{ $description "Iterates over a sequence, allowing elements of the sequence to be added to an existing " { $snippet "assoc" } " according to the passed quotation." } ; - -HELP: sequence>hashtable -{ $values - { "seq" sequence } { "quot" quotation } - { "hashtable" hashtable } -} -{ $examples - { $example "! Count the number of times an element occurs in a sequence" - "USING: assocs prettyprint histogram ;" - "\"aaabc\" [ inc-at ] sequence>hashtable ." - "H{ { 97 3 } { 98 1 } { 99 1 } }" - } -} -{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a hashtable according to the passed quotation." 
} ; - -ARTICLE: "histogram" "Computing histograms" -"Counting elements in a sequence:" -{ $subsections - histogram - histogram* -} -"Combinators for implementing histogram:" -{ $subsections - sequence>assoc - sequence>assoc* - sequence>hashtable -} ; - -ABOUT: "histogram" diff --git a/extra/histogram/histogram-tests.factor b/extra/histogram/histogram-tests.factor deleted file mode 100755 index f0e7b3e80e..0000000000 --- a/extra/histogram/histogram-tests.factor +++ /dev/null @@ -1,12 +0,0 @@ -IN: histogram.tests -USING: help.markup help.syntax tools.test histogram ; - -[ - H{ - { 97 2 } - { 98 2 } - { 99 2 } - } -] [ - "aabbcc" histogram -] unit-test diff --git a/extra/histogram/histogram.factor b/extra/histogram/histogram.factor deleted file mode 100755 index d5c6ab3778..0000000000 --- a/extra/histogram/histogram.factor +++ /dev/null @@ -1,29 +0,0 @@ -! Copyright (C) 2009 Doug Coleman. -! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences assocs fry ; -IN: histogram - -<PRIVATE - -: (sequence>assoc) ( seq quot assoc -- assoc ) - [ swap curry each ] keep ; inline - -PRIVATE> - -: sequence>assoc* ( assoc seq quot: ( obj assoc -- ) -- assoc ) - rot (sequence>assoc) ; inline - -: sequence>assoc ( seq quot: ( obj assoc -- ) exemplar -- assoc ) - clone (sequence>assoc) ; inline - -: sequence>hashtable ( seq quot: ( obj hashtable -- ) -- hashtable ) - H{ } sequence>assoc ; inline - -: histogram* ( hashtable seq -- hashtable ) - [ inc-at ] sequence>assoc* ; - -: histogram ( seq -- hashtable ) - [ inc-at ] sequence>hashtable ; - -: collect-values ( seq quot: ( obj hashtable -- ) -- hash ) - '[ [ dup @ ] dip push-at ] sequence>hashtable ; inline