2009-10-30 03:55:54 -04:00
USING: assocs debugger hashtables help.markup help.syntax
2009-10-30 04:06:03 -04:00
quotations sequences math ;
2008-01-13 10:41:35 -05:00
IN: math.statistics
2007-10-04 22:48:06 -04:00
! Help entry for geometric-mean: stack values, description, one example,
! and the error reported for an empty sequence.
HELP: geometric-mean
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the geometric mean of all elements in " { $snippet "seq" } ". The geometric mean measures the central tendency of a data set and minimizes the effects of extreme values."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } geometric-mean ."
        "1.81712059283214"
    }
}
{ $errors
    "Throws a " { $link signal-error. } " (square-root of 0) if the sequence is empty."
} ;
! Help entry for harmonic-mean: stack values, description, a usage note,
! one example, and the error reported for an empty sequence.
! NOTE(review): for { 1 2 3 } the sum of reciprocals is 11/6, so n divided by
! that sum is 18/11, while the documented output "6/11" is the plain
! reciprocal of the sum -- confirm against the implementation.
HELP: harmonic-mean
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the harmonic mean of the elements in " { $snippet "seq" } ". The harmonic mean is appropriate when the average of rates is desired."
}
{ $notes "Positive reals only." }
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } harmonic-mean ."
        "6/11"
    }
}
{ $errors
    "Throws a " { $link signal-error. } " (divide by zero) if the sequence is empty."
} ;
! Help entry for mean: stack values, description, one example, and the error
! reported for an empty sequence.
! The result is described as a plain real number: the arithmetic mean of
! negative inputs is itself negative, so "non-negative" would be wrong here.
HELP: mean
{ $values
    { "seq" sequence }
    { "x" "a real number" }
}
{ $description
    "Computes the arithmetic mean of the elements in " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } mean ."
        "2"
    }
}
{ $errors
    "Throws a " { $link signal-error. } " (divide by zero) if the sequence is empty."
} ;
! Help entry for median: stack values, description, examples for odd and even
! lengths, and the error reported for an empty sequence.
! The result is described as a plain real number: the median of negative
! inputs is itself negative, so "non-negative" would be wrong here.
HELP: median
{ $values
    { "seq" sequence }
    { "x" "a real number" }
}
{ $description
    "Computes the median of " { $snippet "seq" } " by finding the middle element of the sequence using " { $link kth-smallest } ". If there is an even number of elements in the sequence, the median is not unique, so the mean of the two middle values is output."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } median ."
        "2"
    }
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 4 } median ."
        "2+1/2"
    }
}
{ $errors
    "Throws a " { $link signal-error. } " (divide by zero) if the sequence is empty."
} ;
! Help entry for range: stack values, description, and two examples.
HELP: range
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the difference of the maximum and minimum values in " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } range ."
        "2"
    }
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 4 } range ."
        "3"
    }
} ;
2007-10-04 22:48:06 -04:00
2009-10-30 04:06:03 -04:00
! Help entry for minmax: two outputs (min, max), description, and one example.
HELP: minmax
{ $values
    { "seq" sequence }
    { "min" real }
    { "max" real }
}
{ $description
    "Finds the minimum and maximum elements of " { $snippet "seq" } " in one pass. Throws an error on an empty sequence."
}
{ $examples
    { $example
        "USING: arrays math.statistics prettyprint ;"
        "{ 1 2 3 } minmax 2array ."
        "{ 1 3 }"
    }
} ;
2007-10-04 22:48:06 -04:00
! Help entry for std: stack values, description, and one example.
HELP: std
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the standard deviation of " { $snippet "seq" } ", which is the square root of the variance. It measures how widely spread the values in a sequence are about the mean."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 7 8 9 } std ."
        "1.0"
    }
} ;
2007-10-04 22:48:06 -04:00
! Help entry for ste: stack values, description, and one example.
HELP: ste
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the standard error of the mean for " { $snippet "seq" } ". It's defined as the standard deviation divided by the square root of the length of the sequence, and measures uncertainty associated with the estimate of the mean."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ -2 2 } ste ."
        "2.0"
    }
} ;
2007-10-04 22:48:06 -04:00
! Help entry for var: stack values, description, a note on short sequences,
! and three examples.
HELP: var
{ $values
    { "seq" sequence }
    { "x" "a non-negative real number" }
}
{ $description
    "Computes the variance of " { $snippet "seq" } ". It's a measurement of the spread of values in a sequence. The larger the variance, the larger the distance of values from the mean."
}
{ $notes
    "If the number of elements in " { $snippet "seq" } " is 1 or less, it outputs 0."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 } var ."
        "0"
    }
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 } var ."
        "1"
    }
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 4 } var ."
        "1+2/3"
    }
} ;
2007-10-04 22:48:06 -04:00
2012-04-02 20:12:32 -04:00
! Help entry for cov: two input sequences and one real output.
HELP: cov
{ $values
    { "{x}" sequence }
    { "{y}" sequence }
    { "cov" "a real number" }
}
{ $description
    "Computes the covariance of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "."
} ;
! Help entry for corr: two input sequences and one real output.
HELP: corr
{ $values
    { "{x}" sequence }
    { "{y}" sequence }
    { "corr" "a real number" }
}
{ $description
    "Computes the correlation of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "."
} ;
2009-10-30 03:55:54 -04:00
! Help entry for histogram: stack values, description, and one example.
HELP: histogram
{ $values { "seq" sequence } { "hashtable" hashtable } }
{ $description
    "Returns a hashtable where the keys are the elements of the sequence and the values are the number of times they appeared in that sequence."
}
{ $examples
    { $example
        "! Count the number of times an element appears in a sequence."
        "USING: prettyprint math.statistics ;"
        "\"aaabc\" histogram ."
        "H{ { 97 3 } { 98 1 } { 99 1 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
2011-09-08 22:07:14 -04:00
! Help entry for histogram-by: stack values (including the binning
! quotation), description, and one unchecked example.
HELP: histogram-by
{ $values
    { "seq" sequence } { "quot" { $quotation "( x -- bin )" } }
    { "hashtable" hashtable }
}
{ $description
    "Returns a hashtable where the keys are the elements of the sequence binned by being passed through " { $snippet "quot" } ", and the values are the number of times members of each bin appeared in that sequence."
}
{ $examples
    { $unchecked-example
        "! Count the number of times letters and non-letters appear in a sequence."
        "USING: prettyprint math.statistics unicode.categories ;"
        "\"aaa123bc\" [ letter? ] histogram-by ."
        "H{ { t 5 } { f 3 } }"
    }
} ;
2010-02-03 02:28:20 -05:00
! Help entry for histogram!: stack values, description, and one example that
! feeds an existing histogram into a second count.
HELP: histogram!
{ $values { "hashtable" hashtable } { "seq" sequence } }
{ $description
    "Takes an existing hashtable and uses " { $link histogram } " to continue counting the number of occurrences of each element."
}
{ $examples
    { $example
        "! Count the number of times the elements of two sequences appear."
        "USING: prettyprint math.statistics ;"
        "\"aaabc\" histogram \"aaaaaabc\" histogram! ."
        "H{ { 97 9 } { 98 2 } { 99 2 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
2009-11-02 00:16:26 -05:00
! Help entry for sorted-histogram: stack values, description, and one example.
! The example's input line is a single string with the inner quotes escaped,
! in the same form as the histogram examples elsewhere in this file.
HELP: sorted-histogram
{ $values
    { "seq" sequence }
    { "alist" "an array of key/value pairs" }
}
{ $description
    "Outputs a " { $link histogram } " of a sequence sorted by number of occurrences from lowest to highest."
}
{ $examples
    { $example
        "USING: prettyprint math.statistics ;"
        "\"abababbbbbbc\" sorted-histogram ."
        "{ { 99 1 } { 97 3 } { 98 8 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
! Help entry for sequence>assoc: stack values (sequence, two quotations,
! exemplar, resulting assoc), description, and one example.
HELP: sequence>assoc
{ $values
    { "seq" sequence }
    { "map-quot" { $quotation "( x -- ..y )" } }
    { "insert-quot" { $quotation "( ..y assoc -- )" } }
    { "exemplar" "an exemplar assoc" }
    { "assoc" assoc }
}
{ $description
    "Iterates over a sequence, allowing elements of the sequence to be added to a newly created " { $snippet "assoc" } ". The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response."
}
{ $examples
    { $example
        "! Iterate over a sequence and increment the count at each element"
        "! The first quotation has stack effect ( key -- key ), a no-op"
        "USING: assocs prettyprint math.statistics ;"
        "\"aaabc\" [ ] [ inc-at ] H{ } sequence>assoc ."
        "H{ { 97 3 } { 98 1 } { 99 1 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
2010-02-03 02:28:20 -05:00
! Help entry for sequence>assoc!: stack values (existing assoc, sequence, two
! quotations), description, and one example.
HELP: sequence>assoc!
{ $values
    { "assoc" assoc }
    { "seq" sequence }
    { "map-quot" { $quotation "( x -- ..y )" } }
    { "insert-quot" { $quotation "( ..y assoc -- )" } }
}
{ $description
    "Iterates over a sequence, allowing elements of the sequence to be added to an existing " { $snippet "assoc" } ". The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response."
}
{ $examples
    { $example
        "! Iterate over a sequence and add the counts to an existing assoc"
        "USING: assocs prettyprint math.statistics kernel ;"
        "H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ ] [ inc-at ] sequence>assoc! ."
        "H{ { 97 5 } { 98 2 } { 99 1 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
! Help entry for sequence>hashtable: stack values (sequence, two quotations,
! resulting hashtable), description, and one example.
HELP: sequence>hashtable
{ $values
    { "seq" sequence }
    { "map-quot" { $quotation "( x -- ..y )" } }
    { "insert-quot" { $quotation "( ..y assoc -- )" } }
    { "hashtable" hashtable }
}
{ $description
    "Iterates over a sequence, allowing elements of the sequence to be added to a newly created hashtable. The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response."
}
{ $examples
    { $example
        "! Count the number of times an element occurs in a sequence"
        "USING: assocs prettyprint math.statistics ;"
        "\"aaabc\" [ ] [ inc-at ] sequence>hashtable ."
        "H{ { 97 3 } { 98 1 } { 99 1 } }"
    }
} ;
2009-10-30 03:55:54 -04:00
2012-04-12 13:30:16 -04:00
! Help entry for cum-sum: stack values, description, and one example.
HELP: cum-sum
{ $values
    { "seq" sequence }
    { "seq'" sequence }
}
{ $description
    "Returns the cumulative sum of " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 -1 2 -1 4 } cum-sum ."
        "{ 1 0 2 1 5 }"
    }
} ;
2012-10-25 19:13:25 -04:00
! Help entry for cum-count: stack values (sequence and predicate quotation),
! description, and one example.
HELP: cum-count
{ $values
    { "seq" sequence }
    { "quot" quotation }
    { "seq'" sequence }
}
{ $description
    "Returns the cumulative count of how many times " { $snippet "quot" } " returns true."
}
{ $examples
    { $example
        "USING: math math.statistics prettyprint ;"
        "{ 1 -1 2 -1 4 } [ 0 < ] cum-count ."
        "{ 0 1 1 2 2 }"
    }
} ;
2012-04-12 13:30:16 -04:00
! Help entry for cum-product: stack values, description, and one example.
HELP: cum-product
{ $values
    { "seq" sequence }
    { "seq'" sequence }
}
{ $description
    "Returns the cumulative product of " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 2 3 4 } cum-product ."
        "{ 1 2 6 24 }"
    }
} ;
! Help entry for cum-min: stack values, description, and one example.
HELP: cum-min
{ $values
    { "seq" sequence }
    { "seq'" sequence }
}
{ $description
    "Returns the cumulative min of " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 5 3 4 1 } cum-min ."
        "{ 5 3 3 1 }"
    }
} ;
! Help entry for cum-max: stack values, description, and one example.
HELP: cum-max
{ $values
    { "seq" sequence }
    { "seq'" sequence }
}
{ $description
    "Returns the cumulative max of " { $snippet "seq" } "."
}
{ $examples
    { $example
        "USING: math.statistics prettyprint ;"
        "{ 1 -1 3 5 } cum-max ."
        "{ 1 1 3 5 }"
    }
} ;
2012-05-04 17:18:45 -04:00
! Help entry for standardize: one input sequence, one output sequence.
HELP: standardize
{ $values
    { "u" sequence }
    { "v" sequence }
}
{ $description
    "Shifts and rescales the elements of " { $snippet "u" } " to have zero mean and unit sample variance."
} ;
! Help entry for differences: one input sequence, one output sequence.
HELP: differences
{ $values
    { "u" sequence }
    { "v" sequence }
}
{ $description
    "Returns the successive differences of elements in " { $snippet "u" } "."
} ;
! Help entry for rescale: one input sequence, one output sequence.
HELP: rescale
{ $values
    { "u" sequence }
    { "v" sequence }
}
{ $description
    "Returns " { $snippet "u" } " rescaled to run from 0 to 1 over the range min to max."
} ;
2009-10-30 03:55:54 -04:00
! Article grouping the histogram words and the combinators listed as their
! building blocks.
ARTICLE: "histogram" "Computing histograms"
"Counting elements in a sequence:"
{ $subsections histogram histogram-by histogram! sorted-histogram }
"Combinators for implementing histogram:"
{ $subsections sequence>assoc sequence>assoc! sequence>hashtable } ;
2012-10-25 19:13:25 -04:00
! Article grouping the cumulative words into four sections: mapping
! combinators, math, comparisons, and counting.
ARTICLE: "cumulative" "Computing cumulative sequences"
"Cumulative mapping combinators:"
{ $subsections cum-map cum-map0 }
"Cumulative math:"
{ $subsections cum-sum cum-sum0 cum-product }
"Cumulative comparisons:"
{ $subsections cum-min cum-max }
"Cumulative counting:"
{ $subsections cum-count } ;
2009-10-30 03:55:54 -04:00
! Top-level article for the vocabulary; the last two sections link to the
! "histogram" and "cumulative" articles rather than to individual words.
ARTICLE: "math.statistics" "Statistics"
"Computing the mean:"
{ $subsections
    mean
    geometric-mean
    harmonic-mean
}
"Computing the median:"
{ $subsections
    median
    lower-median
    upper-median
    medians
}
"Computing the mode:"
{ $subsections
    mode
}
"Computing the standard deviation, standard error, and variance:"
{ $subsections
    std
    ste
    var
}
"Computing the range and minimum and maximum elements:"
{ $subsections
    range
    minmax
}
"Computing the kth smallest element:"
{ $subsections
    kth-smallest
}
"Counting the frequency of occurrence of elements:"
{ $subsections
    "histogram"
}
"Computing cumulative sequences:"
{ $subsections
    "cumulative"
} ;
2009-10-30 03:55:54 -04:00
! Use the "math.statistics" article as the vocabulary's main help page.
ABOUT: "math.statistics"

! Mark each statistic and its -ddof, population- and sample- variants as
! related words.
{
    var var-ddof population-var sample-var
} related-words

{
    std std-ddof population-std sample-std
} related-words

{
    ste ste-ddof population-ste sample-ste
} related-words

{
    corr corr-ddof population-corr sample-corr
} related-words