From c20ee965c6c32f4cece218a783fc408743c0ec73 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 5 Nov 2012 14:40:14 -0800 Subject: [PATCH] math.statistics: Make you think about which std you want--population or sample. Hopefully use the right words everywhere. --- basis/math/statistics/statistics-docs.factor | 41 +++++++++++-------- basis/math/statistics/statistics-tests.factor | 37 ++++++++++------- basis/math/statistics/statistics.factor | 22 ++++------ 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor index 8f953c2bf3..a335241fbc 100644 --- a/basis/math/statistics/statistics-docs.factor +++ b/basis/math/statistics/statistics-docs.factor @@ -46,33 +46,33 @@ HELP: minmax } } ; -HELP: std +HELP: sample-std { $values { "seq" sequence } { "x" "a non-negative real number"} } -{ $description "Computes the standard deviation of " { $snippet "seq" } ", which is the square root of the variance. It measures how widely spread the values in a sequence are about the mean." } +{ $description "Computes the sample standard deviation of " { $snippet "seq" } ", which is the square root of the sample variance. It measures how widely spread the values in a sequence are about the mean for a random subset of a dataset." } { $examples - { $example "USING: math.statistics prettyprint ;" "{ 7 8 9 } std ." "1.0" } } ; + { $example "USING: math.statistics prettyprint ;" "{ 7 8 9 } sample-std ." "1.0" } } ; -HELP: ste +HELP: sample-ste { $values { "seq" sequence } { "x" "a non-negative real number"} } { $description "Computes the standard error of the mean for " { $snippet "seq" } ". It's defined as the standard deviation divided by the square root of the length of the sequence, and measures uncertainty associated with the estimate of the mean." } { $examples - { $example "USING: math.statistics prettyprint ;" "{ -2 2 } ste ." 
"2.0" } + { $example "USING: math.statistics prettyprint ;" "{ -2 2 } sample-ste ." "2.0" } } ; -HELP: var +HELP: sample-var { $values { "seq" sequence } { "x" "a non-negative real number"} } -{ $description "Computes the variance of " { $snippet "seq" } ". It's a measurement of the spread of values in a sequence. The larger the variance, the larger the distance of values from the mean." } +{ $description "Computes the sample variance of " { $snippet "seq" } ". It's a measurement of the spread of values in a sequence." } { $notes "If the number of elements in " { $snippet "seq" } " is 1 or less, it outputs 0." } { $examples - { $example "USING: math.statistics prettyprint ;" "{ 1 } var ." "0" } - { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 } var ." "1" } - { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 4 } var ." "1+2/3" } } ; + { $example "USING: math.statistics prettyprint ;" "{ 1 } sample-var ." "0" } + { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 } sample-var ." "1" } + { $example "USING: math.statistics prettyprint ;" "{ 1 2 3 4 } sample-var ." "1+2/3" } } ; -HELP: cov +HELP: population-cov { $values { "{x}" sequence } { "{y}" sequence } { "cov" "a real number" } } { $description "Computes the covariance of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "." } ; -HELP: corr +HELP: population-corr { $values { "{x}" sequence } { "{y}" sequence } { "corr" "a real number" } } { $description "Computes the correlation of two sequences, " { $snippet "{x}" } " and " { $snippet "{y}" } "." 
} ; @@ -281,8 +281,12 @@ ARTICLE: "math.statistics" "Statistics" { $subsections median lower-median upper-median medians } "Computing the mode:" { $subsections mode } -"Computing the standard deviation, standard error, and variance:" -{ $subsections std ste var } +"Computing the population standard deviation, standard error, and variance:" +{ $subsections population-std population-ste population-var } +"Computing the sample standard deviation, standard error, and variance:" +{ $subsections sample-std sample-ste sample-var } +"Computing the standard deviation, standard error, and variance with a given delta degrees of freedom:" +{ $subsections std-ddof ste-ddof var-ddof } "Computing the range and minimum and maximum elements:" { $subsections range minmax } "Computing the kth smallest element:" @@ -294,7 +298,8 @@ ARTICLE: "math.statistics" "Statistics" ABOUT: "math.statistics" -{ var var-ddof population-var sample-var } related-words -{ std std-ddof population-std sample-std } related-words -{ ste ste-ddof population-ste sample-ste } related-words -{ corr corr-ddof population-corr sample-corr } related-words +{ var-ddof population-var sample-var } related-words +{ std-ddof population-std sample-std } related-words +{ ste-ddof population-ste sample-ste } related-words +{ corr-ddof population-corr sample-corr } related-words +{ cov-ddof population-cov sample-cov } related-words diff --git a/basis/math/statistics/statistics-tests.factor b/basis/math/statistics/statistics-tests.factor index 396f9842b4..925bd06168 100644 --- a/basis/math/statistics/statistics-tests.factor +++ b/basis/math/statistics/statistics-tests.factor @@ -52,15 +52,14 @@ IN: math.statistics.tests [ 2 ] [ { 1 2 } upper-median ] unit-test [ 3/2 ] [ { 1 2 } median ] unit-test -[ 1 ] [ { 1 2 3 } var ] unit-test -[ 16 ] [ { 4 6 8 10 10 12 14 16 } var ] unit-test +[ 1 ] [ { 1 2 3 } sample-var ] unit-test +[ 16 ] [ { 4 6 8 10 10 12 14 16 } sample-var ] unit-test { 16 } [ { 4 6 8 10 12 14 16 } population-var ] unit-test -{ 1.0 } [ { 7 8 9 } std ] 
unit-test +{ 1.0 } [ { 7 8 9 } sample-std ] unit-test { 2/3 } [ { 7 8 9 } 0 var-ddof ] unit-test { 2/3 } [ { 7 8 9 } population-var ] unit-test { 1 } [ { 7 8 9 } 1 var-ddof ] unit-test -{ 1 } [ { 7 8 9 } var ] unit-test { 1 } [ { 7 8 9 } sample-var ] unit-test { 2 } [ { 7 8 9 } 2 var-ddof ] unit-test { 0 } [ { 7 8 9 } 3 var-ddof ] unit-test @@ -68,18 +67,18 @@ IN: math.statistics.tests { t } [ { 7 8 9 } 0 std-ddof 0.816496580927726 .0001 ~ ] unit-test { t } [ { 7 8 9 } population-std 0.816496580927726 .0001 ~ ] unit-test { 1.0 } [ { 7 8 9 } 1 std-ddof ] unit-test -{ 1.0 } [ { 7 8 9 } std ] unit-test +{ 1.0 } [ { 7 8 9 } sample-std ] unit-test { 1.0 } [ { 7 8 9 } sample-std ] unit-test { t } [ { 7 8 9 } 2 std-ddof 1.414213562373095 .0001 ~ ] unit-test { 0.0 } [ { 7 8 9 } 3 std-ddof ] unit-test -[ t ] [ { 1 2 3 4 } ste 0.6454972243679028 - .0001 < ] unit-test +[ t ] [ { 1 2 3 4 } sample-ste 0.6454972243679028 - .0001 < ] unit-test -[ t ] [ { 23.2 33.4 22.5 66.3 44.5 } std 18.1906 - .0001 < ] unit-test +[ t ] [ { 23.2 33.4 22.5 66.3 44.5 } sample-std 18.1906 - .0001 < ] unit-test -[ 0 ] [ { 1 } var ] unit-test -[ 0.0 ] [ { 1 } std ] unit-test -[ 0.0 ] [ { 1 } ste ] unit-test +[ 0 ] [ { 1 } sample-var ] unit-test +[ 0.0 ] [ { 1 } sample-std ] unit-test +[ 0.0 ] [ { 1 } sample-ste ] unit-test { 2 } [ { 1 3 5 7 } mean-dev ] unit-test { 4/5 } [ { 1 3 3 3 5 } median-dev ] unit-test @@ -106,11 +105,12 @@ IN: math.statistics.tests [ 0 swap at ] [ 1 swap at ] [ 2 swap at ] tri ] unit-test -[ 0 ] [ { 1 } { 1 } cov ] unit-test -[ 2/3 ] [ { 1 2 3 } { 4 5 6 } cov ] unit-test +[ 0 ] [ { 1 } { 1 } sample-cov ] unit-test +[ 2/3 ] [ { 1 2 3 } { 4 5 6 } population-cov ] unit-test -[ 0.75 ] [ { 1 2 3 4 } dup corr ] unit-test -[ -0.75 ] [ { 1 2 3 4 } { -4 -5 -6 -7 } corr ] unit-test +[ 0.75 ] [ { 1 2 3 4 } dup sample-corr ] unit-test +[ 1.0 ] [ { 1 2 3 4 } dup population-corr ] unit-test +[ -0.75 ] [ { 1 2 3 4 } { -4 -5 -6 -7 } sample-corr ] unit-test [ { 1 2 4 7 } ] [ { 1 1 2 3 } 
cum-sum ] unit-test [ { 1 1 2 6 } ] [ { 1 1 2 3 } cum-product ] unit-test @@ -173,7 +173,14 @@ IN: math.statistics.tests { t t } [ { 6.5 3.8 6.6 5.7 6.0 6.4 5.3 } standardize - [ mean 0 1e-10 ~ ] [ var 1 1e-10 ~ ] bi + [ mean 0 1e-10 ~ ] [ sample-var 1 1e-10 ~ ] bi +] unit-test + +{ t t } [ + { { 1 -1 2 } { 2 0 0 } { 0 1 -1 } } standardize-2d + flip + [ [ mean ] map { 0 0 0 } 1e-10 v~ ] + [ [ sample-var ] map { 1 1 1 } 1e-10 v~ ] bi ] unit-test { { 0 0 0 } } [ { 1 1 1 } standardize ] unit-test diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor index 838241061e..bcaacbb82e 100644 --- a/basis/math/statistics/statistics.factor +++ b/basis/math/statistics/statistics.factor @@ -265,8 +265,6 @@ PRIVATE> : sample-var ( seq -- x ) 1 var-ddof ; inline -ALIAS: var sample-var - : std-ddof ( seq n -- x ) var-ddof sqrt ; inline @@ -274,9 +272,7 @@ ALIAS: var sample-var : sample-std ( seq -- x ) 1 std-ddof ; inline -ALIAS: std sample-std - -: signal-to-noise ( seq -- x ) [ mean ] [ std ] bi / ; +: signal-to-noise ( seq -- x ) [ mean ] [ population-std ] bi / ; : mean-dev ( seq -- x ) dup mean v-n vabs mean ; @@ -288,8 +284,6 @@ ALIAS: std sample-std : sample-ste ( seq -- x ) 1 ste-ddof ; -ALIAS: ste sample-ste - : ((r)) ( mean(x) mean(y) {x} {y} -- (r) ) ! 
finds sigma((xi-mean(x))(yi-mean(y)) 0 [ [ [ pick ] dip swap - ] bi@ * + ] 2reduce 2nip ; @@ -298,7 +292,7 @@ ALIAS: ste sample-ste * recip [ [ ((r)) ] keep length 1 - / ] dip * ; : [r] ( {{x,y}...} -- mean(x) mean(y) {x} {y} sx sy ) - first2 [ [ [ mean ] bi@ ] 2keep ] 2keep [ std ] bi@ ; + first2 [ [ [ mean ] bi@ ] 2keep ] 2keep [ sample-std ] bi@ ; : r ( {{x,y}...} -- r ) [r] (r) ; @@ -316,20 +310,18 @@ ALIAS: ste sample-ste : cov-ddof ( {x} {y} ddof -- cov ) [ [ dup mean v-n ] bi@ v* ] dip mean-ddof ; -: cov ( {x} {y} -- cov ) 0 cov-ddof ; inline +: population-cov ( {x} {y} -- cov ) 0 cov-ddof ; inline -: unbiased-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline +: sample-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline : corr-ddof ( {x} {y} n -- corr ) - [ [ cov ] ] dip + [ [ population-cov ] ] dip '[ [ _ var-ddof ] bi@ * sqrt ] 2bi / ; : population-corr ( {x} {y} -- corr ) 0 corr-ddof ; inline : sample-corr ( {x} {y} -- corr ) 1 corr-ddof ; inline -ALIAS: corr sample-corr - : cum-map ( seq identity quot -- seq' ) swapd [ dup ] compose map nip ; inline @@ -368,11 +360,11 @@ ALIAS: corr sample-corr [ dup log * ] [ 1 swap - dup log * ] bi + neg 2 log / ; : standardize ( u -- v ) - [ dup mean v-n ] [ std ] bi + [ dup mean v-n ] [ sample-std ] bi dup zero? [ drop ] [ v/n ] if ; : standardize-2d ( u -- v ) - flip dup [ [ mean ] [ std ] bi 2array ] map + flip dup [ [ mean ] [ sample-std ] bi 2array ] map [ [ first v-n ] 2map ] keep [ second v/n ] 2map flip ; : differences ( u -- v )