math.statistics: Add ddof (delta degrees-of-freedom) to easily calculate population (full) and unbiased (sample) statistics.
parent
7906632952
commit
923f3592c8
|
@ -5,6 +5,7 @@ IN: math.statistics.tests
|
|||
[ 3 ] [ { 1 2 3 4 5 } 1 power-mean ] unit-test
|
||||
[ t ] [ { 1 2 3 4 5 } [ 2 power-mean ] [ quadratic-mean ] bi 1e-10 ~ ] unit-test
|
||||
[ 1 ] [ { 1 } mean ] unit-test
|
||||
[ 0 ] [ { } mean ] unit-test
|
||||
[ 3/2 ] [ { 1 2 } mean ] unit-test
|
||||
[ 0 ] [ { 0 0 0 } geometric-mean ] unit-test
|
||||
[ t ] [ { 2 2 2 2 } geometric-mean 2.0 .0001 ~ ] unit-test
|
||||
|
@ -54,8 +55,24 @@ IN: math.statistics.tests
|
|||
[ 1 ] [ { 1 2 3 } var ] unit-test
|
||||
[ 16 ] [ { 4 6 8 10 10 12 14 16 } var ] unit-test
|
||||
|
||||
[ 16 ] [ { 4 6 8 10 12 14 16 } full-var ] unit-test
|
||||
[ 1.0 ] [ { 7 8 9 } std ] unit-test
|
||||
{ 16 } [ { 4 6 8 10 12 14 16 } full-var ] unit-test
|
||||
{ 1.0 } [ { 7 8 9 } std ] unit-test
|
||||
{ 2/3 } [ { 7 8 9 } 0 var-ddof ] unit-test
|
||||
{ 2/3 } [ { 7 8 9 } full-var ] unit-test
|
||||
{ 1 } [ { 7 8 9 } 1 var-ddof ] unit-test
|
||||
{ 1 } [ { 7 8 9 } var ] unit-test
|
||||
{ 1 } [ { 7 8 9 } sample-var ] unit-test
|
||||
{ 2 } [ { 7 8 9 } 2 var-ddof ] unit-test
|
||||
{ 0 } [ { 7 8 9 } 3 var-ddof ] unit-test
|
||||
|
||||
{ t } [ { 7 8 9 } 0 std-ddof 0.816496580927726 .0001 ~ ] unit-test
|
||||
{ t } [ { 7 8 9 } full-std 0.816496580927726 .0001 ~ ] unit-test
|
||||
{ 1.0 } [ { 7 8 9 } 1 std-ddof ] unit-test
|
||||
{ 1.0 } [ { 7 8 9 } std ] unit-test
|
||||
{ 1.0 } [ { 7 8 9 } sample-std ] unit-test
|
||||
{ t } [ { 7 8 9 } 2 std-ddof 1.414213562373095 .0001 ~ ] unit-test
|
||||
{ 0.0 } [ { 7 8 9 } 3 std-ddof ] unit-test
|
||||
|
||||
[ t ] [ { 1 2 3 4 } ste 0.6454972243679028 - .0001 < ] unit-test
|
||||
|
||||
[ t ] [ { 23.2 33.4 22.5 66.3 44.5 } std 18.1906 - .0001 < ] unit-test
|
||||
|
@ -159,6 +176,8 @@ IN: math.statistics.tests
|
|||
[ mean 0 1e-10 ~ ] [ var 1 1e-10 ~ ] bi
|
||||
] unit-test
|
||||
|
||||
{ { 0 0 0 } } [ { 1 1 1 } standardize ] unit-test
|
||||
|
||||
{ { 0 1/4 1/2 3/4 1 } } [ 5 iota rescale ] unit-test
|
||||
|
||||
|
||||
|
|
|
@ -9,8 +9,16 @@ IN: math.statistics
|
|||
: power-mean ( seq p -- x )
|
||||
[ '[ _ ^ ] map-sum ] [ [ length / ] [ recip ^ ] bi* ] 2bi ; inline
|
||||
|
||||
! Delta in degrees-of-freedom
|
||||
: mean-ddof ( seq ddof -- x )
|
||||
[ [ sum ] [ length ] bi ] dip -
|
||||
dup zero? [ 2drop 0 ] [ / ] if ; inline
|
||||
|
||||
: mean ( seq -- x )
|
||||
[ sum ] [ length ] bi / ; inline
|
||||
0 mean-ddof ; inline
|
||||
|
||||
: unbiased-mean ( seq -- x )
|
||||
1 mean-ddof ; inline
|
||||
|
||||
: sum-of-squares ( seq -- x )
|
||||
[ sq ] map-sum ; inline
|
||||
|
@ -246,26 +254,25 @@ PRIVATE>
|
|||
: range ( seq -- x )
|
||||
minmax swap - ;
|
||||
|
||||
: sample-var ( seq -- x )
|
||||
#! normalize by N-1; unbiased
|
||||
dup length 1 <= [
|
||||
drop 0
|
||||
: var-ddof ( seq n -- x )
|
||||
2dup [ length ] dip - 0 <= [
|
||||
2drop 0
|
||||
] [
|
||||
[ sum-of-squared-errors ] [ length 1 - ] bi /
|
||||
] if ;
|
||||
[ [ sum-of-squared-errors ] [ length ] bi ] dip - /
|
||||
] if ; inline
|
||||
|
||||
: full-var ( seq -- x )
|
||||
dup length 1 <= [
|
||||
drop 0
|
||||
] [
|
||||
[ sum-of-squared-errors ] [ length ] bi /
|
||||
] if ;
|
||||
: full-var ( seq -- x ) 0 var-ddof ; inline
|
||||
|
||||
: sample-var ( seq -- x ) 1 var-ddof ; inline
|
||||
|
||||
ALIAS: var sample-var
|
||||
|
||||
: sample-std ( seq -- x ) sample-var sqrt ;
|
||||
: std-ddof ( seq n -- x )
|
||||
var-ddof sqrt ; inline
|
||||
|
||||
: full-std ( seq -- x ) full-var sqrt ;
|
||||
: full-std ( seq -- x ) 0 std-ddof ; inline
|
||||
|
||||
: sample-std ( seq -- x ) 1 std-ddof ; inline
|
||||
|
||||
ALIAS: std sample-std
|
||||
|
||||
|
@ -275,9 +282,11 @@ ALIAS: std sample-std
|
|||
|
||||
: median-dev ( seq -- x ) dup median v-n vabs mean ;
|
||||
|
||||
: sample-ste ( seq -- x ) [ sample-std ] [ length ] bi sqrt / ;
|
||||
: ste-ddof ( seq n -- x ) '[ _ std-ddof ] [ length ] bi sqrt / ;
|
||||
|
||||
: full-ste ( seq -- x ) [ full-std ] [ length ] bi sqrt / ;
|
||||
: full-ste ( seq -- x ) 0 ste-ddof ;
|
||||
|
||||
: sample-ste ( seq -- x ) 1 ste-ddof ;
|
||||
|
||||
ALIAS: ste sample-ste
|
||||
|
||||
|
@ -304,14 +313,20 @@ ALIAS: ste sample-ste
|
|||
swap / * ! stack is mean(x) mean(y) beta
|
||||
[ swapd * - ] keep ;
|
||||
|
||||
: cov ( {x} {y} -- cov )
|
||||
[ dup mean v-n ] bi@ v* mean ;
|
||||
: cov-ddof ( {x} {y} ddof -- cov )
|
||||
[ [ dup mean v-n ] bi@ v* ] dip mean-ddof ;
|
||||
|
||||
: sample-corr ( {x} {y} -- corr )
|
||||
[ cov ] [ [ sample-var ] bi@ * sqrt ] 2bi / ;
|
||||
: cov ( {x} {y} -- cov ) 0 cov-ddof ; inline
|
||||
|
||||
: full-corr ( {x} {y} -- corr )
|
||||
[ cov ] [ [ full-var ] bi@ * sqrt ] 2bi / ;
|
||||
: unbiased-cov ( {x} {y} -- cov ) 1 cov-ddof ; inline
|
||||
|
||||
: corr-ddof ( {x} {y} n -- corr )
|
||||
[ [ cov ] ] dip
|
||||
'[ [ _ var-ddof ] bi@ * sqrt ] 2bi / ;
|
||||
|
||||
: full-corr ( {x} {y} -- corr ) 0 corr-ddof ; inline
|
||||
|
||||
: sample-corr ( {x} {y} -- corr ) 1 corr-ddof ; inline
|
||||
|
||||
ALIAS: corr sample-corr
|
||||
|
||||
|
@ -343,7 +358,8 @@ ALIAS: corr sample-corr
|
|||
[ dup log * ] [ 1 swap - dup log * ] bi + neg 2 log / ;
|
||||
|
||||
: standardize ( u -- v )
|
||||
[ dup mean v-n ] [ std ] bi v/n ;
|
||||
[ dup mean v-n ] [ std ] bi
|
||||
dup zero? [ drop ] [ v/n ] if ;
|
||||
|
||||
: differences ( u -- v )
|
||||
[ 1 tail-slice ] keep [ - ] 2map ;
|
||||
|
@ -358,3 +374,4 @@ ALIAS: corr sample-corr
|
|||
[ values ] map [ 0 [ length + ] accumulate nip ] [ ] bi zip
|
||||
] [ length f <array> ] bi
|
||||
[ '[ first2 [ _ set-nth ] with each ] each ] keep ;
|
||||
|
||||
|
|
Loading…
Reference in New Issue