From 54cb1b968644e7d4d6b6f783dc8a3bc6aad8f68a Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 18 May 2009 02:16:03 -0500 Subject: [PATCH] median used the wrong algorithm. now it runs in O(n) time. add kth-smallest word, used to implement median --- basis/math/statistics/statistics-tests.factor | 18 ++++++ basis/math/statistics/statistics.factor | 58 ++++++++++++++++--- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/basis/math/statistics/statistics-tests.factor b/basis/math/statistics/statistics-tests.factor index b6ff421956..c160d57db7 100644 --- a/basis/math/statistics/statistics-tests.factor +++ b/basis/math/statistics/statistics-tests.factor @@ -13,6 +13,24 @@ IN: math.statistics.tests [ 2 ] [ { 1 2 3 } median ] unit-test [ 5/2 ] [ { 1 2 3 4 } median ] unit-test +[ { } median ] must-fail +[ { } upper-median ] must-fail +[ { } lower-median ] must-fail + +[ 2 ] [ { 1 2 3 4 } lower-median ] unit-test +[ 3 ] [ { 1 2 3 4 } upper-median ] unit-test +[ 3 ] [ { 1 2 3 4 5 } lower-median ] unit-test +[ 3 ] [ { 1 2 3 4 5 } upper-median ] unit-test + + +[ 1 ] [ { 1 } lower-median ] unit-test +[ 1 ] [ { 1 } upper-median ] unit-test +[ 1 ] [ { 1 } median ] unit-test + +[ 1 ] [ { 1 2 } lower-median ] unit-test +[ 2 ] [ { 1 2 } upper-median ] unit-test +[ 3/2 ] [ { 1 2 } median ] unit-test + [ 1 ] [ { 1 2 3 } var ] unit-test [ 1.0 ] [ { 1 2 3 } std ] unit-test [ t ] [ { 1 2 3 4 } ste 0.6454972243679028 - .0001 < ] unit-test diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor index 4cd8c5b888..5b0439906c 100644 --- a/basis/math/statistics/statistics.factor +++ b/basis/math/statistics/statistics.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2008 Doug Coleman, Michael Judge. ! See http://factorcode.org/license.txt for BSD license. USING: arrays combinators kernel math math.analysis -math.functions math.order sequences sorting ; +math.functions math.order sequences sorting locals +sequences.private ; IN: math.statistics : mean ( seq -- n ) @@ -13,13 +14,55 @@ IN: math.statistics : harmonic-mean ( seq -- n ) [ recip ] sigma recip ; -: median ( seq -- n ) +: slow-median ( seq -- n ) natural-sort dup length even? [ [ midpoint@ dup 1 - 2array ] keep nths mean ] [ [ midpoint@ ] keep nth ] if ; +:: kth-smallest ( seq k -- elt ) + #! Wirth's method, Algorithm's + Data structues = Programs p. 84 + #! The algorithm modifiers seq, so we clone it + seq clone :> seq + 0 :> i! + 0 :> j! + 0 :> l! + 0 :> x! + seq length 1 - :> m! + [ l m < ] + [ + k seq nth x! + l i! + m j! + [ i j <= ] + [ + [ i seq nth-unsafe x < ] [ i 1 + i! ] while + [ x j seq nth-unsafe < ] [ j 1 - j! ] while + i j <= [ + i j seq exchange + i 1 + i! + j 1 - j! + ] when + ] do while + + j k < [ i l! ] when + k i < [ j m! ] when + ] while + k seq nth ; inline + +: lower-median ( seq -- elt ) + dup dup length odd? [ midpoint@ ] [ midpoint@ 1 - ] if kth-smallest ; + +: upper-median ( seq -- elt ) + dup midpoint@ kth-smallest ; + +: medians ( seq -- lower upper ) + [ lower-median ] [ upper-median ] bi ; + +: median ( seq -- x ) + dup length odd? [ lower-median ] [ medians + 2 / ] if ; + : minmax ( seq -- min max ) #! find the min and max of a seq in one pass [ 1/0. -1/0. ] dip [ [ min ] [ max ] bi-curry bi* ] each ; @@ -32,15 +75,13 @@ IN: math.statistics dup length 1 <= [ drop 0 ] [ - [ [ mean ] keep [ - sq ] with sigma ] keep - length 1 - / + [ [ mean ] keep [ - sq ] with sigma ] + [ length 1 - ] bi / ] if ; -: std ( seq -- x ) - var sqrt ; +: std ( seq -- x ) var sqrt ; -: ste ( seq -- x ) - [ std ] [ length ] bi sqrt / ; +: ste ( seq -- x ) [ std ] [ length ] bi sqrt / ; : ((r)) ( mean(x) mean(y) {x} {y} -- (r) ) ! finds sigma((xi-mean(x))(yi-mean(y)) @@ -64,4 +105,3 @@ IN: math.statistics [ (r) ] 2keep ! stack is mean(x) mean(y) r sx sy swap / * ! stack is mean(x) mean(y) beta [ swapd * - ] keep ; -