machine-learning.decision-trees: vocab for creating decision trees
not donefactor-shell
parent
7b2c2a93f5
commit
6dac2075df
|
@ -0,0 +1,16 @@
|
|||
! Copyright (C) 2018 Björn Lindqvist
|
||||
! See http://factorcode.org/license.txt for BSD license
|
||||
USING: kernel machine-learning.data-sets
|
||||
machine-learning.decision-trees math.extras sequences tools.test ;
|
||||
IN: machine-learning.decision-trees.tests
|
||||
|
||||
! Gain for feature indices 0 through 5 of the monks-1 training set,
! each rounded to two decimals before comparison.
{ { 0.08 0.01 0.0 0.03 0.29 0.0 } } [
    "monks-1.train" load-monks
    6 <iota> [ average-gain ] with map
    [ 2 round-to-decimal ] map
] unit-test
|
||||
|
||||
! Index 4 holds the largest value (0.29) in the expected gains of the
! average-gain test in this file, so it must be the one selected.
{ 4 } [ "monks-1.train" load-monks highest-gain-index ] unit-test
|
|
@ -0,0 +1,33 @@
|
|||
! Copyright (C) 2018 Björn Lindqvist
|
||||
! See http://factorcode.org/license.txt for BSD license
|
||||
USING: accessors assocs fry grouping.extras kernel locals math
|
||||
math.functions math.statistics sequences sequences.extras sorting ;
|
||||
IN: machine-learning.decision-trees
|
||||
|
||||
! Entropy of the distribution of values in seq, rescaled to base 2.
! `log` is the natural logarithm, so dividing by `2 log` (ln 2) turns a
! result measured in nats into bits.
! NOTE(review): this assumes `entropy` itself uses the natural log -- confirm.
: entropy2 ( seq -- e )
    normalized-histogram values entropy
    2 log / ;
|
||||
|
||||
! Sorts seq by the key that quot computes, then groups the sorted
! sequence with the same quot.
! NOTE(review): sorting first presumably matters because group-by only
! collects runs of adjacent elements with equal keys -- confirm against
! the grouping.extras documentation.
: group-by-sorted ( seq quot: ( elt -- key ) -- groups )
    dup [ sort-with ] dip
    group-by ; inline
|
||||
|
||||
! Sum, over the groups obtained by partitioning on feature idx, of
! ( entropy2 of the group's targets ) * ( size of the group ).
!
! data-target: sequence of pairs whose first element is a feature row
!              and whose second element is that row's target.
! idx:         position within each feature row to partition on.
!
! The result is a single number (the output of map-sum), not a
! sequence; the caller divides it by the sample count.
: subsets-weighted-entropy ( data-target idx -- weighted-entropy )
    ! Group the pairs by the value their feature row has at idx.
    '[ first _ swap nth ] group-by-sorted
    ! Keep only the targets inside each group and drop the keys.
    ! No fry is needed here: the quotation has no holes to fill.
    [ [ second ] map ] assoc-map values
    ! Weight each group's entropy by its size and add everything up.
    [ [ entropy2 ] [ length ] bi * ] map-sum ; inline
|
||||
|
||||
! Gain obtained by partitioning dataset on the feature at idx:
!   entropy2 of all targets
!   minus ( subsets-weighted-entropy of the partition / number of rows ).
! Reads the data>> and target>> slots of dataset.
:: average-gain ( dataset idx -- gain )
    dataset data>> :> rows
    dataset target>> :> labels
    ! Size-weighted entropy left after the split, averaged per row.
    rows labels zip idx subsets-weighted-entropy rows length / :> remainder
    labels entropy2 remainder - ;
|
||||
|
||||
! Index of the feature with the greatest average-gain in dataset.
! One gain is computed for every entry of the feature-names>> slot and
! the resulting sequence is passed to arg-max.
: highest-gain-index ( dataset -- idx )
    dup feature-names>> length <iota>
    swap '[ _ swap average-gain ] map
    arg-max ;
|
Loading…
Reference in New Issue