machine-learning.data-sets: renaming slots in the data-set tuple
"features" is a better name for the slot than "data"
windows-high-dpi
parent
b672ed91fb
commit
fa2a0dc779
|
@ -6,8 +6,10 @@ kernel math.parser sequences splitting ;
|
|||
|
||||
IN: machine-learning.data-sets
|
||||
|
||||
TUPLE: data-set data target target-names description
|
||||
feature-names ;
|
||||
TUPLE: data-set
|
||||
features targets
|
||||
feature-names target-names
|
||||
description ;
|
||||
|
||||
C: <data-set> data-set
|
||||
|
||||
|
@ -37,23 +39,25 @@ PRIVATE>
|
|||
! Omits the identifiers which are not so interesting.
|
||||
[ but-last [ string>number ] map ] map
|
||||
[ [ rest ] map ] [ [ first ] map ] bi
|
||||
{ "a1" "a2" "a3" "a4" "a5" "a6" }
|
||||
{ "no" "yes" }
|
||||
"monks.names" load-file
|
||||
{ "a1" "a2" "a3" "a4" "a5" "a6" } <data-set> ;
|
||||
<data-set> ;
|
||||
|
||||
: load-iris ( -- data-set )
|
||||
"iris.csv" load-table-csv
|
||||
[ [ unclip-last ] { } map>assoc unzip ] [ 2 tail ] bi*
|
||||
"iris.rst" load-file
|
||||
{
|
||||
"sepal length (cm)" "sepal width (cm)"
|
||||
"petal length (cm)" "petal width (cm)"
|
||||
} <data-set> ;
|
||||
} swap
|
||||
"iris.rst" load-file
|
||||
<data-set> ;
|
||||
|
||||
: load-linnerud ( -- data-set )
|
||||
data-set new
|
||||
"linnerud_exercise.csv" load-table
|
||||
[ >>data ] [ >>feature-names ] bi*
|
||||
[ >>features ] [ >>feature-names ] bi*
|
||||
"linnerud_physiological.csv" load-table
|
||||
[ >>target ] [ >>target-names ] bi*
|
||||
[ >>targets ] [ >>target-names ] bi*
|
||||
"linnerud.rst" load-file >>description ;
|
||||
|
|
|
@ -20,12 +20,12 @@ IN: machine-learning.decision-trees
|
|||
[ [ entropy2 ] [ length ] bi * ] map-sum ; inline
|
||||
|
||||
:: average-gain ( dataset idx -- gain )
|
||||
dataset target>> :> target
|
||||
dataset data>> :> data
|
||||
data target zip :> data-target
|
||||
data-target idx subsets-weighted-entropy :> weighted
|
||||
dataset targets>> :> targets
|
||||
dataset features>> :> features
|
||||
features targets zip :> features-targets
|
||||
features-targets idx subsets-weighted-entropy :> weighted
|
||||
|
||||
target entropy2 weighted data length / - ;
|
||||
targets entropy2 weighted features length / - ;
|
||||
|
||||
: highest-gain-index ( dataset -- idx )
|
||||
dup feature-names>> length <iota> [
|
||||
|
|
Loading…
Reference in New Issue