machine-learning.data-sets: renaming slots in the data-set tuple
"features" is a better name for the slot than "data".
windows-high-dpi
parent
b672ed91fb
commit
fa2a0dc779
|
@ -6,8 +6,10 @@ kernel math.parser sequences splitting ;
|
||||||
|
|
||||||
IN: machine-learning.data-sets
|
IN: machine-learning.data-sets
|
||||||
|
|
||||||
TUPLE: data-set data target target-names description
|
TUPLE: data-set
|
||||||
feature-names ;
|
features targets
|
||||||
|
feature-names target-names
|
||||||
|
description ;
|
||||||
|
|
||||||
C: <data-set> data-set
|
C: <data-set> data-set
|
||||||
|
|
||||||
|
@ -37,23 +39,25 @@ PRIVATE>
|
||||||
! Omits the identifiers which are not so interesting.
|
! Omits the identifiers which are not so interesting.
|
||||||
[ but-last [ string>number ] map ] map
|
[ but-last [ string>number ] map ] map
|
||||||
[ [ rest ] map ] [ [ first ] map ] bi
|
[ [ rest ] map ] [ [ first ] map ] bi
|
||||||
|
{ "a1" "a2" "a3" "a4" "a5" "a6" }
|
||||||
{ "no" "yes" }
|
{ "no" "yes" }
|
||||||
"monks.names" load-file
|
"monks.names" load-file
|
||||||
{ "a1" "a2" "a3" "a4" "a5" "a6" } <data-set> ;
|
<data-set> ;
|
||||||
|
|
||||||
: load-iris ( -- data-set )
|
: load-iris ( -- data-set )
|
||||||
"iris.csv" load-table-csv
|
"iris.csv" load-table-csv
|
||||||
[ [ unclip-last ] { } map>assoc unzip ] [ 2 tail ] bi*
|
[ [ unclip-last ] { } map>assoc unzip ] [ 2 tail ] bi*
|
||||||
"iris.rst" load-file
|
|
||||||
{
|
{
|
||||||
"sepal length (cm)" "sepal width (cm)"
|
"sepal length (cm)" "sepal width (cm)"
|
||||||
"petal length (cm)" "petal width (cm)"
|
"petal length (cm)" "petal width (cm)"
|
||||||
} <data-set> ;
|
} swap
|
||||||
|
"iris.rst" load-file
|
||||||
|
<data-set> ;
|
||||||
|
|
||||||
: load-linnerud ( -- data-set )
|
: load-linnerud ( -- data-set )
|
||||||
data-set new
|
data-set new
|
||||||
"linnerud_exercise.csv" load-table
|
"linnerud_exercise.csv" load-table
|
||||||
[ >>data ] [ >>feature-names ] bi*
|
[ >>features ] [ >>feature-names ] bi*
|
||||||
"linnerud_physiological.csv" load-table
|
"linnerud_physiological.csv" load-table
|
||||||
[ >>target ] [ >>target-names ] bi*
|
[ >>targets ] [ >>target-names ] bi*
|
||||||
"linnerud.rst" load-file >>description ;
|
"linnerud.rst" load-file >>description ;
|
||||||
|
|
|
@ -20,12 +20,12 @@ IN: machine-learning.decision-trees
|
||||||
[ [ entropy2 ] [ length ] bi * ] map-sum ; inline
|
[ [ entropy2 ] [ length ] bi * ] map-sum ; inline
|
||||||
|
|
||||||
:: average-gain ( dataset idx -- gain )
|
:: average-gain ( dataset idx -- gain )
|
||||||
dataset target>> :> target
|
dataset targets>> :> targets
|
||||||
dataset data>> :> data
|
dataset features>> :> features
|
||||||
data target zip :> data-target
|
features targets zip :> features-targets
|
||||||
data-target idx subsets-weighted-entropy :> weighted
|
features-targets idx subsets-weighted-entropy :> weighted
|
||||||
|
|
||||||
target entropy2 weighted data length / - ;
|
targets entropy2 weighted features length / - ;
|
||||||
|
|
||||||
: highest-gain-index ( dataset -- idx )
|
: highest-gain-index ( dataset -- idx )
|
||||||
dup feature-names>> length <iota> [
|
dup feature-names>> length <iota> [
|
||||||
|
|
Loading…
Reference in New Issue