diff --git a/extra/machine-learning/data-sets/data-sets.factor b/extra/machine-learning/data-sets/data-sets.factor index d7172e9787..fa6ebf9622 100644 --- a/extra/machine-learning/data-sets/data-sets.factor +++ b/extra/machine-learning/data-sets/data-sets.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2012 John Benediktsson ! See http://factorcode.org/license.txt for BSD license -USING: assocs csv io.encodings.utf8 io.files kernel math.parser -sequences ; +USING: accessors ascii assocs csv io.encodings.utf8 io.files +kernel math.parser sequences splitting ; IN: machine-learning.data-sets @@ -17,6 +17,11 @@ C: data-set "resource:extra/machine-learning/data-sets/" prepend utf8 file-contents ; +: load-table ( name -- data names ) + load-file [ blank? ] trim string-lines + [ [ blank? ] split-when ] map unclip + [ [ [ string>number ] map ] map ] dip ; + PRIVATE> : load-iris ( -- data-set ) @@ -32,3 +37,12 @@ PRIVATE> "sepal length (cm)" "sepal width (cm)" "petal length (cm)" "petal width (cm)" } ; + +: load-linnerud ( -- data-set ) + data-set new + "linnerud_exercise.csv" load-table + [ >>data ] [ >>feature-names ] bi* + "linnerud_physiological.csv" load-table + [ >>target ] [ >>target-names ] bi* + "linnerud.rst" load-file >>description ; + diff --git a/extra/machine-learning/data-sets/linnerud.rst b/extra/machine-learning/data-sets/linnerud.rst new file mode 100644 index 0000000000..10e2e1b2fa --- /dev/null +++ b/extra/machine-learning/data-sets/linnerud.rst @@ -0,0 +1,21 @@ +Linnerud dataset + +Notes +----- +Data Set Characteristics: + :Number of Instances: 20 + :Number of Attributes: 3 + :Missing Attribute Values: None + +The Linnerud dataset contains two small datasets: + +- *exercise*: A list containing the following components: exercise data with + 20 observations on 3 exercise variables: Chins, Situps and Jumps. + +- *physiological*: Data frame with 20 observations on 3 physiological variables: + Weight, Waist and Pulse. 
+ +References +---------- + * http://rgm2.lab.nig.ac.jp/RGM2/func.php?rd_id=mixOmics:linnerud + * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technip. diff --git a/extra/machine-learning/data-sets/linnerud_exercise.csv b/extra/machine-learning/data-sets/linnerud_exercise.csv new file mode 100644 index 0000000000..ac0db1b760 --- /dev/null +++ b/extra/machine-learning/data-sets/linnerud_exercise.csv @@ -0,0 +1,21 @@ +Chins Situps Jumps +5 162 60 +2 110 60 +12 101 101 +12 105 37 +13 155 58 +4 101 42 +8 101 38 +6 125 40 +15 200 40 +17 251 250 +17 120 38 +13 210 115 +14 215 105 +1 50 50 +6 70 31 +12 210 120 +4 60 25 +11 230 80 +15 225 73 +2 110 43 diff --git a/extra/machine-learning/data-sets/linnerud_physiological.csv b/extra/machine-learning/data-sets/linnerud_physiological.csv new file mode 100644 index 0000000000..68bd0cd595 --- /dev/null +++ b/extra/machine-learning/data-sets/linnerud_physiological.csv @@ -0,0 +1,21 @@ +Weight Waist Pulse +191 36 50 +189 37 52 +193 38 58 +162 35 62 +189 35 46 +182 36 56 +211 38 56 +167 34 60 +176 31 74 +154 33 56 +169 34 50 +166 33 52 +154 34 64 +247 46 50 +193 36 46 +202 37 62 +176 37 54 +157 32 52 +156 33 54 +138 33 68