2012-12-04 13:00:02 -05:00
|
|
|
! Copyright (C) 2012 John Benediktsson
|
|
|
|
! See http://factorcode.org/license.txt for BSD license
|
|
|
|
|
2012-12-04 13:21:04 -05:00
|
|
|
USING: accessors ascii assocs csv io.encodings.utf8 io.files
|
|
|
|
kernel math.parser sequences splitting ;
|
2012-12-04 13:00:02 -05:00
|
|
|
|
|
|
|
IN: machine-learning.data-sets
|
|
|
|
|
|
|
|
! A data set bundled with its metadata. Slot order matters: the
! positional constructor fills the slots in exactly this order.
!   data           rows of numeric observations
!   target         the value(s) associated with each row of data
!   target-names   labels for the target values
!   description    free-form text describing the data set
!   feature-names  labels for the columns of data
TUPLE: data-set
    data target target-names description feature-names ;
|
|
|
|
|
|
|
|
! Positional constructor: takes one value per slot, in slot
! order, and returns the filled-in data-set tuple.
: <data-set> ( data target target-names description feature-names -- data-set )
    data-set boa ;
|
|
|
|
|
|
|
|
<PRIVATE
|
|
|
|
|
|
|
|
! Read one of the bundled data files. `name` is appended to the
! data-sets resource directory and the file at that path is
! returned as a single string, decoded as UTF-8.
: load-file ( name -- contents )
    "resource:extra/machine-learning/data-sets/" swap append
    utf8 file-contents ;
|
|
|
|
|
2012-12-04 13:29:11 -05:00
|
|
|
! Split a table of strings into its header and its numeric body.
! The first row is returned untouched as `names`; every cell of
! the remaining rows is converted with string>number to form
! `data` (a cell that does not parse as a number becomes f).
: numerify ( table -- data names )
    [ rest [ [ string>number ] map ] map ] [ first ] bi ;
|
|
|
|
|
2012-12-04 13:21:04 -05:00
|
|
|
! Load a whitespace-delimited table from the bundled data file
! `name`. Surrounding whitespace is trimmed from the file, each
! line is split into fields on blank characters, and the result
! is handed to numerify: the first line becomes `names` and the
! remaining lines become rows of numbers in `data`.
!
! Empty fields are discarded with harvest, so runs of blanks
! between columns, or blanks at the start or end of a line, do
! not turn into spurious f entries that shift the columns.
: load-table ( name -- data names )
    load-file [ blank? ] trim string-lines
    [ [ blank? ] split-when harvest ] map numerify ;
|
|
|
|
|
|
|
|
! Load a comma-separated table from the bundled data file `name`.
! The file text is parsed with string>csv and passed to numerify,
! which yields the numeric rows as `data` and the first row as
! `names`.
: load-table-csv ( name -- data names )
    load-file string>csv
    numerify ;
|
2012-12-04 13:21:04 -05:00
|
|
|
|
2012-12-04 13:00:02 -05:00
|
|
|
PRIVATE>
|
|
|
|
|
|
|
|
! Build the iris data-set from the bundled "iris.csv" and
! "iris.rst" files.
: load-iris ( -- data-set )
    "iris.csv" load-table-csv
    ! Under the header: peel the last cell off every row, giving
    ! the feature rows (data) and the per-row targets.
    ! Then drop the first two header fields; the rest are used
    ! as target-names.
    ! NOTE(review): presumably those two fields are counts --
    ! confirm against iris.csv.
    [ [ unclip-last ] { } map>assoc unzip ] dip 2 tail
    "iris.rst" load-file
    {
        "sepal length (cm)"
        "sepal width (cm)"
        "petal length (cm)"
        "petal width (cm)"
    } <data-set> ;
|
2012-12-04 13:21:04 -05:00
|
|
|
|
|
|
|
! Build the linnerud data-set from its bundled files: the
! exercise table supplies data and feature-names, the
! physiological table supplies target and target-names, and
! "linnerud.rst" supplies the description.
: load-linnerud ( -- data-set )
    data-set new
        "linnerud_exercise.csv" load-table
        ! ( data-set rows header ): store the rows beneath the
        ! header first, then the header itself.
        [ >>data ] dip >>feature-names
        "linnerud_physiological.csv" load-table
        [ >>target ] dip >>target-names
        "linnerud.rst" load-file >>description ;
|