From 5dee39c221c43ccb0d45ec408c501cf582b1fd43 Mon Sep 17 00:00:00 2001
From: Doug Coleman
Date: Tue, 4 Dec 2012 11:43:48 -0800
Subject: [PATCH] machine-learning: Add transform protocol. Add label-encoder, label-binarizer.

---
 .../label-binarizer/authors.txt               |  1 +
 .../label-binarizer-tests.factor              | 16 +++++++++++++
 .../label-binarizer/label-binarizer.factor    | 23 +++++++++++++++++++
 .../label-encoder/authors.txt                 |  1 +
 .../label-encoder/label-encoder-tests.factor  | 10 ++++++++
 .../label-encoder/label-encoder.factor        | 18 +++++++++++++++
 .../machine-learning/transformer/authors.txt  |  1 +
 .../transformer/transformer.factor            |  9 ++++++++
 8 files changed, 79 insertions(+)
 create mode 100644 extra/machine-learning/label-binarizer/authors.txt
 create mode 100644 extra/machine-learning/label-binarizer/label-binarizer-tests.factor
 create mode 100644 extra/machine-learning/label-binarizer/label-binarizer.factor
 create mode 100644 extra/machine-learning/label-encoder/authors.txt
 create mode 100644 extra/machine-learning/label-encoder/label-encoder-tests.factor
 create mode 100644 extra/machine-learning/label-encoder/label-encoder.factor
 create mode 100644 extra/machine-learning/transformer/authors.txt
 create mode 100644 extra/machine-learning/transformer/transformer.factor

diff --git a/extra/machine-learning/label-binarizer/authors.txt b/extra/machine-learning/label-binarizer/authors.txt
new file mode 100644
index 0000000000..7c1b2f2279
--- /dev/null
+++ b/extra/machine-learning/label-binarizer/authors.txt
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/extra/machine-learning/label-binarizer/label-binarizer-tests.factor b/extra/machine-learning/label-binarizer/label-binarizer-tests.factor
new file mode 100644
index 0000000000..65c38f91f5
--- /dev/null
+++ b/extra/machine-learning/label-binarizer/label-binarizer-tests.factor
@@ -0,0 +1,16 @@
+! Copyright (C) 2012 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel machine-learning.label-binarizer
+machine-learning.transformer tools.test ;
+IN: machine-learning.label-binarizer.tests
+
+{ { { 1 0 0 0 } { 0 0 0 1 } } } [
+    <label-binarizer> { 1 2 6 4 2 } over fit-y
+    { 1 6 } swap transform-y
+] unit-test ! labels 1 and 6 become 0/1 rows over the four fitted classes
+
+{ { 1 6 } } [
+    <label-binarizer> { 1 2 6 4 2 } over fit-y
+    { 1 6 } over transform-y swap inverse-transform-y
+] unit-test ! transform-y followed by inverse-transform-y gives the labels back
+
diff --git a/extra/machine-learning/label-binarizer/label-binarizer.factor b/extra/machine-learning/label-binarizer/label-binarizer.factor
new file mode 100644
index 0000000000..60fcfd6d43
--- /dev/null
+++ b/extra/machine-learning/label-binarizer/label-binarizer.factor
@@ -0,0 +1,23 @@
+! Copyright (C) 2012 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays fry kernel machine-learning.transformer
+math.extras sequences sets sorting ;
+IN: machine-learning.label-binarizer
+
+TUPLE: label-binarizer classes_ ; ! classes_: sorted distinct labels stored by fit-y
+
+: <label-binarizer> ( -- lb ) ! constructor: a fresh label-binarizer tuple
+    label-binarizer new ; inline
+
+M: label-binarizer fit-y ( y transformer -- ) ! store the distinct labels of y, sorted
+    [ members natural-sort ] dip classes_<< ;
+
+M: label-binarizer transform-y ( y transformer -- y' ) ! each label -> one 0/1 row
+    classes_>> dup length '[ ! per label: fresh all-zero array, one slot per class
+        _ search-sorted [ 1 ] dip _ 0 <array> [ set-nth ] keep ! 1 at the index search-sorted returns
+    ] map ;
+
+M: label-binarizer inverse-transform-y ( y' transformer -- y ) ! each 0/1 row -> its label
+    classes_>> '[
+        [ 1 = ] find drop _ nth ! index of the first 1 selects the class
+    ] map ;
diff --git a/extra/machine-learning/label-encoder/authors.txt b/extra/machine-learning/label-encoder/authors.txt
new file mode 100644
index 0000000000..7c1b2f2279
--- /dev/null
+++ b/extra/machine-learning/label-encoder/authors.txt
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/extra/machine-learning/label-encoder/label-encoder-tests.factor b/extra/machine-learning/label-encoder/label-encoder-tests.factor
new file mode 100644
index 0000000000..235bb48af5
--- /dev/null
+++ b/extra/machine-learning/label-encoder/label-encoder-tests.factor
@@ -0,0 +1,10 @@
+! Copyright (C) 2012 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel machine-learning.label-encoder
+machine-learning.transformer tools.test ;
+IN: machine-learning.label-encoder.tests
+
+{ { 1 3 2 4 } } [
+    <label-encoder> { 1 2 3 4 3 2 3 2 2 3 2 } over fit-y
+    { 1 3 2 4 } over transform-y swap inverse-transform-y
+] unit-test ! transform-y followed by inverse-transform-y gives the labels back
diff --git a/extra/machine-learning/label-encoder/label-encoder.factor b/extra/machine-learning/label-encoder/label-encoder.factor
new file mode 100644
index 0000000000..35f1f6ee15
--- /dev/null
+++ b/extra/machine-learning/label-encoder/label-encoder.factor
@@ -0,0 +1,18 @@
+! Copyright (C) 2012 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors fry kernel locals machine-learning.transformer
+math.extras sequences sets sorting ;
+IN: machine-learning.label-encoder
+
+TUPLE: label-encoder classes_ ; ! classes_: sorted distinct labels stored by fit-y
+
+: <label-encoder> ( -- le ) label-encoder new ; inline ! constructor: a fresh label-encoder tuple
+
+M: label-encoder fit-y ( y transformer -- ) ! store the distinct labels of y, sorted
+    [ members natural-sort ] dip classes_<< ;
+
+M: label-encoder transform-y ( y transformer -- y' ) ! each label -> index search-sorted returns in classes_
+    classes_>> '[ _ search-sorted ] map ;
+
+M: label-encoder inverse-transform-y ( y' transformer -- y ) ! each index -> the class at that position
+    classes_>> '[ _ nth ] map ;
diff --git a/extra/machine-learning/transformer/authors.txt b/extra/machine-learning/transformer/authors.txt
new file mode 100644
index 0000000000..7c1b2f2279
--- /dev/null
+++ b/extra/machine-learning/transformer/authors.txt
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/extra/machine-learning/transformer/transformer.factor b/extra/machine-learning/transformer/transformer.factor
new file mode 100644
index 0000000000..d8d661806f
--- /dev/null
+++ b/extra/machine-learning/transformer/transformer.factor
@@ -0,0 +1,9 @@
+! Copyright (C) 2012 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: ;
+IN: machine-learning.transformer
+
+GENERIC: fit-y ( y transformer -- ) ! fit the transformer to the labels y
+GENERIC: transform-y ( y transformer -- y' ) ! encode labels with a fitted transformer
+GENERIC: inverse-transform-y ( y' transformer -- y ) ! decode encoded labels back to labels
+