machine-learning.one-hot: Better error checking.
parent
29b549563b
commit
65fcf8208e
|
@ -12,8 +12,17 @@ CONSTANT: test-data {
|
|||
{ "well" "sick" "tired" }
|
||||
}
|
||||
|
||||
! encode { hot sunny light neutral slowly well }
|
||||
{ { 1 0 0 0 0 1 1 0 0 0 1 0 1 1 0 0 } }
|
||||
[ { 0 3 0 2 1 0 } test-data one-hot ] unit-test
|
||||
[ test-data { 0 3 0 2 1 0 } one-hot ] unit-test
|
||||
|
||||
{ { 0 1 0 0 1 0 0 1 0 1 0 1 0 0 1 0 } }
|
||||
[ { 1 2 1 1 0 1 } test-data one-hot ] unit-test
|
||||
[ test-data { 1 2 1 1 0 1 } one-hot ] unit-test
|
||||
|
||||
! one index is required per category (6 here), so passing only 2 indices must fail
|
||||
[ test-data { 1 2 } one-hot ]
|
||||
[ one-hot-length-mismatch? ] must-fail-with
|
||||
|
||||
! the last index (10) is out of range for the last category { well sick tired }
|
||||
[ test-data { 1 2 1 1 0 10 } one-hot ]
|
||||
[ one-hot-input-out-of-bounds? ] must-fail-with
|
|
@ -1,11 +1,25 @@
|
|||
! Copyright (C) 2012 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: arrays kernel math.statistics math.vectors sequences
|
||||
USING: arrays kernel math math.statistics math.vectors sequences
|
||||
sequences.extras ;
|
||||
IN: machine-learning.one-hot
|
||||
|
||||
: one-hot ( indices features -- array )
|
||||
! Thrown by check-one-hot-length when vcategories and vinput differ in length.
ERROR: one-hot-length-mismatch vcategories vinput ;
|
||||
|
||||
! Thrown by check-one-hot-length when an element of vinput is too large for
! the matching element of vcategories.
ERROR: one-hot-input-out-of-bounds vcategories vinput ;
|
||||
|
||||
! Validate the inputs of one-hot before any indexing is done.
!   vcategories: sequence of category counts, one per feature
!   vinput:      sequence of selected indices, one per feature
! Throws one-hot-length-mismatch when the two sequences differ in length.
! Throws one-hot-input-out-of-bounds when any index is negative or is not
! smaller than its category count.
! Both inputs are left on the stack unchanged when validation succeeds.
: check-one-hot-length ( vcategories vinput -- vcategories vinput )
    2dup [ length ] bi@ = [ one-hot-length-mismatch ] unless
    2dup
    [ v- [ 1 < ] any? ]     ! count - index < 1 means index >= count
    [ nip [ 0 < ] any? ]    ! negative indices are never valid
    2bi or
    [ one-hot-input-out-of-bounds ] when ;
|
||||
|
||||
! Encode one selected index per feature as a single flat array of 0s and 1s.
!   features: sequence of category sequences
!   indices:  one index per feature, selecting a category inside that feature
! The result has one slot per category across all features; the slot of each
! selected category holds 1 and every other slot holds 0.
! Errors from check-one-hot-length propagate to the caller.
: one-hot ( features indices -- array )
    [ 1 ] 2dip                      ! the value set-nths will store
    [ [ length ] map ] dip          ! features -> category counts
    check-one-hot-length
    [ [ cum-sum0 ] dip v+ ]         ! start offset of each feature + index = flat position
    [ drop sum 0 <array> ] 2bi      ! zero-filled array sized to the total category count
    [ set-nths ] keep ;
|
||||
|
|
Loading…
Reference in New Issue