2012-11-17 15:38:52 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! Copyright (C) 2012 Doug Coleman.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								! See http://factorcode.org/license.txt for BSD license.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								USING: arrays assocs fry kernel math math.functions
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								math.statistics memoize random sequences sorting ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								IN: machine-learning.rebalancing
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								ERROR: probability-sum-not-one seq ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: check-probabilities ( seq -- seq )
							 | 
						
					
						
							
								
									
										
										
										
											2015-08-13 19:13:05 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    dup sum 1.0 .00000000001 ~ [ probability-sum-not-one ] unless ;
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:38:52 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: equal-probabilities ( n -- array )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dup recip <array> ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								MEMO: probabilities-seq ( seq -- seq' )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    check-probabilities [ >float ] map cum-sum ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: probabilities-quot ( seq -- quot )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    probabilities-seq
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    '[ _ random-unit '[ _ > ] find drop ] ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: stratified-sample ( stratified-sequences probability-sequence -- elt )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    probabilities-quot call swap nth random ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-28 12:13:16 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: stratified-samples ( stratified-sequences probability-sequence n -- elt )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [ '[ _ _ stratified-sample ] ] dip swap replicate ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:52:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: equal-stratified-sample ( stratified-sequences -- elt )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    random random ; inline
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-12-28 15:56:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								: collect-indices ( seq -- indices )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    H{ } clone [ '[ swap _ push-at ] each-index ] keep ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:38:52 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: balance-labels ( X y n -- X' y' )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [
							 | 
						
					
						
							
								
									
										
										
										
											2016-12-28 15:56:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        dup collect-indices
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:52:55 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        values '[
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            _ _ _ equal-stratified-sample
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:38:52 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            '[ _ swap nth ] bi@ 2array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] dip swap replicate [ keys ] [ values ] bi ;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								: skew-labels ( X y probs n -- X' y' )
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    [
							 | 
						
					
						
							
								
									
										
										
										
											2016-12-28 15:56:19 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        [ dup collect-indices sort-keys values ] dip
							 | 
						
					
						
							
								
									
										
										
										
											2012-11-17 15:38:52 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        '[
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            _ _ _ _ stratified-sample
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            '[ _ swap nth ] bi@ 2array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ] dip swap replicate [ keys ] [ values ] bi ;
							 |