assocs: moving collect-by from math.statistics.

char-rename
John Benediktsson 2016-12-28 12:56:19 -08:00
parent fa090a2292
commit b141e0d2fb
13 changed files with 62 additions and 161 deletions

View File

@ -137,47 +137,6 @@ HELP: sorted-histogram
} }
} ; } ;
HELP: sequence>assoc
{ $values
{ "seq" sequence } { "map-quot" quotation } { "insert-quot" quotation } { "exemplar" "an exemplar assoc" }
{ "assoc" assoc }
}
{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a newly created " { $snippet "assoc" } ". The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response." }
{ $examples
{ $example "! Iterate over a sequence and increment the count at each element"
"! The first quotation has stack effect ( key -- key ), a no-op"
"USING: assocs prettyprint kernel math.statistics ;"
"\"aaabc\" [ ] [ inc-at ] H{ } sequence>assoc ."
"H{ { 97 3 } { 98 1 } { 99 1 } }"
}
} ;
HELP: sequence>assoc!
{ $values
{ "assoc" assoc } { "seq" sequence } { "map-quot" quotation } { "insert-quot" quotation } }
{ $description "Iterates over a sequence, allowing elements of the sequence to be added to an existing " { $snippet "assoc" } ". The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response." }
{ $examples
{ $example "! Iterate over a sequence and add the counts to an existing assoc"
"USING: assocs prettyprint math.statistics kernel ;"
"H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ ] [ inc-at ] sequence>assoc! ."
"H{ { 97 5 } { 98 2 } { 99 1 } }"
}
} ;
HELP: sequence>hashtable
{ $values
{ "seq" sequence } { "map-quot" quotation } { "insert-quot" quotation }
{ "hashtable" hashtable }
}
{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a newly created hashtable. The " { $snippet "map-quot" } " gets passed each element from the sequence. Its outputs are passed along with the assoc being constructed to the " { $snippet "insert-quot" } ", which can modify the assoc in response." }
{ $examples
{ $example "! Count the number of times an element occurs in a sequence"
"USING: assocs kernel prettyprint math.statistics ;"
"\"aaabc\" [ ] [ inc-at ] sequence>hashtable ."
"H{ { 97 3 } { 98 1 } { 99 1 } }"
}
} ;
HELP: cum-sum HELP: cum-sum
{ $values { "seq" sequence } { "seq'" sequence } } { $values { "seq" sequence } { "seq'" sequence } }
{ $description "Returns the cumulative sum of " { $snippet "seq" } "." } { $description "Returns the cumulative sum of " { $snippet "seq" } "." }
@ -199,7 +158,7 @@ HELP: cum-sum0
} ; } ;
HELP: cum-count HELP: cum-count
{ $values { "seq" sequence } { "quot" quotation } { "seq'" sequence } } { $values { "seq" sequence } { "quot" { $quotation ( elt -- ? ) } } { "seq'" sequence } }
{ $description "Returns the cumulative count of how many times " { $snippet "quot" } " returns true." } { $description "Returns the cumulative count of how many times " { $snippet "quot" } " returns true." }
{ $examples { $examples
{ $example "USING: math math.statistics prettyprint ;" { $example "USING: math math.statistics prettyprint ;"
@ -271,37 +230,6 @@ HELP: rescale
{ $values { "u" sequence } { "v" sequence } } { $values { "u" sequence } { "v" sequence } }
{ $description "Returns " { $snippet "u" } " rescaled to run from 0 to 1 over the range min to max." } ; { $description "Returns " { $snippet "u" } " rescaled to run from 0 to 1 over the range min to max." } ;
HELP: collect-by
{ $values
{ "seq" sequence } { "quot" { $quotation ( ... obj -- ... key ) } }
{ "hashtable" hashtable }
}
{ $description "Applies a quotation to each element in the input sequence and returns a " { $snippet "hashtable" } " of like elements. The keys of this " { $snippet "hashtable" } " are the output of " { $snippet "quot" } " and the values at each key are the elements that transformed to that key." }
{ $examples
"Collect even and odd elements:"
{ $example
"USING: math math.statistics prettyprint ;"
"{ 11 12 13 14 14 13 12 11 } [ odd? ] collect-by ."
"H{ { t V{ 11 13 13 11 } } { f V{ 12 14 14 12 } } }"
}
}
{ $notes "May be named " { $snippet "group-by" } " in other languages." } ;
HELP: collect-index-by
{ $values
{ "seq" sequence } { "quot" { $quotation ( ... obj -- ... key ) } }
{ "hashtable" hashtable }
}
{ $description "Applies a quotation to each element in the input sequence and returns a " { $snippet "hashtable" } " of like elements. The keys of this " { $snippet "hashtable" } " are the output of " { $snippet "quot" } " and the values at each key are the indices for the elements that transformed to that key." }
{ $examples
"Collect even and odd elements:"
{ $example
"USING: math math.statistics prettyprint ;"
"{ 11 12 13 14 14 13 12 11 } [ odd? ] collect-index-by ."
"H{ { t V{ 0 2 5 7 } } { f V{ 1 3 4 6 } } }"
}
} ;
HELP: z-score HELP: z-score
{ $values { "seq" sequence } { "n" number } } { $values { "seq" sequence } { "n" number } }
{ $description "Calculates the Z-Score for " { $snippet "seq" } "." } ; { $description "Calculates the Z-Score for " { $snippet "seq" } "." } ;
@ -313,12 +241,6 @@ ARTICLE: "histogram" "Computing histograms"
histogram-by histogram-by
histogram! histogram!
sorted-histogram sorted-histogram
}
"Combinators for implementing histogram:"
{ $subsections
sequence>assoc
sequence>assoc!
sequence>hashtable
} ; } ;
ARTICLE: "cumulative" "Computing cumulative sequences" ARTICLE: "cumulative" "Computing cumulative sequences"
@ -360,8 +282,6 @@ ARTICLE: "math.statistics" "Statistics"
{ $subsections kth-smallest } { $subsections kth-smallest }
"Counting the frequency of occurrence of elements:" "Counting the frequency of occurrence of elements:"
{ $subsections "histogram" } { $subsections "histogram" }
"Collecting related items:"
{ $subsections collect-by collect-index-by }
"Computing cumulative sequences:" "Computing cumulative sequences:"
{ $subsections "cumulative" } ; { $subsections "cumulative" } ;

View File

@ -99,15 +99,6 @@ IN: math.statistics.tests
{ H{ { 1 1/2 } { 2 1/6 } { 3 1/3 } } } { H{ { 1 1/2 } { 2 1/6 } { 3 1/3 } } }
[ { 1 1 1 1 1 1 2 2 3 3 3 3 } normalized-histogram ] unit-test [ { 1 1 1 1 1 1 2 2 3 3 3 3 } normalized-histogram ] unit-test
{
V{ 0 3 6 9 }
V{ 1 4 7 }
V{ 2 5 8 }
} [
10 iota [ 3 mod ] collect-by
[ 0 of ] [ 1 of ] [ 2 of ] tri
] unit-test
{ 0 } [ { 1 } { 1 } sample-cov ] unit-test { 0 } [ { 1 } { 1 } sample-cov ] unit-test
{ 2/3 } [ { 1 2 3 } { 4 5 6 } population-cov ] unit-test { 2/3 } [ { 1 2 3 } { 4 5 6 } population-cov ] unit-test
@ -215,22 +206,6 @@ IN: math.statistics.tests
{ { 1 2 6 } } { { 1 2 6 } }
[ { 2 3 4 } cum-product1 ] unit-test [ { 2 3 4 } cum-product1 ] unit-test
{
H{
{ 0 V{ 600 603 606 609 } }
{ 1 V{ 601 604 607 610 } }
{ 2 V{ 602 605 608 } }
}
}
[ 600 610 [a,b] [ 3 mod ] collect-by ] unit-test
{
H{ { 0 V{ 0 3 6 9 } } { 1 V{ 1 4 7 10 } } { 2 V{ 2 5 8 } } }
}
[ 600 610 [a,b] [ 3 mod ] collect-index-by ] unit-test
{ { 1 } } [ { { 1 } } [
{ 1 2 3 4 5 10 21 12 12 12 12203 3403 030 3022 2 2 } { 1 2 3 4 5 10 21 12 12 12 12203 3403 030 3022 2 2 }
{ 1/1000 } quantile5 { 1/1000 } quantile5

View File

@ -1,9 +1,8 @@
! Copyright (C) 2008 Doug Coleman, Michael Judge, Loryn Jenkins. ! Copyright (C) 2008 Doug Coleman, Michael Judge, Loryn Jenkins.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs combinators generalizations kernel locals math USING: arrays assocs combinators fry generalizations grouping
math.functions math.order math.vectors math.ranges sequences kernel locals math math.functions math.order math.vectors
sequences.private sorting fry arrays grouping sets sequences sequences.private sorting ;
splitting.monotonic ;
IN: math.statistics IN: math.statistics
: power-mean ( seq p -- x ) : power-mean ( seq p -- x )
@ -208,36 +207,11 @@ PRIVATE>
: trimean ( seq -- x ) : trimean ( seq -- x )
quartile first3 [ 2 * ] dip + + 4 / ; quartile first3 [ 2 * ] dip + + 4 / ;
<PRIVATE
: (sequence>assoc) ( seq map-quot insert-quot assoc -- assoc )
[ swap curry compose each ] keep ; inline
: (sequence-index>assoc) ( seq map-quot insert-quot assoc -- assoc )
[ swap curry compose each-index ] keep ; inline
PRIVATE>
: sequence>assoc! ( assoc seq map-quot: ( x -- ..y ) insert-quot: ( ..y assoc -- ) -- assoc )
4 nrot (sequence>assoc) ; inline
: sequence>assoc ( seq map-quot insert-quot exemplar -- assoc )
clone (sequence>assoc) ; inline
: sequence-index>assoc ( seq map-quot insert-quot exemplar -- assoc )
clone (sequence-index>assoc) ; inline
: sequence-index>hashtable ( seq map-quot insert-quot -- hashtable )
H{ } sequence-index>assoc ; inline
: sequence>hashtable ( seq map-quot insert-quot -- hashtable )
H{ } sequence>assoc ; inline
: histogram! ( hashtable seq -- hashtable ) : histogram! ( hashtable seq -- hashtable )
[ ] [ inc-at ] sequence>assoc! ; over '[ _ inc-at ] each ;
: histogram-by ( seq quot: ( x -- bin ) -- hashtable ) : histogram-by ( seq quot: ( x -- bin ) -- hashtable )
[ inc-at ] sequence>hashtable ; inline H{ } clone [ '[ @ _ inc-at ] each ] keep ; inline
: histogram ( seq -- hashtable ) : histogram ( seq -- hashtable )
[ ] histogram-by ; [ ] histogram-by ;
@ -248,12 +222,6 @@ PRIVATE>
: normalized-histogram ( seq -- alist ) : normalized-histogram ( seq -- alist )
[ histogram ] [ length ] bi '[ _ / ] assoc-map ; [ histogram ] [ length ] bi '[ _ / ] assoc-map ;
: collect-index-by ( ... seq quot: ( ... obj -- ... key ) -- ... hashtable )
[ dip swap ] curry [ push-at ] sequence-index>hashtable ; inline
: collect-by ( ... seq quot: ( ... obj -- ... key ) -- ... hashtable )
[ keep swap ] curry [ push-at ] sequence>hashtable ; inline
: equal-probabilities ( n -- array ) : equal-probabilities ( n -- array )
dup recip <array> ; inline dup recip <array> ; inline
@ -353,8 +321,8 @@ PRIVATE>
: cum-mean ( seq -- seq' ) : cum-mean ( seq -- seq' )
0 swap [ [ + dup ] dip 1 + / ] map-index nip ; 0 swap [ [ + dup ] dip 1 + / ] map-index nip ;
: cum-count ( seq quot -- seq' ) : cum-count ( seq quot: ( elt -- ? ) -- seq' )
[ 0 ] dip '[ _ call [ 1 + ] when ] accumulate* ; inline [ 0 ] dip '[ @ [ 1 + ] when ] accumulate* ; inline
: cum-min ( seq -- seq' ) : cum-min ( seq -- seq' )
dup ?first [ min ] accumulate* ; dup ?first [ min ] accumulate* ;

View File

@ -626,3 +626,24 @@ HELP: zip-index-as
{ $description "Zip a sequence with its index and return an associative list of type " { $snippet "exemplar" } " where the input sequence is the keys and the indices are the values." } ; { $description "Zip a sequence with its index and return an associative list of type " { $snippet "exemplar" } " where the input sequence is the keys and the indices are the values." } ;
{ unzip zip zip-as zip-index zip-index-as } related-words { unzip zip zip-as zip-index zip-index-as } related-words
HELP: collect-by
{ $values
{ "seq" sequence } { "quot" { $quotation ( ... obj -- ... key ) } }
{ "assoc" assoc }
}
{ $description "Applies a quotation to each element in the input sequence and returns a " { $snippet "hashtable" } " of like elements. The keys of this " { $snippet "hashtable" } " are the output of " { $snippet "quot" } " and the values at each key are the elements that transformed to that key." }
{ $examples
"Collect even and odd elements:"
{ $example
"USING: assocs math prettyprint ;"
"{ 11 12 13 14 14 13 12 11 } [ odd? ] collect-by ."
"H{ { t V{ 11 13 13 11 } } { f V{ 12 14 14 12 } } }"
}
"Collect strings by length:"
{ $example
"USING: assocs prettyprint sequences ;"
"{ \"one\" \"two\" \"three\" \"four\" \"five\" } [ length ] collect-by ."
"H{\n { 3 V{ \"one\" \"two\" } }\n { 4 V{ \"four\" \"five\" } }\n { 5 V{ \"three\" } }\n}"
}
} ;

View File

@ -1,4 +1,4 @@
USING: alien.c-types assocs kernel make math namespaces USING: alien.c-types ascii assocs kernel make math namespaces
sequences specialized-arrays tools.test ; sequences specialized-arrays tools.test ;
IN: assocs.tests IN: assocs.tests
SPECIALIZED-ARRAY: double SPECIALIZED-ARRAY: double
@ -307,3 +307,13 @@ unit-test
{ {
V{ { 11 0 } { 22 1 } { 33 2 } } V{ { 11 0 } { 22 1 } { 33 2 } }
} [ { 11 22 33 } V{ } zip-index-as ] unit-test } [ { 11 22 33 } V{ } zip-index-as ] unit-test
{
H{
{ 0 V{ 0 3 6 9 } }
{ 1 V{ 1 4 7 } }
{ 2 V{ 2 5 8 } }
}
} [
10 iota [ 3 mod ] collect-by
] unit-test

View File

@ -239,6 +239,11 @@ M: assoc value-at* swap [ = nip ] curry assoc-find nip ;
: unzip ( assoc -- keys values ) : unzip ( assoc -- keys values )
dup assoc-empty? [ drop { } { } ] [ >alist flip first2 ] if ; dup assoc-empty? [ drop { } { } ] [ >alist flip first2 ] if ;
: collect-by ( ... seq quot: ( ... obj -- ... key ) -- ... assoc )
[ keep swap ] curry H{ } clone [
[ push-at ] curry compose each
] keep ; inline
M: sequence at* M: sequence at*
search-alist [ second t ] [ f ] if ; search-alist [ second t ] [ f ] if ;

View File

@ -23,7 +23,7 @@ M: source-file-error compute-restarts error>> compute-restarts ;
[ [ line#>> 0 or ] sort-with ] { } assoc-map-as sort-keys ; [ [ line#>> 0 or ] sort-with ] { } assoc-map-as sort-keys ;
: group-by-source-file ( errors -- assoc ) : group-by-source-file ( errors -- assoc )
H{ } clone [ [ push-at ] curry [ dup path>> ] prepose each ] keep ; [ path>> ] collect-by ;
TUPLE: error-type-holder type word plural icon quot forget-quot { fatal? initial: t } ; TUPLE: error-type-holder type word plural icon quot forget-quot { fatal? initial: t } ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license ! See http://factorcode.org/license.txt for BSD license
USING: ascii assocs fry io.encodings.ascii io.files kernel math USING: ascii assocs fry io.encodings.ascii io.files kernel math
math.statistics memoize sequences sequences.extras sorting sets ; memoize sequences sequences.extras sorting sets ;
IN: anagrams IN: anagrams
: make-anagram-hash ( strings -- assoc ) : make-anagram-hash ( strings -- assoc )

View File

@ -1,10 +1,10 @@
! Copyright (C) 2009 Jose Antonio Ortega Ruiz. ! Copyright (C) 2009 Jose Antonio Ortega Ruiz.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs definitions help.topics io.pathnames USING: accessors arrays assocs definitions help.topics
kernel math math.order math.statistics memoize namespaces sequences sets io.pathnames kernel memoize namespaces sequences sets sorting
sorting tools.completion tools.crossref vocabs vocabs.parser tools.completion tools.crossref vocabs vocabs.hierarchy
vocabs.hierarchy words ; vocabs.parser words ;
IN: fuel.xref IN: fuel.xref

View File

@ -28,9 +28,12 @@ MEMO: probabilities-seq ( seq -- seq' )
: equal-stratified-sample ( stratified-sequences -- elt ) : equal-stratified-sample ( stratified-sequences -- elt )
random random ; inline random random ; inline
: collect-indices ( seq -- indices )
H{ } clone [ '[ swap _ push-at ] each-index ] keep ;
: balance-labels ( X y n -- X' y' ) : balance-labels ( X y n -- X' y' )
[ [
dup [ ] collect-index-by dup collect-indices
values '[ values '[
_ _ _ equal-stratified-sample _ _ _ equal-stratified-sample
'[ _ swap nth ] bi@ 2array '[ _ swap nth ] bi@ 2array
@ -39,7 +42,7 @@ MEMO: probabilities-seq ( seq -- seq' )
: skew-labels ( X y probs n -- X' y' ) : skew-labels ( X y probs n -- X' y' )
[ [
[ dup [ ] collect-index-by sort-keys values ] dip [ dup collect-indices sort-keys values ] dip
'[ '[
_ _ _ _ stratified-sample _ _ _ _ stratified-sample
'[ _ swap nth ] bi@ 2array '[ _ swap nth ] bi@ 2array

View File

@ -4,7 +4,7 @@
USING: accessors assocs calendar calendar.elapsed USING: accessors assocs calendar calendar.elapsed
colors.constants colors.hex combinators formatting fry colors.constants colors.hex combinators formatting fry
http.client io io.styles json json.reader kernel make math http.client io io.styles json json.reader kernel make math
math.statistics sequences urls ; sequences urls ;
IN: reddit IN: reddit

View File

@ -1,7 +1,7 @@
! Copyright (c) 2012 Anonymous ! Copyright (c) 2012 Anonymous
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs fry io kernel math.parser sequences USING: accessors assocs io kernel math.parser sequences
sorting math.statistics ; sorting ;
IN: rosetta-code.top-rank IN: rosetta-code.top-rank
! http://rosettacode.org/wiki/Top_rank_per_group ! http://rosettacode.org/wiki/Top_rank_per_group

View File

@ -1,9 +1,8 @@
! Copyright (C) 2009 Doug Coleman. ! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs combinators combinators.short-circuit USING: accessors arrays assocs calendar combinators
combinators.smart fry io.encodings.utf8 io.files kernel combinators.smart io.encodings.utf8 io.files kernel math.parser
math.parser math.statistics memoize namespaces sequences memoize namespaces sequences splitting unicode ;
splitting unicode calendar arrays ;
IN: zoneinfo IN: zoneinfo
CONSTANT: zoneinfo-paths CONSTANT: zoneinfo-paths