From 092dd9fc393a8a71b12a135af7e38e9101ab2bdb Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sun, 1 Nov 2009 23:16:26 -0600 Subject: [PATCH 1/3] add sorted-histogram word --- basis/math/statistics/statistics-docs.factor | 14 ++++++++++++++ basis/math/statistics/statistics.factor | 3 +++ 2 files changed, 17 insertions(+) diff --git a/basis/math/statistics/statistics-docs.factor b/basis/math/statistics/statistics-docs.factor index 3b6e7d62ba..9834f44add 100644 --- a/basis/math/statistics/statistics-docs.factor +++ b/basis/math/statistics/statistics-docs.factor @@ -98,6 +98,19 @@ HELP: histogram* } { $description "Takes an existing hashtable and uses " { $link histogram } " to continue counting the number of occurences of each element." } ; +HELP: sorted-histogram +{ $values + { "seq" sequence } + { "alist" "an array of key/value pairs" } +} +{ $description "Outputs a " { $link histogram } " of a sequence sorted by number of occurrences from lowest to highest." } +{ $examples + { $example "USING: prettyprint math.statistics ;" + """"abababbbbbbc" sorted-histogram .""" + "{ { 99 1 } { 97 3 } { 98 8 } }" + } +} ; + HELP: sequence>assoc { $values { "seq" sequence } { "quot" quotation } { "exemplar" "an exemplar assoc" } @@ -145,6 +158,7 @@ ARTICLE: "histogram" "Computing histograms" { $subsections histogram histogram* + sorted-histogram } "Combinators for implementing histogram:" { $subsections diff --git a/basis/math/statistics/statistics.factor b/basis/math/statistics/statistics.factor index 9c72b848ca..73a87ffb72 100644 --- a/basis/math/statistics/statistics.factor +++ b/basis/math/statistics/statistics.factor @@ -79,6 +79,9 @@ PRIVATE> : histogram ( seq -- hashtable ) [ inc-at ] sequence>hashtable ; +: sorted-histogram ( seq -- alist ) + histogram >alist sort-values ; + : collect-values ( seq quot: ( obj hashtable -- ) -- hash ) '[ [ dup @ ] dip push-at ] sequence>hashtable ; inline From d2fe75276ebf33c20655f33abe2fbc4bdf0f9b0e Mon Sep 17 00:00:00 2001 From: Daniel
Ehrenberg Date: Thu, 5 Nov 2009 13:13:27 -0600 Subject: [PATCH 2/3] custom inlining for diff and intersect, when given a literal sequence. this cuts off 1/3 of the running time of a microbenchmark --- .../tree/propagation/transforms/transforms.factor | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/basis/compiler/tree/propagation/transforms/transforms.factor b/basis/compiler/tree/propagation/transforms/transforms.factor index 1f40bf00a2..ff68fb2400 100644 --- a/basis/compiler/tree/propagation/transforms/transforms.factor +++ b/basis/compiler/tree/propagation/transforms/transforms.factor @@ -3,7 +3,7 @@ USING: kernel sequences words fry generic accessors classes.tuple classes classes.algebra definitions stack-checker.state quotations classes.tuple.private math -math.partial-dispatch math.private math.intervals +math.partial-dispatch math.private math.intervals sets.private math.floats.private math.integers.private layouts math.order vectors hashtables combinators effects generalizations assocs sets combinators.short-circuit sequences.private locals @@ -290,3 +290,13 @@ CONSTANT: lookup-table-at-max 256 ] [ drop f ] if ; \ at* [ at-quot ] 1 define-partial-eval + +: diff-quot ( seq -- quot: ( seq' -- seq'' ) ) + tester '[ [ @ not ] filter ] ; + +\ diff [ diff-quot ] 1 define-partial-eval + +: intersect-quot ( seq -- quot: ( seq' -- seq'' ) ) + tester '[ _ filter ] ; + +\ intersect [ intersect-quot ] 1 define-partial-eval From 3e28be6568279df3127d66f88ae515a20c895996 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 5 Nov 2009 17:12:10 -0600 Subject: [PATCH 3/3] move sequence-parser to sequences.parser --- basis/compression/run-length/run-length.factor | 2 +- basis/sequences/parser/authors.txt | 2 ++ .../sequences/parser/parser-tests.factor | 2 +- .../sequences/parser/parser.factor | 2 +- extra/c/lexer/lexer-tests.factor | 2 +- extra/c/lexer/lexer.factor | 2 +- extra/c/preprocessor/preprocessor.factor | 2 +- 
extra/html/parser/parser.factor | 2 +- 8 files changed, 9 insertions(+), 7 deletions(-) create mode 100644 basis/sequences/parser/authors.txt rename extra/sequence-parser/sequence-parser-tests.factor => basis/sequences/parser/parser-tests.factor (98%) rename extra/sequence-parser/sequence-parser.factor => basis/sequences/parser/parser.factor (99%) diff --git a/basis/compression/run-length/run-length.factor b/basis/compression/run-length/run-length.factor index cde2a7e113..ce25cd6a63 100644 --- a/basis/compression/run-length/run-length.factor +++ b/basis/compression/run-length/run-length.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays combinators grouping kernel locals math -math.matrices math.order multiline sequence-parser sequences +math.matrices math.order multiline sequences.parser sequences tools.continuations ; IN: compression.run-length diff --git a/basis/sequences/parser/authors.txt b/basis/sequences/parser/authors.txt new file mode 100644 index 0000000000..a07c427c98 --- /dev/null +++ b/basis/sequences/parser/authors.txt @@ -0,0 +1,2 @@ +Daniel Ehrenberg +Doug Coleman diff --git a/extra/sequence-parser/sequence-parser-tests.factor b/basis/sequences/parser/parser-tests.factor similarity index 98% rename from extra/sequence-parser/sequence-parser-tests.factor rename to basis/sequences/parser/parser-tests.factor index af13e5b86e..f788a6da6a 100644 --- a/extra/sequence-parser/sequence-parser-tests.factor +++ b/basis/sequences/parser/parser-tests.factor @@ -1,6 +1,6 @@ -USING: tools.test sequence-parser unicode.categories kernel accessors ; -IN: sequence-parser.tests +USING: tools.test sequences.parser unicode.categories kernel accessors ; +IN: sequences.parser.tests [ "hello" ] [ "hello" [ take-rest ] parse-sequence ] unit-test diff --git a/extra/sequence-parser/sequence-parser.factor b/basis/sequences/parser/parser.factor similarity index 99% rename from extra/sequence-parser/sequence-parser.factor rename to
basis/sequences/parser/parser.factor index d14a77057f..93bbbdf53d 100644 --- a/extra/sequence-parser/sequence-parser.factor +++ b/basis/sequences/parser/parser.factor @@ -3,7 +3,7 @@ USING: accessors circular combinators.short-circuit fry io kernel locals math math.order sequences sorting.functor sorting.slots unicode.categories ; -IN: sequence-parser +IN: sequences.parser TUPLE: sequence-parser sequence n ; diff --git a/extra/c/lexer/lexer-tests.factor b/extra/c/lexer/lexer-tests.factor index c972b8816c..082827353d 100644 --- a/extra/c/lexer/lexer-tests.factor +++ b/extra/c/lexer/lexer-tests.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors c.lexer kernel sequence-parser tools.test ; +USING: accessors c.lexer kernel sequences.parser tools.test ; IN: c.lexer.tests [ 36 ] diff --git a/extra/c/lexer/lexer.factor b/extra/c/lexer/lexer.factor index 962407e6ec..57894217bd 100644 --- a/extra/c/lexer/lexer.factor +++ b/extra/c/lexer/lexer.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors combinators combinators.short-circuit generalizations kernel locals math.order math.ranges -sequence-parser sequences sorting.functor sorting.slots +sequences.parser sequences sorting.functor sorting.slots unicode.categories ; IN: c.lexer diff --git a/extra/c/preprocessor/preprocessor.factor b/extra/c/preprocessor/preprocessor.factor index 3018fa7a24..e8176c8df8 100644 --- a/extra/c/preprocessor/preprocessor.factor +++ b/extra/c/preprocessor/preprocessor.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2009 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. 
-USING: sequence-parser io io.encodings.utf8 io.files +USING: sequences.parser io io.encodings.utf8 io.files io.streams.string kernel combinators accessors io.pathnames fry sequences arrays locals namespaces io.directories assocs math splitting make unicode.categories diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor index 9fcbffd0db..8d506cda28 100644 --- a/extra/html/parser/parser.factor +++ b/extra/html/parser/parser.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays hashtables sequence-parser +USING: accessors arrays hashtables sequences.parser html.parser.utils kernel namespaces sequences math unicode.case unicode.categories combinators.short-circuit quoting fry ;