From 745e011ccc11d25e97937fcb8678f2db0f6fc5f6 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Thu, 12 Feb 2009 17:44:04 -0600 Subject: [PATCH] add lzw compression --- basis/compression/lzw/authors.txt | 1 + basis/compression/lzw/lzw-tests.factor | 10 ++ basis/compression/lzw/lzw.factor | 190 +++++++++++++++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 basis/compression/lzw/authors.txt create mode 100644 basis/compression/lzw/lzw-tests.factor create mode 100644 basis/compression/lzw/lzw.factor diff --git a/basis/compression/lzw/authors.txt b/basis/compression/lzw/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/basis/compression/lzw/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/basis/compression/lzw/lzw-tests.factor b/basis/compression/lzw/lzw-tests.factor new file mode 100644 index 0000000000..6cb41b97a0 --- /dev/null +++ b/basis/compression/lzw/lzw-tests.factor @@ -0,0 +1,10 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors tools.test compression.lzw ; +IN: compression.lzw.tests + +[ V{ 7 258 8 8 258 6 } ] +[ B{ 7 7 7 8 8 7 7 6 6 } lzw-compress output>> ] unit-test + +[ B{ 7 7 7 8 8 7 7 6 6 } ] +[ V{ 7 258 8 8 258 6 } lzw-uncompress output>> ] unit-test diff --git a/basis/compression/lzw/lzw.factor b/basis/compression/lzw/lzw.factor new file mode 100644 index 0000000000..fe24e97007 --- /dev/null +++ b/basis/compression/lzw/lzw.factor @@ -0,0 +1,190 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors assocs bitstreams byte-vectors combinators io +io.encodings.binary io.streams.byte-array kernel math sequences +vectors ; +IN: compression.lzw + +CONSTANT: clear-code 256 +CONSTANT: end-of-information 257 + +TUPLE: lzw input output end-of-input? table count k omega omega-k #bits +code old-code ; + +SYMBOL: table-full + +ERROR: index-too-big n ; + +: lzw-bit-width ( n -- n' ) + { + { [ dup 510 <= ] [ drop 9 ] } + { [ dup 1022 <= ] [ drop 10 ] } + { [ dup 2046 <= ] [ drop 11 ] } + { [ dup 4094 <= ] [ drop 12 ] } + [ drop table-full ] + } cond ; + +: lzw-bit-width-compress ( lzw -- n ) + count>> lzw-bit-width ; + +: lzw-bit-width-uncompress ( lzw -- n ) + table>> length lzw-bit-width ; + +: initial-compress-table ( -- assoc ) + 258 iota [ [ 1vector ] keep ] H{ } map>assoc ; + +: initial-uncompress-table ( -- seq ) + 258 iota [ 1vector ] V{ } map-as ; + +: reset-lzw ( lzw -- lzw ) + 257 >>count + V{ } clone >>omega + V{ } clone >>omega-k + 9 >>#bits ; + +: reset-lzw-compress ( lzw -- lzw ) + f >>k + initial-compress-table >>table reset-lzw ; + +: reset-lzw-uncompress ( lzw -- lzw ) + initial-uncompress-table >>table reset-lzw ; + +: ( input -- obj ) + lzw new + swap >>input + binary >>output + reset-lzw-compress ; + +: ( input -- obj ) + lzw new + swap >>input + BV{ } clone >>output + reset-lzw-uncompress ; + +: push-k ( lzw -- lzw ) + [ ] + [ k>> ] + [ omega>> clone [ push ] keep ] tri >>omega-k ; + +: omega-k-in-table? ( lzw -- ? ) + [ omega-k>> ] [ table>> ] bi key? ; + +ERROR: not-in-table ; + +: write-output ( lzw -- ) + [ + [ omega>> ] [ table>> ] bi at* [ not-in-table ] unless + ] [ + [ lzw-bit-width-compress ] + [ output>> write-bits ] bi + ] bi ; + +: omega-k>omega ( lzw -- lzw ) + dup omega-k>> clone >>omega ; + +: k>omega ( lzw -- lzw ) + dup k>> 1vector >>omega ; + +: add-omega-k ( lzw -- ) + [ [ 1+ ] change-count count>> ] + [ omega-k>> clone ] + [ table>> ] tri set-at ; + +: lzw-compress-char ( lzw k -- ) + >>k push-k dup omega-k-in-table? [ + omega-k>omega drop + ] [ + [ write-output ] + [ add-omega-k ] + [ k>omega drop ] tri + ] if ; + +: (lzw-compress-chars) ( lzw -- ) + dup lzw-bit-width-compress table-full = [ + drop + ] [ + dup input>> stream-read1 + [ [ lzw-compress-char ] [ drop (lzw-compress-chars) ] 2bi ] + [ t >>end-of-input? drop ] if* + ] if ; + +: lzw-compress-chars ( lzw -- ) + { + [ [ clear-code lzw-compress-char ] [ reset-lzw-compress drop ] bi ] + [ (lzw-compress-chars) ] + [ end-of-information lzw-compress-char ] + [ ] + } cleave dup end-of-input?>> [ drop ] [ lzw-compress-chars ] if ; + +: lzw-compress ( byte-array -- seq ) + binary + [ lzw-compress-chars ] [ output>> stream>> ] bi ; + +: lookup-old-code ( lzw -- vector ) + [ old-code>> ] [ table>> ] bi nth ; + +: lookup-code ( lzw -- vector ) + [ code>> ] [ table>> ] bi nth ; + +: code-in-table? ( lzw -- ? ) + [ code>> ] [ table>> length ] bi < ; + +: code>old-code ( lzw -- lzw ) + dup code>> >>old-code ; + +: write-code ( lzw -- ) + [ lookup-code ] [ output>> ] bi push-all ; + +: add-to-table ( seq lzw -- ) table>> push ; + +: lzw-read ( lzw -- lzw n ) + [ ] [ lzw-bit-width-uncompress ] [ input>> ] tri read-bits ; + +DEFER: lzw-uncompress-char +: handle-clear-code ( lzw -- ) + reset-lzw-uncompress + lzw-read dup end-of-information = [ + 2drop + ] [ + >>code + [ write-code ] + [ code>old-code ] bi + lzw-uncompress-char + ] if ; + +: handle-uncompress-code ( lzw -- lzw ) + dup code-in-table? [ + [ write-code ] + [ + [ + [ lookup-old-code ] + [ lookup-code first ] bi suffix + ] [ add-to-table ] bi + ] [ code>old-code ] tri + ] [ + [ + [ lookup-old-code dup first suffix ] keep + [ output>> push-all ] [ add-to-table ] 2bi + ] [ code>old-code ] bi + ] if ; + +: lzw-uncompress-char ( lzw -- ) + lzw-read [ + >>code + dup code>> end-of-information = [ + drop + ] [ + dup code>> clear-code = [ + handle-clear-code + ] [ + handle-uncompress-code + lzw-uncompress-char + ] if + ] if + ] [ + drop + ] if* ; + +: lzw-uncompress ( seq -- byte-array ) + binary + [ lzw-uncompress-char ] [ output>> ] bi ;