factor/extra/serialize/serialize.factor

294 lines
7.3 KiB
Factor
Raw Normal View History

2007-09-20 18:09:08 -04:00
! Copyright (C) 2006 Adam Langley and Chris Double.
! Adam Langley was the original author of this work.
!
! Chris Double modified it to fix bugs and get it working
! correctly under the latest versions of Factor.
!
! See http://factorcode.org/license.txt for BSD license.
!
IN: serialize
USING: namespaces sequences kernel math io math.functions
io.binary strings classes words sbufs tuples arrays
vectors byte-arrays bit-arrays quotations hashtables
2008-03-08 03:51:26 -05:00
assocs help.syntax help.markup float-arrays splitting
io.encodings.string io.encodings.utf8 combinators ;
2007-09-20 18:09:08 -04:00
2008-03-08 04:50:03 -05:00
! Variable holding a assoc of objects already serialized
2007-09-20 18:09:08 -04:00
SYMBOL: serialized
2008-03-08 04:50:03 -05:00
TUPLE: id obj ;
C: <id> id
M: id hashcode* id-obj hashcode* ;
M: id equal? over id? [ [ id-obj ] 2apply eq? ] [ 2drop f ] if ;
: add-object ( obj -- )
2007-09-20 18:09:08 -04:00
#! Add an object to the sequence of already serialized
2008-03-08 04:50:03 -05:00
#! objects.
serialized get [ assoc-size swap <id> ] keep set-at ;
2007-09-20 18:09:08 -04:00
: object-id ( obj -- id )
#! Return the id of an already serialized object
2008-03-08 04:50:03 -05:00
<id> serialized get at ;
2007-09-20 18:09:08 -04:00
! Serialize object
GENERIC: (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
! Numbers are serialized as follows:
! 0 => B{ 0 }
! 1<=x<=126 => B{ x | 0x80 }
! x>127 => B{ length(x) x[0] x[1] ... }
! x>2^1024 => B{ 0xff length(x) x[0] x[1] ... }
! The last case is needed because a very large number would
! otherwise be confused with a small number.
: serialize-cell ( n -- )
dup zero? [ drop 0 write1 ] [
dup HEX: 7e <= [
HEX: 80 bitor write1
] [
dup log2 8 /i 1+
dup HEX: 7f >= [
HEX: ff write1
dup serialize-cell
] [
dup write1
] if
>be write
] if
] if ;
2007-09-20 18:09:08 -04:00
2008-03-08 03:51:26 -05:00
: deserialize-cell ( -- n )
read1 {
{ [ dup HEX: ff = ] [ drop deserialize-cell read be> ] }
{ [ dup HEX: 80 >= ] [ HEX: 80 bitxor ] }
{ [ t ] [ read be> ] }
} cond ;
2007-09-20 18:09:08 -04:00
: serialize-shared ( obj quot -- )
>r dup object-id
2008-03-08 03:51:26 -05:00
[ CHAR: o write1 serialize-cell drop ] r> if* ; inline
2007-09-20 18:09:08 -04:00
M: f (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
drop CHAR: n write1 ;
2007-09-20 18:09:08 -04:00
M: integer (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
dup zero? [
drop CHAR: z write1
2007-09-20 18:09:08 -04:00
] [
2008-03-08 03:51:26 -05:00
dup 0 < [ neg CHAR: m ] [ CHAR: p ] if write1
serialize-cell
2007-09-20 18:09:08 -04:00
] if ;
M: float (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
CHAR: F write1
2007-09-20 18:09:08 -04:00
double>bits serialize-cell ;
M: complex (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
CHAR: c write1
dup real-part (serialize)
imaginary-part (serialize) ;
2007-09-20 18:09:08 -04:00
M: ratio (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
CHAR: r write1
2007-09-20 18:09:08 -04:00
dup numerator (serialize)
denominator (serialize) ;
2008-03-08 03:51:26 -05:00
: serialize-string ( obj code -- )
write1
2008-03-08 04:50:03 -05:00
dup utf8 encode dup length serialize-cell write
add-object ;
2008-03-08 03:51:26 -05:00
2007-09-20 18:09:08 -04:00
M: string (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
[ CHAR: s serialize-string ] serialize-shared ;
2007-09-20 18:09:08 -04:00
2008-03-08 03:51:26 -05:00
: serialize-elements
[ (serialize) ] each CHAR: . write1 ;
2007-09-20 18:09:08 -04:00
M: tuple (serialize) ( obj -- )
[
2008-03-08 03:51:26 -05:00
CHAR: T write1
2008-03-08 04:50:03 -05:00
dup tuple>array serialize-elements
add-object
2007-09-20 18:09:08 -04:00
] serialize-shared ;
: serialize-seq ( seq code -- )
[
2008-03-08 03:51:26 -05:00
write1
2008-03-08 04:50:03 -05:00
dup serialize-elements
add-object
2007-09-20 18:09:08 -04:00
] curry serialize-shared ;
M: array (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
CHAR: a serialize-seq ;
2007-09-20 18:09:08 -04:00
M: byte-array (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
[
CHAR: A write1
2008-03-08 04:50:03 -05:00
dup dup length serialize-cell write
add-object
2008-03-08 03:51:26 -05:00
] serialize-shared ;
2007-09-20 18:09:08 -04:00
M: bit-array (serialize) ( obj -- )
[
2008-03-08 03:51:26 -05:00
CHAR: b write1
dup length serialize-cell
2008-03-08 04:50:03 -05:00
dup [ 1 0 ? ] B{ } map-as write
add-object
2007-09-20 18:09:08 -04:00
] serialize-shared ;
2008-03-08 03:51:26 -05:00
M: quotation (serialize) ( obj -- )
CHAR: q serialize-seq ;
2007-09-20 18:09:08 -04:00
M: float-array (serialize) ( obj -- )
[
2008-03-08 03:51:26 -05:00
CHAR: f write1
2007-09-20 18:09:08 -04:00
dup length serialize-cell
2008-03-08 04:50:03 -05:00
dup [ double>bits 8 >be write ] each
add-object
2007-09-20 18:09:08 -04:00
] serialize-shared ;
M: hashtable (serialize) ( obj -- )
[
2008-03-08 03:51:26 -05:00
CHAR: h write1
2008-03-08 04:50:03 -05:00
dup >alist (serialize)
add-object
2007-09-20 18:09:08 -04:00
] serialize-shared ;
M: word (serialize) ( obj -- )
2008-03-08 04:50:03 -05:00
[
CHAR: w write1
dup word-name (serialize)
dup word-vocabulary (serialize)
add-object
] serialize-shared ;
2007-09-20 18:09:08 -04:00
M: wrapper (serialize) ( obj -- )
2008-03-08 03:51:26 -05:00
CHAR: W write1
2007-09-20 18:09:08 -04:00
wrapped (serialize) ;
DEFER: (deserialize) ( -- obj )
2008-03-08 04:50:03 -05:00
SYMBOL: deserialized
: intern-object ( obj -- )
deserialized get push ;
2007-09-20 18:09:08 -04:00
: deserialize-false ( -- f )
f ;
: deserialize-positive-integer ( -- number )
2008-03-08 03:51:26 -05:00
deserialize-cell ;
2007-09-20 18:09:08 -04:00
: deserialize-negative-integer ( -- number )
deserialize-positive-integer neg ;
: deserialize-zero ( -- number )
0 ;
: deserialize-float ( -- float )
deserialize-cell bits>double ;
: deserialize-ratio ( -- ratio )
(deserialize) (deserialize) / ;
: deserialize-complex ( -- complex )
(deserialize) (deserialize) rect> ;
2008-03-08 03:51:26 -05:00
: (deserialize-string) ( -- string )
deserialize-cell read utf8 decode ;
2007-09-20 18:09:08 -04:00
2008-03-08 03:51:26 -05:00
: deserialize-string ( -- string )
2008-03-08 04:50:03 -05:00
(deserialize-string) dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-word ( -- word )
(deserialize) dup (deserialize) lookup
2008-03-08 04:50:03 -05:00
[ dup intern-object ] [ "Unknown word" throw ] ?if ;
2007-09-20 18:09:08 -04:00
: deserialize-wrapper ( -- wrapper )
(deserialize) <wrapper> ;
2008-03-08 03:51:26 -05:00
SYMBOL: +stop+
2008-03-08 04:50:03 -05:00
: (deserialize-seq) ( -- seq )
2008-03-08 03:51:26 -05:00
[ (deserialize) dup +stop+ get eq? not ] [ ] [ drop ] unfold ;
2007-09-20 18:09:08 -04:00
: deserialize-seq ( seq -- array )
2008-03-08 04:50:03 -05:00
>r (deserialize-seq) r> like dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-array ( -- array )
{ } deserialize-seq ;
: deserialize-quotation ( -- array )
[ ] deserialize-seq ;
2008-03-08 03:51:26 -05:00
: (deserialize-byte-array) ( -- byte-array )
deserialize-cell read B{ } like ;
2007-09-20 18:09:08 -04:00
: deserialize-byte-array ( -- byte-array )
2008-03-08 04:50:03 -05:00
(deserialize-byte-array) dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-bit-array ( -- bit-array )
2008-03-08 03:51:26 -05:00
(deserialize-byte-array) [ 0 > ] ?{ } map-as
2008-03-08 04:50:03 -05:00
dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-float-array ( -- float-array )
2008-03-08 04:50:03 -05:00
deserialize-cell
2007-09-20 18:09:08 -04:00
8 * read 8 <groups> [ be> bits>double ] F{ } map-as
2008-03-08 04:50:03 -05:00
dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-hashtable ( -- hashtable )
2008-03-08 04:50:03 -05:00
(deserialize) >hashtable dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-tuple ( -- array )
2008-03-08 04:50:03 -05:00
(deserialize-seq) >tuple dup intern-object ;
2007-09-20 18:09:08 -04:00
: deserialize-unknown ( -- object )
2008-03-08 04:50:03 -05:00
deserialize-cell deserialized get nth ;
2007-09-20 18:09:08 -04:00
2008-03-08 03:51:26 -05:00
: deserialize-stop ( -- object )
+stop+ get ;
2007-09-20 18:09:08 -04:00
: deserialize* ( -- object ? )
read1 [
2008-03-08 03:51:26 -05:00
{
{ CHAR: A [ deserialize-byte-array ] }
{ CHAR: F [ deserialize-float ] }
{ CHAR: T [ deserialize-tuple ] }
{ CHAR: W [ deserialize-wrapper ] }
{ CHAR: a [ deserialize-array ] }
{ CHAR: b [ deserialize-bit-array ] }
{ CHAR: c [ deserialize-complex ] }
{ CHAR: f [ deserialize-float-array ] }
{ CHAR: h [ deserialize-hashtable ] }
{ CHAR: m [ deserialize-negative-integer ] }
{ CHAR: n [ deserialize-false ] }
{ CHAR: o [ deserialize-unknown ] }
{ CHAR: p [ deserialize-positive-integer ] }
{ CHAR: q [ deserialize-quotation ] }
{ CHAR: r [ deserialize-ratio ] }
{ CHAR: s [ deserialize-string ] }
{ CHAR: w [ deserialize-word ] }
{ CHAR: z [ deserialize-zero ] }
{ CHAR: . [ deserialize-stop ] }
} case t
2007-09-20 18:09:08 -04:00
] [
f f
] if* ;
: (deserialize) ( -- obj )
deserialize* [ "End of stream" throw ] unless ;
2008-03-08 04:50:03 -05:00
: deserialize ( -- obj )
2008-03-08 03:51:26 -05:00
[
2008-03-08 04:50:03 -05:00
V{ } clone deserialized set
2008-03-08 03:51:26 -05:00
gensym +stop+ set
2008-03-08 04:50:03 -05:00
(deserialize)
] with-scope ;
2007-09-20 18:09:08 -04:00
: serialize ( obj -- )
2008-03-08 04:50:03 -05:00
[
H{ } clone serialized set
(serialize)
] with-scope ;