dbf: beginning to parse dBase database files.
parent
85b69b5b49
commit
7c5012da67
|
@ -0,0 +1 @@
|
|||
John Benediktsson
|
|
@ -0,0 +1,312 @@
|
|||
! Copyright (C) 2018 John Benediktsson
|
||||
! See http://factorcode.org/license.txt for BSD license
|
||||
|
||||
USING: accessors alien.c-types alien.endian alien.strings assocs
|
||||
calendar calendar.parser classes.struct combinators fry io
|
||||
io.binary io.encodings.8-bit io.encodings.ascii
|
||||
io.encodings.binary io.encodings.string io.files
|
||||
io.streams.byte-array kernel math math.parser namespaces
|
||||
sequences splitting strings ;
|
||||
|
||||
IN: dbf
|
||||
|
||||
SYMBOL: dbf-encoding
|
||||
|
||||
CONSTANT: dbf-encodings H{
|
||||
{ 0x00 ascii }
|
||||
{ 0x01 cp437 }
|
||||
{ 0x02 cp850 }
|
||||
{ 0x03 windows-1252 }
|
||||
{ 0x04 mac-roman }
|
||||
{ 0x08 cp865 }
|
||||
{ 0x09 cp437 }
|
||||
{ 0x0A cp850 }
|
||||
{ 0x0B cp437 }
|
||||
{ 0x0D cp437 }
|
||||
{ 0x0E cp850 }
|
||||
{ 0x0F cp437 }
|
||||
{ 0x10 cp850 }
|
||||
{ 0x11 cp437 }
|
||||
{ 0x12 cp850 }
|
||||
{ 0x13 "cp932" }
|
||||
{ 0x14 cp850 }
|
||||
{ 0x15 cp437 }
|
||||
{ 0x16 cp850 }
|
||||
{ 0x17 cp865 }
|
||||
{ 0x18 cp437 }
|
||||
{ 0x19 cp437 }
|
||||
{ 0x1A cp850 }
|
||||
{ 0x1B cp437 }
|
||||
{ 0x1C cp863 }
|
||||
{ 0x1D cp850 }
|
||||
{ 0x1F cp852 }
|
||||
{ 0x22 cp852 }
|
||||
{ 0x23 cp852 }
|
||||
{ 0x24 cp860 }
|
||||
{ 0x25 cp850 }
|
||||
{ 0x26 cp866 }
|
||||
{ 0x37 cp850 }
|
||||
{ 0x40 cp852 }
|
||||
{ 0x4D "cp936" }
|
||||
{ 0x4E "cp949" }
|
||||
{ 0x4F "cp950" }
|
||||
{ 0x50 "cp874" }
|
||||
{ 0x57 windows-1252 }
|
||||
{ 0x58 windows-1252 }
|
||||
{ 0x59 windows-1252 }
|
||||
{ 0x64 cp852 }
|
||||
{ 0x65 cp866 }
|
||||
{ 0x66 cp865 }
|
||||
{ 0x67 cp861 }
|
||||
{ 0x68 f }
|
||||
{ 0x69 f }
|
||||
{ 0x6a "cp737" }
|
||||
{ 0x6b cp857 }
|
||||
{ 0x6c cp863 }
|
||||
{ 0x78 "cp950" }
|
||||
{ 0x79 "cp949" }
|
||||
{ 0x7a "cp936" }
|
||||
{ 0x7b "cp932" }
|
||||
{ 0x7c "cp874" }
|
||||
{ 0x7d windows-1255 }
|
||||
{ 0x7e windows-1256 }
|
||||
{ 0x86 "cp737" }
|
||||
{ 0x87 cp852 }
|
||||
{ 0x88 cp857 }
|
||||
{ 0x96 "mac-cyrillic" }
|
||||
{ 0x97 "mac-latin2" }
|
||||
{ 0x98 "mac-greek" }
|
||||
{ 0xc8 windows-1250 }
|
||||
{ 0xc9 windows-1251 }
|
||||
{ 0xca windows-1254 }
|
||||
{ 0xcb windows-1253 }
|
||||
{ 0xcc windows-1250 }
|
||||
}
|
||||
|
||||
CONSTANT: dbf-file-types H{
|
||||
{ 0x02 "FoxBASE" }
|
||||
{ 0x03 "FoxBASE+/dBase III plus, no memo" }
|
||||
{ 0x04 "dBase IV, no memo" }
|
||||
{ 0x05 "dBase V, no memo" }
|
||||
{ 0x07 "Visual Objects 1.x" }
|
||||
{ 0x30 "Visual FoxPro" }
|
||||
{ 0x31 "Visual FoxPro, autoincrement enabled" }
|
||||
{ 0x32 "Visual FoxPro with field type Varchar or Varbinary" }
|
||||
{ 0x43 "dBase IV SQL table files, no memo" }
|
||||
{ 0x63 "dBase IV SQL system files, no memo" }
|
||||
{ 0x7b "dBase IV, with memo" }
|
||||
{ 0x83 "FoxBASE+/dBase III PLUS, with memo" }
|
||||
{ 0x87 "Visual Objects 1.x, with memo" }
|
||||
{ 0x8B "dBase IV with memo" }
|
||||
{ 0x8E "dBase IV with SQL table" }
|
||||
{ 0xCB "dBase IV SQL table files, with memo" }
|
||||
{ 0xE5 "HiPer-Six format with SMT memo file" }
|
||||
{ 0xF5 "FoxPro 2.x (or earlier) with memo" }
|
||||
{ 0xFB "FoxBASE" }
|
||||
}
|
||||
|
||||
STRUCT: dbf-file-header
|
||||
{ file-type uint8_t }
|
||||
{ year uint8_t }
|
||||
{ month uint8_t }
|
||||
{ day uint8_t }
|
||||
{ #records uint32_t }
|
||||
{ header-length uint16_t }
|
||||
{ record-length uint16_t }
|
||||
{ reserved1 uint16_t }
|
||||
{ incomplete-transaction uint8_t }
|
||||
{ encryption-flag uint8_t }
|
||||
{ free-record-thread uint32_t }
|
||||
{ reserved2 uint32_t }
|
||||
{ reserved3 uint32_t }
|
||||
{ mdx-flag uint8_t }
|
||||
{ language-driver uint8_t }
|
||||
{ reserved4 uint16_t } ;
|
||||
|
||||
: read-file-header ( -- file-header )
|
||||
dbf-file-header read-struct ;
|
||||
|
||||
CONSTANT: dbf-field-flags H{
|
||||
{ 0x01 "System Column (not visible to user)" }
|
||||
{ 0x02 "Column can store null values" }
|
||||
{ 0x04 "Binary column (for CHAR and MEMO only)" }
|
||||
{ 0x06 "(0x02+0x04) When a field is NULL and binary (Integer, Currency, and Character/Memo fields)" }
|
||||
{ 0x0C "Column is autoincrementing" }
|
||||
}
|
||||
|
||||
STRUCT: dbf-field-header
|
||||
{ name uint8_t[11] }
|
||||
{ type uint8_t }
|
||||
{ address uint32_t }
|
||||
{ length uint8_t }
|
||||
{ #decimals uint8_t }
|
||||
{ reserved1 uint16_t }
|
||||
{ workarea-id uint8_t }
|
||||
{ reserved2 uint8_t }
|
||||
{ reserved3 uint8_t }
|
||||
{ set-fields-flag uint8_t }
|
||||
{ reserved4 uint8_t[7] }
|
||||
{ index-field-flag uint8_t } ;
|
||||
|
||||
: read-field-headers ( -- field-headers )
|
||||
[ read1 dup { CHAR: \r CHAR: \n f } member? not ] [
|
||||
dbf-field-header heap-size 1 - read swap prefix
|
||||
dbf-field-header memory>struct
|
||||
] produce nip ;
|
||||
|
||||
: check-field-header ( field-header -- field-header )
|
||||
dup type>> {
|
||||
{ CHAR: I [ dup length>> 4 assert= ] }
|
||||
{ CHAR: L [ dup length>> 1 assert= ] }
|
||||
{ CHAR: O [ dup length>> 8 assert= ] }
|
||||
{ CHAR: Y [ dup length>> 8 assert= ] }
|
||||
{ CHAR: D [ dup length>> 8 assert= ] }
|
||||
{ CHAR: T [ dup length>> 8 assert= ] }
|
||||
{ CHAR: M [ dup length>> 10 assert= ] }
|
||||
[ drop ]
|
||||
} case ;
|
||||
|
||||
: check-record-length ( file-header field-headers -- )
|
||||
[ record-length>> ] [ [ length>> ] map-sum ] bi* assert= ;
|
||||
|
||||
DEFER: parse-field
|
||||
|
||||
TUPLE: record deleted? values ;
|
||||
|
||||
: read-records ( field-headers -- records )
|
||||
[ read1 dup { 0x1a f } member? not ]
|
||||
[
|
||||
CHAR: * = over [
|
||||
[ length>> read ]
|
||||
[ type>> parse-field ] bi
|
||||
] map record boa
|
||||
] produce 2nip ;
|
||||
|
||||
TUPLE: dbf file-header field-headers records ;
|
||||
|
||||
: load-dbf ( path -- dbf )
|
||||
binary [
|
||||
read-file-header
|
||||
read-field-headers
|
||||
over header-length>> seek-absolute seek-input
|
||||
over language-driver>> dbf-encodings at dbf-encoding [
|
||||
dup read-records dbf boa
|
||||
] with-variable
|
||||
] with-file-reader ;
|
||||
|
||||
: seek-record ( n file-header -- )
|
||||
[ record-length>> * ] [ header-length>> ] bi +
|
||||
seek-absolute seek-input ;
|
||||
|
||||
: parse-string ( byte-array -- string )
|
||||
[ " \0" member? ] trim-tail dbf-encoding get decode ;
|
||||
|
||||
: parse-date ( byte-array -- date/f )
|
||||
dup [ " \0" member? ] all? [ drop f ] [
|
||||
binary [ (ymd>timestamp) ] with-byte-reader
|
||||
] if ;
|
||||
|
||||
: parse-float ( byte-array -- n )
|
||||
[ "\r\n\t *" member? ] trim string>number ;
|
||||
|
||||
: parse-int ( byte-array -- n )
|
||||
dup length 4 assert= le> ;
|
||||
|
||||
: parse-short ( byte-array -- n )
|
||||
dup length 2 assert= le> ;
|
||||
|
||||
SYMBOL: unknown
|
||||
|
||||
ERROR: illegal-logical value ;
|
||||
|
||||
: parse-logical ( byte-array -- n )
|
||||
first {
|
||||
{ [ dup "TtYy" member? ] [ drop t ] }
|
||||
{ [ dup "FfNn" member? ] [ drop f ] }
|
||||
{ [ dup "? " member? ] [ drop unknown ] }
|
||||
[ illegal-logical ]
|
||||
} cond ;
|
||||
|
||||
: parse-numeric ( byte-array -- n )
|
||||
[ "\r\n\t *" member? ] trim "," "." replace string>number ;
|
||||
|
||||
: parse-double ( byte-array -- n )
|
||||
dup length 8 assert= le> bits>double ;
|
||||
|
||||
: parse-currency ( byte-array -- n )
|
||||
dup length 8 assert= le> 10000 / ;
|
||||
|
||||
: parse-timestamp ( byte-array -- timestamp )
|
||||
[ -4713 1 1 <date> ] dip 4 cut [ le> ] bi@
|
||||
[ days time+ ] [ milliseconds time+ ] bi* ;
|
||||
|
||||
ERROR: unsupported-field-type type ;
|
||||
|
||||
: parse-field ( byte-array type -- data )
|
||||
{
|
||||
{ CHAR: \0 [ ] }
|
||||
{ CHAR: 2 [ parse-short ] }
|
||||
{ CHAR: 4 [ parse-int ] }
|
||||
{ CHAR: 8 [ parse-double ] }
|
||||
{ CHAR: C [ parse-string ] }
|
||||
{ CHAR: D [ parse-date ] }
|
||||
{ CHAR: F [ parse-float ] }
|
||||
{ CHAR: I [ parse-int ] }
|
||||
{ CHAR: L [ parse-logical ] }
|
||||
{ CHAR: N [ parse-numeric ] }
|
||||
{ CHAR: O [ parse-double ] }
|
||||
{ CHAR: V [ parse-string ] }
|
||||
{ CHAR: Y [ parse-currency ] }
|
||||
{ CHAR: @ [ parse-timestamp ] }
|
||||
! { CHAR: + [ parse-autoincrement ] }
|
||||
! { CHAR: M [ parse-memo ] }
|
||||
! { CHAR: T [ parse-datetime ] }
|
||||
! { CHAR: B [ parse-double? ] } ! (only on dbversion in [0x30, 0x31, 0x32])
|
||||
! { CHAR: G [ parse-general ] }
|
||||
! { CHAR: P [ parse-picture ] }
|
||||
! { CHAR: Q [ parse-varbinary ] }
|
||||
[ unsupported-field-type ]
|
||||
} case ;
|
||||
|
||||
: dbase3-memo ( n path -- data )
|
||||
binary [
|
||||
512 * seek-absolute seek-input
|
||||
B{ } [
|
||||
512 read
|
||||
dup [ B{ 0 0x1a } member? ] find drop
|
||||
[ head f ] [ t ] if* [ append ] dip
|
||||
] loop
|
||||
] with-file-reader ;
|
||||
|
||||
LE-STRUCT: db4-memo-header
|
||||
{ reserved uint } ! B{ 0xff 0xff 0x08 0x08 }
|
||||
{ length uint } ;
|
||||
|
||||
: dbase4-memo ( n path -- data )
|
||||
binary [
|
||||
512 * seek-absolute seek-input
|
||||
db4-memo-header read-struct length>> read
|
||||
] with-file-reader ;
|
||||
|
||||
BE-STRUCT: vfp-file-header
|
||||
{ nextblock uint }
|
||||
{ reserved1 ushort }
|
||||
{ blocksize ushort }
|
||||
{ reserved2 uchar[504] } ;
|
||||
|
||||
BE-STRUCT: vfp-memo-header
|
||||
{ type uint }
|
||||
{ length uint } ;
|
||||
|
||||
CONSTANT: vfp-memo-types H{
|
||||
{ 0x0 "picture memo" }
|
||||
{ 0x1 "text memo" }
|
||||
{ 0x2 "object memo" }
|
||||
}
|
||||
|
||||
: vfp-memo ( n path -- data )
|
||||
binary [
|
||||
vfp-file-header read-struct blocksize>> *
|
||||
seek-absolute seek-input
|
||||
vfp-memo-header read-struct length>> read
|
||||
] with-file-reader ;
|
|
@ -0,0 +1 @@
|
|||
Parsers for various dBase database files
|
Loading…
Reference in New Issue