factor/extra/dbf/dbf.factor

314 lines
8.4 KiB
Factor

! Copyright (C) 2018 John Benediktsson
! See http://factorcode.org/license.txt for BSD license
USING: accessors alien.c-types alien.endian alien.strings assocs
calendar calendar.parser classes.struct combinators fry io
io.binary io.encodings.8-bit io.encodings.ascii
io.encodings.binary io.encodings.string io.files
io.streams.byte-array kernel math math.parser namespaces
sequences splitting strings ;
IN: dbf
SYMBOL: dbf-encoding
CONSTANT: dbf-encodings H{
{ 0x00 ascii }
{ 0x01 cp437 }
{ 0x02 cp850 }
{ 0x03 windows-1252 }
{ 0x04 mac-roman }
{ 0x08 cp865 }
{ 0x09 cp437 }
{ 0x0A cp850 }
{ 0x0B cp437 }
{ 0x0D cp437 }
{ 0x0E cp850 }
{ 0x0F cp437 }
{ 0x10 cp850 }
{ 0x11 cp437 }
{ 0x12 cp850 }
{ 0x13 "cp932" }
{ 0x14 cp850 }
{ 0x15 cp437 }
{ 0x16 cp850 }
{ 0x17 cp865 }
{ 0x18 cp437 }
{ 0x19 cp437 }
{ 0x1A cp850 }
{ 0x1B cp437 }
{ 0x1C cp863 }
{ 0x1D cp850 }
{ 0x1F cp852 }
{ 0x22 cp852 }
{ 0x23 cp852 }
{ 0x24 cp860 }
{ 0x25 cp850 }
{ 0x26 cp866 }
{ 0x37 cp850 }
{ 0x40 cp852 }
{ 0x4D "cp936" }
{ 0x4E "cp949" }
{ 0x4F "cp950" }
{ 0x50 "cp874" }
{ 0x57 windows-1252 }
{ 0x58 windows-1252 }
{ 0x59 windows-1252 }
{ 0x64 cp852 }
{ 0x65 cp866 }
{ 0x66 cp865 }
{ 0x67 cp861 }
{ 0x68 f }
{ 0x69 f }
{ 0x6a "cp737" }
{ 0x6b cp857 }
{ 0x6c cp863 }
{ 0x78 "cp950" }
{ 0x79 "cp949" }
{ 0x7a "cp936" }
{ 0x7b "cp932" }
{ 0x7c "cp874" }
{ 0x7d windows-1255 }
{ 0x7e windows-1256 }
{ 0x86 "cp737" }
{ 0x87 cp852 }
{ 0x88 cp857 }
{ 0x96 "mac-cyrillic" }
{ 0x97 "mac-latin2" }
{ 0x98 "mac-greek" }
{ 0xc8 windows-1250 }
{ 0xc9 windows-1251 }
{ 0xca windows-1254 }
{ 0xcb windows-1253 }
{ 0xcc windows-1250 }
}
CONSTANT: dbf-file-types H{
{ 0x02 "FoxBASE" }
{ 0x03 "FoxBASE+/dBase III plus, no memo" }
{ 0x04 "dBase IV, no memo" }
{ 0x05 "dBase V, no memo" }
{ 0x07 "Visual Objects 1.x" }
{ 0x30 "Visual FoxPro" }
{ 0x31 "Visual FoxPro, autoincrement enabled" }
{ 0x32 "Visual FoxPro with field type Varchar or Varbinary" }
{ 0x43 "dBase IV SQL table files, no memo" }
{ 0x63 "dBase IV SQL system files, no memo" }
{ 0x7b "dBase IV, with memo" }
{ 0x83 "FoxBASE+/dBase III PLUS, with memo" }
{ 0x87 "Visual Objects 1.x, with memo" }
{ 0x8B "dBase IV with memo" }
{ 0x8E "dBase IV with SQL table" }
{ 0xCB "dBase IV SQL table files, with memo" }
{ 0xE5 "HiPer-Six format with SMT memo file" }
{ 0xF5 "FoxPro 2.x (or earlier) with memo" }
{ 0xFB "FoxBASE" }
}
STRUCT: dbf-file-header
{ file-type uint8_t }
{ year uint8_t }
{ month uint8_t }
{ day uint8_t }
{ #records uint32_t }
{ header-length uint16_t }
{ record-length uint16_t }
{ reserved1 uint16_t }
{ incomplete-transaction uint8_t }
{ encryption-flag uint8_t }
{ free-record-thread uint32_t }
{ reserved2 uint32_t }
{ reserved3 uint32_t }
{ mdx-flag uint8_t }
{ language-driver uint8_t }
{ reserved4 uint16_t } ;
: read-file-header ( -- file-header )
dbf-file-header read-struct ;
CONSTANT: dbf-field-flags H{
{ 0x01 "System Column (not visible to user)" }
{ 0x02 "Column can store null values" }
{ 0x04 "Binary column (for CHAR and MEMO only)" }
{ 0x06 "(0x02+0x04) When a field is NULL and binary (Integer, Currency, and Character/Memo fields)" }
{ 0x0C "Column is autoincrementing" }
}
STRUCT: dbf-field-header
{ name uint8_t[11] }
{ type uint8_t }
{ address uint32_t }
{ length uint8_t }
{ #decimals uint8_t }
{ reserved1 uint16_t }
{ workarea-id uint8_t }
{ reserved2 uint8_t }
{ reserved3 uint8_t }
{ set-fields-flag uint8_t }
{ reserved4 uint8_t[7] }
{ index-field-flag uint8_t } ;
: read-field-headers ( -- field-headers )
[ read1 dup { CHAR: \r CHAR: \n f } member? not ] [
dbf-field-header heap-size 1 - read swap prefix
dbf-field-header memory>struct
] produce nip ;
: check-field-header ( field-header -- field-header )
dup type>> {
{ CHAR: I [ dup length>> 4 assert= ] }
{ CHAR: L [ dup length>> 1 assert= ] }
{ CHAR: O [ dup length>> 8 assert= ] }
{ CHAR: Y [ dup length>> 8 assert= ] }
{ CHAR: D [ dup length>> 8 assert= ] }
{ CHAR: T [ dup length>> 8 assert= ] }
{ CHAR: M [ dup length>> 10 assert= ] }
[ drop ]
} case ;
: check-record-length ( file-header field-headers -- )
[ record-length>> ] [ [ length>> ] map-sum ] bi* assert= ;
DEFER: parse-field
TUPLE: record deleted? values ;
: read-records ( field-headers -- records )
[ read1 dup { 0x1a f } member? not ]
[
CHAR: * = over [
[ length>> read ]
[ type>> parse-field ] bi
] map record boa
] produce 2nip ;
TUPLE: dbf file-header field-headers records ;
: load-dbf ( path -- dbf )
binary [
read-file-header
read-field-headers
over header-length>> seek-absolute seek-input
over language-driver>> dbf-encodings at dbf-encoding [
dup read-records dbf boa
] with-variable
] with-file-reader ;
: seek-record ( n file-header -- )
[ record-length>> * ] [ header-length>> ] bi +
seek-absolute seek-input ;
: parse-string ( byte-array -- string )
[ " \0" member? ] trim-tail dbf-encoding get decode ;
: parse-date ( byte-array -- date/f )
dup [ " \0" member? ] all? [ drop f ] [
binary [ (ymd>timestamp) ] with-byte-reader
] if ;
: parse-float ( byte-array -- n )
[ "\r\n\t *" member? ] trim string>number ;
: parse-int ( byte-array -- n )
dup length 4 assert= le> ;
: parse-short ( byte-array -- n )
dup length 2 assert= le> ;
SYMBOL: unknown
ERROR: illegal-logical value ;
: parse-logical ( byte-array -- n )
first {
{ [ dup "TtYy" member? ] [ drop t ] }
{ [ dup "FfNn" member? ] [ drop f ] }
{ [ dup "? " member? ] [ drop unknown ] }
[ illegal-logical ]
} cond ;
: parse-numeric ( byte-array -- n )
[ "\r\n\t *" member? ] trim
H{ { CHAR: , CHAR: . } } substitute string>number ;
: parse-double ( byte-array -- n )
dup length 8 assert= le> bits>double ;
: parse-currency ( byte-array -- n )
dup length 8 assert= le> 10000 / ;
: parse-timestamp ( byte-array -- timestamp )
[ -4713 1 1 <date> ] dip 4 cut [ le> ] bi@
[ days time+ ] [ milliseconds time+ ] bi* ;
ERROR: unsupported-field-type type ;
: parse-field ( byte-array type -- data )
{
{ CHAR: \0 [ ] }
{ CHAR: 2 [ parse-short ] }
{ CHAR: 4 [ parse-int ] }
{ CHAR: 8 [ parse-double ] }
{ CHAR: C [ parse-string ] }
{ CHAR: D [ parse-date ] }
{ CHAR: F [ parse-float ] }
{ CHAR: I [ parse-int ] }
{ CHAR: L [ parse-logical ] }
{ CHAR: N [ parse-numeric ] }
{ CHAR: O [ parse-double ] }
{ CHAR: V [ parse-string ] }
{ CHAR: Y [ parse-currency ] }
{ CHAR: @ [ parse-timestamp ] }
! { CHAR: + [ parse-autoincrement ] }
! { CHAR: M [ parse-memo ] }
! { CHAR: T [ parse-datetime ] }
! { CHAR: B [ parse-double? ] } ! (only on dbversion in [0x30, 0x31, 0x32])
! { CHAR: G [ parse-general ] }
! { CHAR: P [ parse-picture ] }
! { CHAR: Q [ parse-varbinary ] }
[ unsupported-field-type ]
} case ;
: dbase3-memo ( n path -- data )
binary [
512 * seek-absolute seek-input
B{ } [
512 read
dup [ B{ 0 0x1a } member? ] find drop
[ head f ] [ t ] if* [ append ] dip
] loop
] with-file-reader ;
LE-STRUCT: db4-memo-header
{ reserved uint } ! B{ 0xff 0xff 0x08 0x08 }
{ length uint } ;
: dbase4-memo ( n path -- data )
binary [
512 * seek-absolute seek-input
db4-memo-header read-struct length>> read
] with-file-reader ;
BE-STRUCT: vfp-file-header
{ nextblock uint }
{ reserved1 ushort }
{ blocksize ushort }
{ reserved2 uchar[504] } ;
BE-STRUCT: vfp-memo-header
{ type uint }
{ length uint } ;
CONSTANT: vfp-memo-types H{
{ 0x0 "picture memo" }
{ 0x1 "text memo" }
{ 0x2 "object memo" }
}
: vfp-memo ( n path -- data )
binary [
vfp-file-header read-struct blocksize>> *
seek-absolute seek-input
vfp-memo-header read-struct length>> read
] with-file-reader ;