314 lines
8.4 KiB
Factor
314 lines
8.4 KiB
Factor
! Copyright (C) 2018 John Benediktsson
|
|
! See http://factorcode.org/license.txt for BSD license
|
|
|
|
USING: accessors alien.c-types alien.endian alien.strings assocs
|
|
calendar calendar.parser classes.struct combinators fry io
|
|
io.binary io.encodings.8-bit io.encodings.ascii
|
|
io.encodings.binary io.encodings.string io.files
|
|
io.streams.byte-array kernel math math.parser namespaces
|
|
sequences splitting strings ;
|
|
|
|
IN: dbf
|
|
|
|
SYMBOL: dbf-encoding
|
|
|
|
CONSTANT: dbf-encodings H{
|
|
{ 0x00 ascii }
|
|
{ 0x01 cp437 }
|
|
{ 0x02 cp850 }
|
|
{ 0x03 windows-1252 }
|
|
{ 0x04 mac-roman }
|
|
{ 0x08 cp865 }
|
|
{ 0x09 cp437 }
|
|
{ 0x0A cp850 }
|
|
{ 0x0B cp437 }
|
|
{ 0x0D cp437 }
|
|
{ 0x0E cp850 }
|
|
{ 0x0F cp437 }
|
|
{ 0x10 cp850 }
|
|
{ 0x11 cp437 }
|
|
{ 0x12 cp850 }
|
|
{ 0x13 "cp932" }
|
|
{ 0x14 cp850 }
|
|
{ 0x15 cp437 }
|
|
{ 0x16 cp850 }
|
|
{ 0x17 cp865 }
|
|
{ 0x18 cp437 }
|
|
{ 0x19 cp437 }
|
|
{ 0x1A cp850 }
|
|
{ 0x1B cp437 }
|
|
{ 0x1C cp863 }
|
|
{ 0x1D cp850 }
|
|
{ 0x1F cp852 }
|
|
{ 0x22 cp852 }
|
|
{ 0x23 cp852 }
|
|
{ 0x24 cp860 }
|
|
{ 0x25 cp850 }
|
|
{ 0x26 cp866 }
|
|
{ 0x37 cp850 }
|
|
{ 0x40 cp852 }
|
|
{ 0x4D "cp936" }
|
|
{ 0x4E "cp949" }
|
|
{ 0x4F "cp950" }
|
|
{ 0x50 "cp874" }
|
|
{ 0x57 windows-1252 }
|
|
{ 0x58 windows-1252 }
|
|
{ 0x59 windows-1252 }
|
|
{ 0x64 cp852 }
|
|
{ 0x65 cp866 }
|
|
{ 0x66 cp865 }
|
|
{ 0x67 cp861 }
|
|
{ 0x68 f }
|
|
{ 0x69 f }
|
|
{ 0x6a "cp737" }
|
|
{ 0x6b cp857 }
|
|
{ 0x6c cp863 }
|
|
{ 0x78 "cp950" }
|
|
{ 0x79 "cp949" }
|
|
{ 0x7a "cp936" }
|
|
{ 0x7b "cp932" }
|
|
{ 0x7c "cp874" }
|
|
{ 0x7d windows-1255 }
|
|
{ 0x7e windows-1256 }
|
|
{ 0x86 "cp737" }
|
|
{ 0x87 cp852 }
|
|
{ 0x88 cp857 }
|
|
{ 0x96 "mac-cyrillic" }
|
|
{ 0x97 "mac-latin2" }
|
|
{ 0x98 "mac-greek" }
|
|
{ 0xc8 windows-1250 }
|
|
{ 0xc9 windows-1251 }
|
|
{ 0xca windows-1254 }
|
|
{ 0xcb windows-1253 }
|
|
{ 0xcc windows-1250 }
|
|
}
|
|
|
|
CONSTANT: dbf-file-types H{
|
|
{ 0x02 "FoxBASE" }
|
|
{ 0x03 "FoxBASE+/dBase III plus, no memo" }
|
|
{ 0x04 "dBase IV, no memo" }
|
|
{ 0x05 "dBase V, no memo" }
|
|
{ 0x07 "Visual Objects 1.x" }
|
|
{ 0x30 "Visual FoxPro" }
|
|
{ 0x31 "Visual FoxPro, autoincrement enabled" }
|
|
{ 0x32 "Visual FoxPro with field type Varchar or Varbinary" }
|
|
{ 0x43 "dBase IV SQL table files, no memo" }
|
|
{ 0x63 "dBase IV SQL system files, no memo" }
|
|
{ 0x7b "dBase IV, with memo" }
|
|
{ 0x83 "FoxBASE+/dBase III PLUS, with memo" }
|
|
{ 0x87 "Visual Objects 1.x, with memo" }
|
|
{ 0x8B "dBase IV with memo" }
|
|
{ 0x8E "dBase IV with SQL table" }
|
|
{ 0xCB "dBase IV SQL table files, with memo" }
|
|
{ 0xE5 "HiPer-Six format with SMT memo file" }
|
|
{ 0xF5 "FoxPro 2.x (or earlier) with memo" }
|
|
{ 0xFB "FoxBASE" }
|
|
}
|
|
|
|
STRUCT: dbf-file-header
|
|
{ file-type uint8_t }
|
|
{ year uint8_t }
|
|
{ month uint8_t }
|
|
{ day uint8_t }
|
|
{ #records uint32_t }
|
|
{ header-length uint16_t }
|
|
{ record-length uint16_t }
|
|
{ reserved1 uint16_t }
|
|
{ incomplete-transaction uint8_t }
|
|
{ encryption-flag uint8_t }
|
|
{ free-record-thread uint32_t }
|
|
{ reserved2 uint32_t }
|
|
{ reserved3 uint32_t }
|
|
{ mdx-flag uint8_t }
|
|
{ language-driver uint8_t }
|
|
{ reserved4 uint16_t } ;
|
|
|
|
: read-file-header ( -- file-header )
|
|
dbf-file-header read-struct ;
|
|
|
|
CONSTANT: dbf-field-flags H{
|
|
{ 0x01 "System Column (not visible to user)" }
|
|
{ 0x02 "Column can store null values" }
|
|
{ 0x04 "Binary column (for CHAR and MEMO only)" }
|
|
{ 0x06 "(0x02+0x04) When a field is NULL and binary (Integer, Currency, and Character/Memo fields)" }
|
|
{ 0x0C "Column is autoincrementing" }
|
|
}
|
|
|
|
STRUCT: dbf-field-header
|
|
{ name uint8_t[11] }
|
|
{ type uint8_t }
|
|
{ address uint32_t }
|
|
{ length uint8_t }
|
|
{ #decimals uint8_t }
|
|
{ reserved1 uint16_t }
|
|
{ workarea-id uint8_t }
|
|
{ reserved2 uint8_t }
|
|
{ reserved3 uint8_t }
|
|
{ set-fields-flag uint8_t }
|
|
{ reserved4 uint8_t[7] }
|
|
{ index-field-flag uint8_t } ;
|
|
|
|
: read-field-headers ( -- field-headers )
|
|
[ read1 dup { CHAR: \r CHAR: \n f } member? not ] [
|
|
dbf-field-header heap-size 1 - read swap prefix
|
|
dbf-field-header memory>struct
|
|
] produce nip ;
|
|
|
|
: check-field-header ( field-header -- field-header )
|
|
dup type>> {
|
|
{ CHAR: I [ dup length>> 4 assert= ] }
|
|
{ CHAR: L [ dup length>> 1 assert= ] }
|
|
{ CHAR: O [ dup length>> 8 assert= ] }
|
|
{ CHAR: Y [ dup length>> 8 assert= ] }
|
|
{ CHAR: D [ dup length>> 8 assert= ] }
|
|
{ CHAR: T [ dup length>> 8 assert= ] }
|
|
{ CHAR: M [ dup length>> 10 assert= ] }
|
|
[ drop ]
|
|
} case ;
|
|
|
|
: check-record-length ( file-header field-headers -- )
|
|
[ record-length>> ] [ [ length>> ] map-sum ] bi* assert= ;
|
|
|
|
DEFER: parse-field
|
|
|
|
TUPLE: record deleted? values ;
|
|
|
|
: read-records ( field-headers -- records )
|
|
[ read1 dup { 0x1a f } member? not ]
|
|
[
|
|
CHAR: * = over [
|
|
[ length>> read ]
|
|
[ type>> parse-field ] bi
|
|
] map record boa
|
|
] produce 2nip ;
|
|
|
|
TUPLE: dbf file-header field-headers records ;
|
|
|
|
: load-dbf ( path -- dbf )
|
|
binary [
|
|
read-file-header
|
|
read-field-headers
|
|
over header-length>> seek-absolute seek-input
|
|
over language-driver>> dbf-encodings at dbf-encoding [
|
|
dup read-records dbf boa
|
|
] with-variable
|
|
] with-file-reader ;
|
|
|
|
: seek-record ( n file-header -- )
|
|
[ record-length>> * ] [ header-length>> ] bi +
|
|
seek-absolute seek-input ;
|
|
|
|
: parse-string ( byte-array -- string )
|
|
[ " \0" member? ] trim-tail dbf-encoding get decode ;
|
|
|
|
: parse-date ( byte-array -- date/f )
|
|
dup [ " \0" member? ] all? [ drop f ] [
|
|
binary [ (ymd>timestamp) ] with-byte-reader
|
|
] if ;
|
|
|
|
: parse-float ( byte-array -- n )
|
|
[ "\r\n\t *" member? ] trim string>number ;
|
|
|
|
: parse-int ( byte-array -- n )
|
|
dup length 4 assert= le> ;
|
|
|
|
: parse-short ( byte-array -- n )
|
|
dup length 2 assert= le> ;
|
|
|
|
SYMBOL: unknown
|
|
|
|
ERROR: illegal-logical value ;
|
|
|
|
: parse-logical ( byte-array -- n )
|
|
first {
|
|
{ [ dup "TtYy" member? ] [ drop t ] }
|
|
{ [ dup "FfNn" member? ] [ drop f ] }
|
|
{ [ dup "? " member? ] [ drop unknown ] }
|
|
[ illegal-logical ]
|
|
} cond ;
|
|
|
|
: parse-numeric ( byte-array -- n )
|
|
[ "\r\n\t *" member? ] trim
|
|
H{ { CHAR: , CHAR: . } } substitute string>number ;
|
|
|
|
: parse-double ( byte-array -- n )
|
|
dup length 8 assert= le> bits>double ;
|
|
|
|
: parse-currency ( byte-array -- n )
|
|
dup length 8 assert= le> 10000 / ;
|
|
|
|
: parse-timestamp ( byte-array -- timestamp )
|
|
[ -4713 1 1 <date> ] dip 4 cut [ le> ] bi@
|
|
[ days time+ ] [ milliseconds time+ ] bi* ;
|
|
|
|
ERROR: unsupported-field-type type ;
|
|
|
|
: parse-field ( byte-array type -- data )
|
|
{
|
|
{ CHAR: \0 [ ] }
|
|
{ CHAR: 2 [ parse-short ] }
|
|
{ CHAR: 4 [ parse-int ] }
|
|
{ CHAR: 8 [ parse-double ] }
|
|
{ CHAR: C [ parse-string ] }
|
|
{ CHAR: D [ parse-date ] }
|
|
{ CHAR: F [ parse-float ] }
|
|
{ CHAR: I [ parse-int ] }
|
|
{ CHAR: L [ parse-logical ] }
|
|
{ CHAR: N [ parse-numeric ] }
|
|
{ CHAR: O [ parse-double ] }
|
|
{ CHAR: V [ parse-string ] }
|
|
{ CHAR: Y [ parse-currency ] }
|
|
{ CHAR: @ [ parse-timestamp ] }
|
|
! { CHAR: + [ parse-autoincrement ] }
|
|
! { CHAR: M [ parse-memo ] }
|
|
! { CHAR: T [ parse-datetime ] }
|
|
! { CHAR: B [ parse-double? ] } ! (only on dbversion in [0x30, 0x31, 0x32])
|
|
! { CHAR: G [ parse-general ] }
|
|
! { CHAR: P [ parse-picture ] }
|
|
! { CHAR: Q [ parse-varbinary ] }
|
|
[ unsupported-field-type ]
|
|
} case ;
|
|
|
|
: dbase3-memo ( n path -- data )
|
|
binary [
|
|
512 * seek-absolute seek-input
|
|
B{ } [
|
|
512 read
|
|
dup [ B{ 0 0x1a } member? ] find drop
|
|
[ head f ] [ t ] if* [ append ] dip
|
|
] loop
|
|
] with-file-reader ;
|
|
|
|
LE-STRUCT: db4-memo-header
|
|
{ reserved uint } ! B{ 0xff 0xff 0x08 0x08 }
|
|
{ length uint } ;
|
|
|
|
: dbase4-memo ( n path -- data )
|
|
binary [
|
|
512 * seek-absolute seek-input
|
|
db4-memo-header read-struct length>> read
|
|
] with-file-reader ;
|
|
|
|
BE-STRUCT: vfp-file-header
|
|
{ nextblock uint }
|
|
{ reserved1 ushort }
|
|
{ blocksize ushort }
|
|
{ reserved2 uchar[504] } ;
|
|
|
|
BE-STRUCT: vfp-memo-header
|
|
{ type uint }
|
|
{ length uint } ;
|
|
|
|
CONSTANT: vfp-memo-types H{
|
|
{ 0x0 "picture memo" }
|
|
{ 0x1 "text memo" }
|
|
{ 0x2 "object memo" }
|
|
}
|
|
|
|
: vfp-memo ( n path -- data )
|
|
binary [
|
|
vfp-file-header read-struct blocksize>> *
|
|
seek-absolute seek-input
|
|
vfp-memo-header read-struct length>> read
|
|
] with-file-reader ;
|