英文:
binary.read returns "unexpected EOF" no matter what dBase (.dbf) File I use
问题
以下是要翻译的内容:
func main() {
file, err := os.Open("example.dbf") // 用于读取访问。
if err != nil {
log.Fatal(err)
}
dBaseioReader, err := NewReader(file)
if err != nil {
log.Fatal(err)
}
return nil
}
type dbHeader struct {
Version byte
LastUpdate [3]byte
NumRecords int32
NumBytesInHeader int16
NumBytesInRecord int16
_ [2]byte // 保留字段
IncompatFlag byte
EncryptionFlag byte
MultiUserProcessing [12]byte
MDXProductionFlag byte
LangDriverId byte
_ [2]byte // 保留字段
LangDriverName [32]byte
_ [4]byte // 保留字段
}
type dbFieldDescriptor struct {
FieldName [32]byte
FieldType byte
FieldLen byte
FieldDec byte
_ [2]byte
MDXProductionFlag byte
_ [2]byte
NextAutoIncrement [4]byte
_ [4]byte
}
type DBaseReader struct {
rawInput *bufio.Reader
Header *dbHeader
Fields []*dbFieldDescriptor
recordsLeft int
}
func NewReader(input io.Reader) (dbr *DBaseReader, err error) {
dbr = &DBaseReader{
rawInput: bufio.NewReaderSize(input, 32*1024),
Header: &dbHeader{},
}
err = binary.Read(dbr.rawInput, binary.LittleEndian, dbr.Header)
if err != nil{
return
}
dbr.recordsLeft = int(dbr.Header.NumRecords)
headerBytesLeft := dbr.Header.NumBytesInHeader
headerBytesLeft -= dbHeaderSize
// 读取字段描述符,直到遇到 0x0D 终止字节
var term []byte
for {
field := &dbFieldDescriptor{}
err = binary.Read(dbr.rawInput, binary.LittleEndian, field)
if err != nil{
// 第一个崩溃发生在这里。
return
}
dbr.Fields = append(dbr.Fields, field)
headerBytesLeft -= dbFieldDescriptorSize
// 检查终止字节
term, err = dbr.rawInput.Peek(1)
if err != nil{
return
}
if term[0] == 0x0D {
break
}
}
// 读取终止符
_, err = dbr.rawInput.ReadByte()
if err != nil {
return
}
headerBytesLeft -= 1
if headerBytesLeft > 0 {
err = fmt.Errorf("错误:剩余的头字节数:%d.. 读取属性?!..\n", headerBytesLeft)
return
// headerLeftOver := make([]byte, headerBytesLeft)
// err = binary.Read(dbr.rawInput, binary.LittleEndian, headerLeftOver)
// if err != nil {
// return
// }
// props := &dbFieldProperties{}
// err = binary.Read(dbr.rawInput, binary.LittleEndian, props)
// if err != nil {
// return
// }
// fmt.Printf("Props: %#v\n", props)
}
// 读取直到第一个记录标记
_, err = dbr.rawInput.ReadBytes(' ')
if err != nil {
return
}
return dbr, nil
}
以上是相关的代码。无论我使用什么样的示例 dbf 文件,程序都会崩溃。我不确定为什么会一直出现"Unexpected EOF"错误。我在过去几天里一直在努力解决这个问题,但不幸的是没有成功。
英文:
func main() {
file, err := os.Open("example.dbf") // For read access.
if err != nil {
log.Fatal(err)
}
dBaseioReader, err := NewReader(file)
if err != nil {
log.Fatal(err)
}
return nil
}
type dbHeader struct {
Version byte
LastUpdate [3]byte
NumRecords int32
NumBytesInHeader int16
NumBytesInRecord int16
_ [2]byte //reserved
IncompatFlag byte
EncryptionFlag byte
MultiUserProcessing [12]byte
MDXProductionFlag byte
LangDriverId byte
_ [2]byte //reserved
LangDriverName [32]byte
_ [4]byte //reserved
}
type dbFieldDescriptor struct {
FieldName [32]byte
FieldType byte
FieldLen byte
FieldDec byte
_ [2]byte
MDXProductionFlag byte
_ [2]byte
NextAutoIncrement [4]byte
_ [4]byte
}
type DBaseReader struct {
rawInput *bufio.Reader
Header *dbHeader
Fields []*dbFieldDescriptor
recordsLeft int
}
func NewReader(input io.Reader) (dbr *DBaseReader, err error) {
dbr = &DBaseReader{
rawInput: bufio.NewReaderSize(input, 32*1024),
Header: &dbHeader{},
}
err = binary.Read(dbr.rawInput, binary.LittleEndian, dbr.Header)
if err != nil{
return
}
dbr.recordsLeft = int(dbr.Header.NumRecords)
headerBytesLeft := dbr.Header.NumBytesInHeader
headerBytesLeft -= dbHeaderSize
// read field descriptors until 0x0D termination byte
var term []byte
for {
field := &dbFieldDescriptor{}
err = binary.Read(dbr.rawInput, binary.LittleEndian, field)
if err != nil{
//FIRST CRASH HAPPENS HERE.
return
}
dbr.Fields = append(dbr.Fields, field)
headerBytesLeft -= dbFieldDescriptorSize
// check for terminator byte
term, err = dbr.rawInput.Peek(1)
if err != nil{
return
}
if term[0] == 0x0D {
break
}
}
// read the terminator
_, err = dbr.rawInput.ReadByte()
if err != nil {
return
}
headerBytesLeft -= 1
if headerBytesLeft > 0 {
err = fmt.Errorf("Error: Header Bytes Left: %d.. Read Properties?!..\n", headerBytesLeft)
return
// headerLeftOver := make([]byte, headerBytesLeft)
// err = binary.Read(dbr.rawInput, binary.LittleEndian, headerLeftOver)
// if err != nil {
// return
// }
// props := &dbFieldProperties{}
// err = binary.Read(dbr.rawInput, binary.LittleEndian, props)
// if err != nil {
// return
// }
// fmt.Printf("Props: %#v\n", props)
}
// read until first record marker
_, err = dbr.rawInput.ReadBytes(' ')
if err != nil {
return
}
return dbr, nil
}
Above is the relevant code. Program crashes no matter what example dbf file I use. I'm not sure why I keep getting "Unexpected EOF" error. I've been trying to figure this out over the past few days with no luck unfortunately.
答案1
得分: 2
你没有提供任何证据证明你的文件格式是正确的。在编写程序之前,你应该确认你拥有正确的文件格式。
文件的前256个字节是什么?例如,
hex.go
:
package main
import (
"encoding/hex"
"fmt"
"io/ioutil"
"os"
"strconv"
)
func main() {
if len(os.Args) <= 1 {
fmt.Fprintln(os.Stderr, "usage: hex filename [bytes]")
return
}
data, err := ioutil.ReadFile(os.Args[1])
if err != nil {
fmt.Fprintln(os.Stderr, "filename:", err)
return
}
n := len(data)
if len(os.Args) > 2 {
i, err := strconv.Atoi(os.Args[2])
if err != nil {
fmt.Fprintln(os.Stderr, "bytes:", err)
return
}
if n > i {
n = i
}
}
fmt.Print(hex.Dump(data[:n]))
}
输出结果:
$ go run hex.go example.dbf 256
00000000 03 01 04 18 01 00 00 00 41 07 d0 05 00 00 00 00 |........A.......|
00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 03 00 00 |................|
00000020 54 52 41 43 4b 5f 49 44 00 00 00 43 01 00 00 00 |TRACK_ID...C....|
00000030 0b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000040 4c 4d 55 4c 54 00 00 00 00 00 00 4c 0c 00 00 00 |LMULT......L....|
00000050 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000060 4e 54 41 58 59 45 41 52 00 00 00 4e 0d 00 00 00 |NTAXYEAR...N....|
00000070 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000080 4e 43 4f 55 4e 54 59 43 4f 44 00 4e 11 00 00 00 |NCOUNTYCOD.N....|
00000090 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000000a0 43 50 52 4f 50 41 44 44 00 00 00 43 13 00 00 00 |CPROPADD...C....|
000000b0 3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |<...............|
000000c0 4c 43 4f 4d 4d 49 4e 44 00 00 00 4c 4f 00 00 00 |LCOMMIND...LO...|
000000d0 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000000e0 4c 56 41 43 4c 41 4e 44 00 00 00 4c 50 00 00 00 |LVACLAND...LP...|
000000f0 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
$
但是表级别是什么意思?级别表示其版本。dBASE表格格式是一个随着时间演变的标准。当新版本的dBASE对该格式进行了一些改进时,会给出一个新的格式级别号,与新的dBASE版本相同。例如,我们有级别3、4、5和7对应于dBASE III、dBASE IV、dBASE 5和Visual dBASE 7。没有级别6,因为没有Visual dBASE 6。
级别7带来了许多改进。字段名称可以有多达31个字符(之前最多为10个)。出现了一些新的字段类型(例如,AutoIncrement字段使得在同一表中给两条记录分配相同的编号几乎不可能)。如果你的表格需要被其他软件使用,你可能需要为了兼容性而牺牲这些优势,因为很少有应用程序可以使用级别7的表格。
.dbf文件格式:
文件头的第0个字节的0-2位表示版本号:3表示dBASE Level 5,4表示dBASE Level 7。
英文:
You have provided no evidence that your file format is correct. Before you wrote your program, you should have confirmed that you have the correct format for the file.
What are the first 256 bytes of the file? For example,
hex.go
:
package main
import (
"encoding/hex"
"fmt"
"io/ioutil"
"os"
"strconv"
)
func main() {
if len(os.Args) <= 1 {
fmt.Fprintln(os.Stderr, "usage: hex filename [bytes]")
return
}
data, err := ioutil.ReadFile(os.Args[1])
if err != nil {
fmt.Fprintln(os.Stderr, "filename:", err)
return
}
n := len(data)
if len(os.Args) > 2 {
i, err := strconv.Atoi(os.Args[2])
if err != nil {
fmt.Fprintln(os.Stderr, "bytes:", err)
return
}
if n > i {
n = i
}
}
fmt.Print(hex.Dump(data[:n]))
}
Output:
$ go run hex.go example.dbf 256
00000000 03 01 04 18 01 00 00 00 41 07 d0 05 00 00 00 00 |........A.......|
00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 03 00 00 |................|
00000020 54 52 41 43 4b 5f 49 44 00 00 00 43 01 00 00 00 |TRACK_ID...C....|
00000030 0b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000040 4c 4d 55 4c 54 00 00 00 00 00 00 4c 0c 00 00 00 |LMULT......L....|
00000050 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000060 4e 54 41 58 59 45 41 52 00 00 00 4e 0d 00 00 00 |NTAXYEAR...N....|
00000070 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000080 4e 43 4f 55 4e 54 59 43 4f 44 00 4e 11 00 00 00 |NCOUNTYCOD.N....|
00000090 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000000a0 43 50 52 4f 50 41 44 44 00 00 00 43 13 00 00 00 |CPROPADD...C....|
000000b0 3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |<...............|
000000c0 4c 43 4f 4d 4d 49 4e 44 00 00 00 4c 4f 00 00 00 |LCOMMIND...LO...|
000000d0 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000000e0 4c 56 41 43 4c 41 4e 44 00 00 00 4c 50 00 00 00 |LVACLAND...LP...|
000000f0 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
$
> The table level
>
> But what is the level of a table? The level means its version. The
> dBASE table format is a standard that has evolved over time. When a
> new version of dBASE made some improvements to that format, a new
> format level number was given, identical to the new dBASE version. For
> example, we have levels 3, 4, 5 and 7 corresponding to dBASE III,
> dBASE IV, dBASE 5, and Visual dBASE 7. There is no level 6 because
> there was no Visual dBASE 6.
>
> Level 7 brought many improvements. The field names can have up to 31
> characters (from a maximum of 10 before). Some new fields types have
> appeared (for example, the AutoIncrement field that makes nearly
> impossible to give the same number to two records in the same table).
> If your tables have to be used by other software, you might have to
> sacrifice these advantages for the sake of compatibility, as few
> applications can use a level 7 table.
.dbf File Formats:
The file header byte 0, bits 0-2 indicates the version number: 3 for dBASE Level 5, 4 for dBASE Level 7.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论