如何在C#中从DBF文件中读取Memo类型字段?

huangapple go评论55阅读模式
英文:

How to read Memo type field from DBF file in C#?

问题

// 这是一个用于读取DBF文件的C#类。它能够解析DBF文件的头部信息以及记录,并将其转化为DataTable、Dictionary或自定义类的形式。

// 以下是类的构造函数和一些成员变量的初始化。

public class DBFReader : IDisposable
{
    private BinaryReader reader;
    private Encoding encoding;

    public DBFReader(Stream stream, Encoding encoding)
    {
        // ...
    }

    public DBFReader(string filename, Encoding encoding)
    {
        // ...
    }

    private void ReadHeader()
    {
        // ...
    }

    private void ReadRecords()
    {
        // ...
    }

    // ...

    private struct DBFHeader
    {
        // ...
    }

    private struct DBFFieldDescriptor
    {
        // ...
    }

    // ...
}

// 这是一个用于将Julian日期转换为.NET DateTime结构的静态方法。

private static DateTime JulianToDateTime(long julianDateAsLong)
{
    // ...
}

// 这是一个用于将DBF文件的字段类型转换为相应.NET数据类型的静态方法。

public static Type ToDbType(char type)
{
    // ...
}

如果你需要了解更多关于如何使用这个类以及如何读取Memo类型字段的信息,请参考提供的链接。

英文:

I am unable to find the solution that how to read Memo type field from DBF file.

As per my current RnD, I found out that the "Memo" data is stored in a separate file with the same name as the DBF file, but with a ".fpt" file extension. The "Memo" data file consists of a series of blocks, each of which contains a fixed number of bytes (typically 512 bytes), and each block is identified by a block number.

The "Memo" column in the DBF file contains a 10-byte pointer that identifies the block number of the first block of data for the corresponding record. If the data for a particular record exceeds the size of one block, additional blocks are allocated as needed and linked together using the pointers stored in the first block.

Moreover I used the source code provided here

using System;
using System.Collections.Generic;
using System.Data;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Text;
namespace System.IO
{
/// <summary>
/// This class reads a dbf files
/// </summary>
public class DBFReader : IDisposable
{
private BinaryReader reader;
private Encoding encoding;
public DBFReader(Stream stream, Encoding encoding)
{
this.encoding = encoding;
this.reader = new BinaryReader(stream, encoding);
ReadHeader();
}
public DBFReader(string filename, Encoding encoding)
{
if (File.Exists(filename) == false)
throw new FileNotFoundException();
this.encoding = encoding;
var bs = new BufferedStream(File.OpenRead(filename));
this.reader = new BinaryReader(bs, encoding);
ReadHeader();
}
private void ReadHeader()
{
byte[] buffer = reader.ReadBytes(Marshal.SizeOf(typeof(DBFHeader)));
// Marshall the header into a DBFHeader structure
GCHandle handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
this.header = (DBFHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(DBFHeader));
handle.Free();
fields = new List<DBFFieldDescriptor>();
while (reader.PeekChar() != 13)
{
buffer = reader.ReadBytes(Marshal.SizeOf(typeof(DBFFieldDescriptor)));
handle = GCHandle.Alloc(buffer, GCHandleType.Pinned);
var fieldDescriptor = (DBFFieldDescriptor)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(DBFFieldDescriptor));
if ((fieldDescriptor.Flags & DBFFieldFlags.System) != DBFFieldFlags.System )
{
fields.Add(fieldDescriptor);
}
handle.Free();
}
byte headerTerminator = reader.ReadByte();
byte[] backlink = reader.ReadBytes(263);
}
private void ReadRecords()
{
records = new List<Dictionary<DBFFieldDescriptor, object>>();
// Skip back to the end of the header. 
reader.BaseStream.Seek(header.HeaderLenght, SeekOrigin.Begin);
for (int i = 0; i < header.NumberOfRecords; i++)
{
if (reader.PeekChar() == '*') // DELETED
{
continue;
}
var record = new Dictionary<DBFFieldDescriptor, object>();
var row = reader.ReadBytes(header.RecordLenght);
foreach (var field in fields)
{
byte[] buffer = new byte[field.FieldLength];
Array.Copy(row, field.Address, buffer, 0, field.FieldLength);
string text = (encoding.GetString(buffer) ?? String.Empty).Trim();
switch ((DBFFieldType)field.FieldType)
{
case DBFFieldType.Character:
record[field] = text;
break;
case DBFFieldType.Currency:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = 0.0m;
}
}
else
{
record[field] = Convert.ToDecimal(text);
}
break;
case DBFFieldType.Numeric:
case DBFFieldType.Float:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = 0.0f;
}
}
else
{
record[field] = Convert.ToSingle(text);
}
break;
case DBFFieldType.Date:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = DateTime.MinValue;
}
}
else
{
record[field] = DateTime.ParseExact(text, "yyyyMMdd", CultureInfo.InvariantCulture);
}
break;
case DBFFieldType.DateTime:
if (String.IsNullOrWhiteSpace(text) || BitConverter.ToInt64(buffer, 0) == 0)
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = DateTime.MinValue;
}
}
else
{
record[field] = JulianToDateTime(BitConverter.ToInt64(buffer, 0));
}
break;
case DBFFieldType.Double:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = 0.0;
}
}
else
{
record[field] = Convert.ToDouble(text);
}
break;
case DBFFieldType.Integer:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = 0;
}
}
else
{
record[field] = BitConverter.ToInt32(buffer, 0);    
}
break;
case DBFFieldType.Logical:
if (String.IsNullOrWhiteSpace(text))
{
if ((field.Flags & DBFFieldFlags.AllowNullValues) == DBFFieldFlags.AllowNullValues)
{
record[field] = null;
}
else
{
record[field] = false;
}
}
else
{
record[field] = (buffer[0] == 'Y' || buffer[0] == 'T');    
}
break;
case DBFFieldType.Memo:
case DBFFieldType.General:
case DBFFieldType.Picture:
default:
record[field] = buffer;
break;
}
}
records.Add(record);
}
}
public DataTable ReadToDataTable()
{
ReadRecords();
var table = new DataTable();
// Columns
foreach (var field in fields)
{
var colType = ToDbType(field.FieldType);
var column = new DataColumn(field.FieldName, colType ?? typeof(String));
table.Columns.Add(column);
}
// Rows
foreach (var record in records)
{
var row = table.NewRow();
foreach (var column in record.Keys)
{
row[column.FieldName] = record[column] ?? DBNull.Value;
}
table.Rows.Add(row);
}
return table;
}
public IEnumerable<Dictionary<string, object>> ReadToDictionary()
{
ReadRecords();
return records.Select(record => record.ToDictionary(r => r.Key.FieldName, r => r.Value)).ToList();
}
public IEnumerable<T> ReadToObject<T>()
where T : new()
{
ReadRecords();
var type = typeof(T);
var list = new List<T>();
foreach (var record in records)
{
T item = new T();
foreach (var pair in record.Select(s => new { Key = s.Key.FieldName, Value = s.Value }))
{
var property = type.GetProperty(pair.Key, BindingFlags.IgnoreCase | BindingFlags.Public | BindingFlags.Instance);
if (property != null)
{
if (property.PropertyType == pair.Value.GetType())
{
property.SetValue(item, pair.Value, null);
}
else
{
if (pair.Value != DBNull.Value)
{
property.SetValue(item, System.Convert.ChangeType(pair.Value, property.PropertyType), null);
}
}
}
}
list.Add(item);
}
return list;
}
private DBFHeader header;
private List<DBFFieldDescriptor> fields = new List<DBFFieldDescriptor>();
private List<Dictionary<DBFFieldDescriptor, object>> records = new List<Dictionary<DBFFieldDescriptor,object>>();
#region IDisposable
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
protected void Dispose(bool disposing)
{
if (disposing == false) return;
if (reader != null)
{
reader.Close();
reader.Dispose();
reader = null;
}
}
~DBFReader()
{
Dispose(false);
} 
#endregion
/// <summary>
/// Convert a Julian Date as long to a .NET DateTime structure
/// Implemented from pseudo code at http://en.wikipedia.org/wiki/Julian_day
/// </summary>
/// <param name="julianDateAsLong">Julian Date to convert (days since 01/01/4713 BC)</param>
/// <returns>DateTime</returns>
private static DateTime JulianToDateTime(long julianDateAsLong)
{
if (julianDateAsLong == 0) return DateTime.MinValue;
double p = Convert.ToDouble(julianDateAsLong);
double s1 = p + 68569;
double n = Math.Floor(4 * s1 / 146097);
double s2 = s1 - Math.Floor(((146097 * n) + 3) / 4);
double i = Math.Floor(4000 * (s2 + 1) / 1461001);
double s3 = s2 - Math.Floor(1461 * i / 4) + 31;
double q = Math.Floor(80 * s3 / 2447);
double d = s3 - Math.Floor(2447 * q / 80);
double s4 = Math.Floor(q / 11);
double m = q + 2 - (12 * s4);
double j = (100 * (n - 49)) + i + s4;
return new DateTime(Convert.ToInt32(j), Convert.ToInt32(m), Convert.ToInt32(d));
}
/// <summary>
/// This is the file header for a DBF. We do this special layout with everything
/// packed so we can read straight from disk into the structure to populate it
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)]
private struct DBFHeader
{
/// <summary>The version.</summary>
public readonly DBFVersion Version;
/// <summary>The update year.</summary>
public readonly byte UpdateYear;
/// <summary>The update month.</summary>
public readonly byte UpdateMonth;
/// <summary>The update day.</summary>
public readonly byte UpdateDay;
/// <summary>The number of records.</summary>
public readonly int NumberOfRecords;
/// <summary>The length of the header.</summary>
public readonly short HeaderLenght;
/// <summary>The length of the bytes records.</summary>
public readonly short RecordLenght;
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 16)]
public readonly byte[] Reserved;
/// <summary>Table Flags</summary>
public readonly DBFTableFlags TableFlags;
/// <summary>Code Page Mark</summary>
public readonly byte CodePage;
/// <summary>Reserved, contains 0x00</summary>
public readonly short EndOfHeader;
}
public enum DBFVersion : byte
{
Unknown = 0,
FoxBase = 0x02,
FoxBaseDBase3NoMemo = 0x03,
VisualFoxPro = 0x30,
VisualFoxProWithAutoIncrement = 0x31,
dBase4SQLTableNoMemo = 0x43,
dBase4SQLSystemNoMemo = 0x63,
FoxBaseDBase3WithMemo = 0x83,
dBase4WithMemo = 0x8B,
dBase4SQLTableWithMemo = 0xCB,
FoxPro2WithMemo = 0xF5,
FoxBASE = 0xFB
}
[Flags]
public enum DBFTableFlags : byte
{
None = 0x00,
HasStructuralCDX = 0x01,
HasMemoField = 0x02,
IsDBC = 0x04
}
/// <summary>
/// This is the field descriptor structure. There will be one of these for each column in the table.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)]
private struct DBFFieldDescriptor
{
/// <summary>The field name.</summary>
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 11)]
public readonly string FieldName;
/// <summary>The field type.</summary>
public readonly char FieldType;
/// <summary>The field address.</summary>
public readonly int Address;
/// <summary>The field length in bytes.</summary>
public readonly byte FieldLength;
/// <summary>The field precision.</summary>
public readonly byte DecimalCount;
/// <summary>Field Flags</summary>
public readonly DBFFieldFlags Flags;
/// <summary>AutoIncrement next value</summary>
public readonly int AutoIncrementNextValue;
/// <summary>AutoIncrement step value</summary>
public readonly byte AutoIncrementStepValue;
/// <summary>Reserved</summary>
[MarshalAs(UnmanagedType.ByValArray, SizeConst = 8)]
public readonly byte[] Reserved;
public override string ToString()
{
return String.Format("{0} {1}", FieldName, FieldType);
}
}
[Flags]
public enum DBFFieldFlags : byte
{
None = 0x00,
System = 0x01,
AllowNullValues = 0x02,
Binary = 0x04,
AutoIncrementing = 0x0C
}
public enum DBFFieldType : int
{
Character = 'C',
Currency = 'Y',
Numeric = 'N',
Float = 'F',
Date = 'D',
DateTime = 'T',
Double = 'B',
Integer = 'I',
Logical = 'L',
Memo = 'M',
General = 'G',
Picture = 'P'
}
public static Type ToDbType(char type)
{
switch ((DBFFieldType)type)
{
case DBFFieldType.Float:
return typeof(float);
case DBFFieldType.Integer:
return typeof(int);
case DBFFieldType.Currency:
return typeof(decimal);
case DBFFieldType.Character:
case DBFFieldType.Memo:
return typeof(string);
case DBFFieldType.Date:
case DBFFieldType.DateTime:
return typeof(DateTime);
case DBFFieldType.Logical:
return typeof(bool);
case DBFFieldType.General:
case DBFFieldType.Picture:
return typeof(byte[]);
default:
return null;
}
}
}
}

答案1

得分: 2

你关于FPT文件结构的信息并不完全正确,所有详细信息都在VFP帮助文件中。

你是否必须以低级方式读取它?如果必须这样做,基本上你要做的是:

  • 从DBF本身收集备忘录块的编号(块编号存储在DBF中相应的字段中),
  • 对于备忘录条目,转到FPT中的块编号(取决于块大小)
  • 从块头中读取大小并获取与存储大小匹配的字节。

在执行此操作时,始终以二进制形式读取,因为与C字符串不同,VFP字符串值不是ASCIIZ值,它们甚至可以包含'\x0'。

如果不一定要以这种方式阅读,那么简单的方法是使用VFPOLEDB OleDb驱动程序进行读取。例如:

void Main()
{
    string cn = @"Provider=VFPOLEDB;Data source=C:\PROGRAM FILES (X86)\MICROSOFT VISUAL FOXPRO 9\SAMPLES\DATA;";
    string query = "select emp_id, first_name, last_name, notes from Employee";

    DataTable t = new DataTable();
    new OleDbDataAdapter(query, cn).Fill(t);

    foreach (var row in t.AsEnumerable())
    {
        Console.WriteLine($"{row["emp_id"]}, {row["first_name"]}, {row["last_name"]}, ({row["notes"]})");
    }
}

请注意,VFPOLEDB驱动程序是32位的,您需要针对x86平台进行目标设置。据说Sybase ADS提供了64位驱动程序(我从未使用过)。

PS:还要搜索Tom Brother的Linq To VFP、Linq To EF VFP驱动程序。
PS2:我快速浏览了你提供的源代码,请注意它适用于较旧的VFP版本,实际上并不正确(即:假设备忘录是一个字符串,这是不正确的 - 如果你在C#中将它们视为字符串,你可能会丢失许多数据)。

英文:

What is your question?
(Would be a mess as comment)

Your information about FPT file structure is not entirely correct and full details exist in the VFP help file.

Do you have to read it low level? If you have to, basically what you do is:

  • Collect memo block numbers from the DBF itself (block numbers are stored in corresponding field in DBF),
  • For a memo entry, go to block number in FPT (depends on block size)
  • From the block header read the size and get the bytes that matches the stored size.

While doing that, always read as binary, because unlike C strings, VFP string values are not ASCIIZ values, they can even contain '\x0'.

If it is not a must to read that way then the easy way is to simply read it using VFPOLEDB OleDb driver. ie:

void Main()
{
string cn = @"Provider=VFPOLEDB;Data source=C:\PROGRAM FILES (X86)\MICROSOFT VISUAL FOXPRO 9\SAMPLES\DATA;";
string query = "select emp_id, first_name, last_name, notes  from Employee";
DataTable t = new DataTable();
new OleDbDataAdapter(query, cn).Fill(t);
foreach (var row in t.AsEnumerable())
{
Console.WriteLine($"{row["emp_id"]}, {row["first_name"]}, {row["last_name"]}, ({row["notes"]})");
}
}

Note that VFPOLEDB driver is 32 bits, you would need to target x86 platform. There are 64 bits drivers from Sybase ADS as they say (have never used).

PS: Also search for Tom Brother's Linq To VFp, Linq To EF VFP drivers.
PS2: I quickly glanced the source code you provided, and beware that is for older VFP versions and is not really correct (ie: Assumes Memo is a string which is not correct - if you accept them string in C# you are likely going to lose many data).

huangapple
  • 本文由 发表于 2023年2月16日 13:41:26
  • 转载请务必保留本文链接:https://go.coder-hub.com/75468259.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定