英文:
Is it possible to modify TStringField class (with new property)
问题
Delphi 11.3 和使用 UniDac 连接 Oracle 数据库时,我遇到了一个问题:有许多旧的应用程序(用 Cobol 编写)不支持 Unicode,因此文本字段的数据以 Ansi 文本的形式存储,使用(Windows)客户端的代码页(125x)。我必须逐步替换它们,并在“新”表中存储 Unicode(UTF-16)数据。
只要旧的应用程序仍在使用,我就必须将 Ansi 字符串“翻译”成 UTF(写入时也需要相反的转换)。
我已经编写了两个函数:
function AnsiToUTF(Value: AnsiString; codepage: word): string;
和
function UTFToAnsi(Value: string; codepage: word): AnsiString;
这两个函数都在工作。
我在这个论坛上搜索了一下,似乎可以对 TStringField
类进行更改,但我没有经验如何实现。
如果我能够在 TStringField
类中应用一个新的属性 AsUTF
,并将我的两个函数嵌入到 SetAsUTF()
和 GetAsUTF()
中,这样我就可以在任何使用 TFields
的数据感知组件中调用它们。例如:
MyString := UniQuery1.FieldByName('TEXT').AsUTF(1252);
和
UniQuery1.FieldByName('TEXT').AsUTF(1252) := MyString;
这样做是否可能(或者是否有更好的解决方案)?
#编辑:有人告诉我提供一个可重现的示例,所以在这里是:
// Unit from the question: declares a TWideStringField descendant that adds
// code-page-aware conversion between Unicode strings and ANSI strings.
// NOTE(review): no `interface`, `uses` or `type` keyword appears between the
// unit header and the class declaration in this excerpt.
unit UTFStringField;
// Field class offering SetAsUTF/GetAsUTF accessors plus the two conversion
// routines they rely on.
TUTFStringField = class(TWideStringField)
protected
// Converts UTF to ANSI for the given code page and writes it to the field.
procedure SetAsUTF(UTF: string; Codepage: word);
// Reads the field as ANSI and converts it to a Unicode string for the given code page.
function GetAsUTF(Codepage: word): string;
// NOTE(review): declared parameterless with `override`; presumably the
// inherited field constructor takes an owner argument - confirm the signature.
constructor Create; override;
destructor Destroy; override;
public
// Unicode -> ANSI conversion for the code page passed in GCodePage.
function UTFToAnsi(txt: string; GCodePage: word): AnsiString;
// ANSI -> Unicode conversion for the code page passed in GCodepage.
function AnsiToUTF(txt: AnsiString; GCodepage: word): string;
end;
implementation
// Stores a Unicode string in the field: the text is first converted to ANSI
// with UTFToAnsi for the requested code page, then written via SetAsAnsiString.
procedure TUTFStringField.SetAsUTF(UTF: string; Codepage: word);
begin
SetAsAnsiString(UTFToAnsi(UTF, Codepage));
end;
// Returns the field content as a Unicode string: the value is read via
// GetAsAnsiString and converted with AnsiToUTF for the requested code page.
function TUTFStringField.GetAsUTF(CodePage: word): string;
begin
Result := AnsiToUTF(GetAsAnsiString, CodePage);
end;
// Calls the inherited constructor, then makes TUTFStringField the class stored
// in DefaultFieldClasses for ftWideString.
// NOTE(review): this runs for every instance that is constructed; a one-time
// registration (e.g. in a unit initialization section) may be what is
// intended - confirm.
constructor TUTFStringField.Create;
begin
inherited Create;
DefaultFieldClasses[ftWideString] := TUTFStringField;
end;
// Puts TWideStringField back into DefaultFieldClasses for ftWideString, then
// calls the inherited destructor.
// NOTE(review): destroying any single instance resets the mapping even if
// other instances are still alive - confirm this is acceptable.
destructor TUTFStringField.Destroy;
begin
DefaultFieldClasses[ftWideString] := TWideStringField;
inherited Destroy;
end;
// Builds a Unicode string from an ANSI string, one character at a time.
// Each source character is cast to Char; for code pages 1250 and 1251 the
// listed byte values are then replaced by their dedicated Unicode code points.
// Any other code page yields an empty string.
function TUTFStringField.AnsiToUTF(txt: AnsiString; GCodepage: word): string;
var
  Idx: Integer;
  Src: AnsiChar;
  Dst: Char;
begin
  Result := '';
  // Only these two code pages are handled; everything else returns ''.
  if (GCodepage <> 1250) and (GCodepage <> 1251) then
    Exit;
  for Idx := 1 to Length(txt) do
  begin
    Src := txt[Idx];
    Dst := Char(Src);
    if GCodepage = 1250 then
    begin
      case Ord(Src) of
        $80: Dst := Char($20AC); // EURO SIGN
        $82: Dst := Char($201A); // SINGLE LOW-9 QUOTATION MARK
        $84: Dst := Char($201E); // DOUBLE LOW-9 QUOTATION MARK
        $85: Dst := Char($2026); // HORIZONTAL ELLIPSIS
        // Add more cases for other characters...
      end;
    end
    else
    begin
      case Ord(Src) of
        $80: Dst := Char($0402); // CYRILLIC CAPITAL LETTER
        $81: Dst := Char($0403); // CYRILLIC CAPITAL LETTER
        $82: Dst := Char($201A); // SINGLE LOW-9 QUOTATION MARK
        // Add more cases for other characters...
      end;
    end;
    Result := Result + Dst;
  end;
end;
// Builds an ANSI string from a Unicode string, one UTF-16 code unit at a time.
// Each code unit is cast to AnsiChar; for code pages 1250 and 1251 the listed
// Unicode code points are then replaced by their single-byte equivalents.
// Any other code page yields an empty string.
// NOTE(review): code units above $FF that have no explicit mapping are only
// cast to AnsiChar, and surrogate pairs are not handled - confirm this is
// acceptable for the data being written.
function TUTFStringField.UTFToAnsi(txt: string; GCodepage: word): AnsiString;
var
  NewStr: AnsiString;
  OldChar: Char;
  NewChar: AnsiChar;
  i: integer;
begin
  NewStr := '';
  case GCodepage of
    1250: begin
      for i := 1 to Length(txt) do
      begin
        // Index the string directly: Copy() returns a string, which cannot
        // be assigned to a Char variable.
        OldChar := txt[i];
        NewChar := AnsiChar(OldChar);
        case Ord(OldChar) of
          $20AC: NewChar := AnsiChar($80);
          $201A: NewChar := AnsiChar($82);
          $201E: NewChar := AnsiChar($84); // DOUBLE LOW-9
          $2026: NewChar := AnsiChar($85); // HORIZONTAL ELLIPSIS
          // Add more cases for other characters...
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
    1251: begin
      for i := 1 to Length(txt) do
      begin
        OldChar := txt[i];
        NewChar := AnsiChar(OldChar);
        case Ord(OldChar) of
          $0402: NewChar := AnsiChar($80); // CYRILLIC CAPITAL
          $0403: NewChar := AnsiChar($81); // CYRILLIC CAPITAL
          $201A: NewChar := AnsiChar($82); // SINGLE LOW-9
          // Add more cases for other characters...
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
  end;
  Result := NewStr;
end;
// Unit tail: registers TUTFStringField through RegisterClass.
// NOTE(review): `interface` appears here after the implementation section;
// the RegisterClass call looks like it belongs in an `initialization`
// section - confirm against the intended unit layout.
interface
RegisterClass(TUTFStringField);
end.
当然,构造函数/析构函数可能不正确,但我不知道如何在何处引入我的新 TUTFStringField
类,以便在例如将 UniQuery 组件拖放到我的窗体上时始终使用它。
哦,顺便说一句:我在 Oracle Uni 提供程序中设置了 "Unicode" 为 true,因为我的新应用程序应该默认使用 Unicode(数据库字符集为 UTF-16)。
英文:
Using Delphi 11.3 and an Oracle DB using UniDac, I have the problem that there are many old apps (written in Cobol) which do not support Unicode, so the data for text fields is stored as Ansi text using the (Windows) client's codepage (125x). I have to replace them step by step and use "new" tables where I store the data in Unicode (UTF-16).
As long as the old apps are still in use, I have to "translate" the Ansi strings into UTF (and back for writing).
I have written 2 functions:
function AnsiToUTF(Value: AnsiString; codepage: word): string;
and
function UTFToAnsi(Value: string; codepage: word): AnsiString;
which are working.
I searched this forum and it seems to be possible to make changes to the TStringField
class, but I have no experience in doing so.
It would be great if I could apply a new property AsUTF
to the TStringField
class with my 2 functions embedded in SetAsUTF()
and GetAsUTF()
so that I can call them in any data-aware component using TFields
. For example:
MyString := UniQuery1.FieldByName('TEXT').AsUTF(1252);
and
UniQuery1.FieldByName('TEXT').AsUTF(1252) := MyString;
Would that be possible (or is there a better solution)?
#EDIT: I was told to give a reproducible example for it, so here it is:
// Unit from the question: declares a TWideStringField descendant that adds
// code-page-aware conversion between Unicode strings and ANSI strings.
// NOTE(review): no `interface`, `uses` or `type` keyword appears between the
// unit header and the class declaration in this excerpt.
unit UTFStringField;
// Field class offering SetAsUTF/GetAsUTF accessors plus the two conversion
// routines they rely on.
TUTFStringField = class(TWideStringField)
protected
// Converts UTF to ANSI for the given code page and writes it to the field.
procedure SetAsUTF(UTF: string; Codepage: word);
// Reads the field as ANSI and converts it to a Unicode string for the given code page.
function GetAsUTF(Codepage: word): string;
// NOTE(review): declared parameterless with `override`; presumably the
// inherited field constructor takes an owner argument - confirm the signature.
constructor Create; override;
destructor Destroy; override;
public
// Unicode -> ANSI conversion for the code page passed in GCodePage.
function UTFToAnsi(txt: string; GCodePage: word): Ansistring;
// ANSI -> Unicode conversion for the code page passed in GCodepage.
function AnsiToUTF(txt: Ansistring; GCodepage: word): string;
end;
implementation
// Stores a Unicode string in the field: the text is first converted to ANSI
// with UTFToAnsi for the requested code page, then written via SetAsAnsiString.
procedure TUTFStringField.SetAsUTF(UTF: string; Codepage: word);
begin
SetAsAnsiString(UTFToAnsi(UTF,Codepage));
end;
// Returns the field content as a Unicode string: the value is read via
// GetAsAnsiString and converted with AnsiToUTF for the requested code page.
function TUTFStringField.GetAsUTF(CodePage: word): string;
begin
Result := AnsiToUTF(GetAsAnsiString,CodePage);
end;
// Calls the inherited constructor, then makes TUTFStringField the class stored
// in DefaultFieldClasses for ftWideString.
// NOTE(review): this runs for every instance that is constructed; a one-time
// registration (e.g. in a unit initialization section) may be what is
// intended - confirm.
constructor TUTFStringField.Create;
begin
inherited Create;
DefaultFieldClasses[ftWideString] := TUTFStringField;
end;
// Puts TWideStringField back into DefaultFieldClasses for ftWideString, then
// calls the inherited destructor.
// NOTE(review): destroying any single instance resets the mapping even if
// other instances are still alive - confirm this is acceptable.
destructor TUTFStringField.Destroy;
begin
DefaultFieldClasses[ftWideString] := TWideStringField;
inherited destroy;
end;
// Builds a Unicode string from an ANSI string, one character at a time.
// Each source character is cast to Char; for code pages 1250 and 1251 the
// listed byte values are then replaced by their dedicated Unicode code points.
// Any other code page yields an empty string.
function TUTFStringField.AnsiToUTF(txt: Ansistring; GCodepage: word): string;
var
  NewStr: string;
  OldChar: AnsiChar;
  NewChar: Char;
  i: integer;
begin
  NewStr := '';
  case GCodepage of
    1250: begin
      for i := 1 to Length(txt) do
      begin
        OldChar := txt[i];
        NewChar := Char(OldChar);
        case Ord(OldChar) of
          $80: NewChar := Char($20AC); // #EURO SIGN
          $82: NewChar := Char($201A); // #SINGLE LOW-9
          $84: NewChar := Char($201E); // #DOUBLE LOW-9
          $85: NewChar := Char($2026); // #HORIZONTAL ELLIPSIS
          // ... (further mappings elided in the original post)
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
    1251: begin
      for i := 1 to Length(txt) do
      begin
        OldChar := AnsiChar(txt[i]);
        NewChar := Char(OldChar);
        case Ord(OldChar) of
          $80: NewChar := Char($0402); // #CYRILLIC CAPITAL LETTER
          $81: NewChar := Char($0403); // #CYRILLIC CAPITAL LETTER
          $82: NewChar := Char($201A); // #SINGLE LOW-9 QUOTATION
          // ... (further mappings elided in the original post)
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
  end;
  Result := NewStr;
end;
// Builds an ANSI string from a Unicode string, one UTF-16 code unit at a time.
// Each code unit is cast to AnsiChar; for code pages 1250 and 1251 the listed
// Unicode code points are then replaced by their single-byte equivalents.
// Any other code page yields an empty string.
// NOTE(review): code units above $FF that have no explicit mapping are only
// cast to AnsiChar, and surrogate pairs are not handled - confirm this is
// acceptable for the data being written.
function TUTFStringField.UTFToAnsi(txt: string; GCodepage: word): Ansistring;
var
  NewStr: Ansistring;
  OldChar: Char;
  NewChar: AnsiChar;
  i: integer;
begin
  NewStr := '';
  case GCodepage of
    1250: begin
      for i := 1 to Length(txt) do
      begin
        // Index the string directly: Copy() returns a string, which cannot
        // be assigned to a Char variable.
        OldChar := txt[i];
        NewChar := AnsiChar(OldChar);
        case Ord(OldChar) of
          $20AC: NewChar := AnsiChar($80);
          $201A: NewChar := AnsiChar($82);
          $201E: NewChar := AnsiChar($84); // DOUBLE LOW-9
          $2026: NewChar := AnsiChar($85); // HORIZONTAL ELLIPSIS
          $2020: NewChar := AnsiChar($86); // DAGGER
          // ... (further mappings elided in the original post)
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
    1251: begin
      for i := 1 to Length(txt) do
      begin
        OldChar := txt[i];
        NewChar := AnsiChar(OldChar);
        case Ord(OldChar) of
          $0402: NewChar := AnsiChar($80); // CYRILLIC CAPITAL
          $0403: NewChar := AnsiChar($81); // CYRILLIC CAPITAL
          $201A: NewChar := AnsiChar($82); // SINGLE LOW-9
          $0453: NewChar := AnsiChar($83); // CYRILLIC SMALL
          $201E: NewChar := AnsiChar($84); // DOUBLE LOW-9
        end;
        NewStr := NewStr + NewChar;
      end;
    end;
  end;
  // Hand the accumulated text back to the caller; the excerpt was cut off
  // before this assignment and the function's closing `end`.
  Result := NewStr;
end;
// Unit tail: registers TUTFStringField through RegisterClass.
// NOTE(review): `interface` appears here after the implementation section;
// the RegisterClass call looks like it belongs in an `initialization`
// section - confirm against the intended unit layout.
interface
RegisterClass(TUTFStringField);
end.
Surely the constructor / destructor is wrong, but I have no idea, how and where to introduce my new TUTFStringField
class, so that it is always used, for example the moment I drop a UniQuery component on my form.
Oh, and by the way: I set "UniCode"
in the Oracle Uni provider to true, as my new apps should use Unicode as default (the database charset is UTF-16).
答案1
得分: 3
以下是您要翻译的部分:
FYI, your translation functions are unnecessary. The RTL has its own ways of converting to/from Ansi strings with codepages, such as `System.LocaleCharsFromUnicode()` and `System.UnicodeFromLocaleChars()`, or `SysUtils.TEncoding`, or `RawByteString` with `System.SetCodePage()`.
You should use the built-in functionality instead of rolling your own, especially since you are not handling UTF-16 surrogates, whereas the RTL will.
Try something more like this instead:
unit UTFStringField;

interface

uses
  Data.DB;

type
  // Class helper that adds code-page-aware accessors to every TField, so they
  // can be called on the result of FieldByName() without subclassing.
  // Note the keyword order: Delphi spells this `class helper for`.
  TUTFStringField = class helper for TField
  public
    // Converts UTF to ANSI text in the given code page and stores it in the field.
    procedure SetAsUTF(const UTF: string; Codepage: Word);
    // Reads the field as ANSI text and decodes it from the given code page.
    function GetAsUTF(Codepage: Word): string;
  end;

implementation

procedure TUTFStringField.SetAsUTF(const UTF: string; Codepage: Word);
var
  NewStr: AnsiString;
begin
  // First call sizes the buffer, second call performs the conversion.
  SetLength(NewStr, LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), nil, 0, nil, nil));
  LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), PAnsiChar(NewStr), Length(NewStr), nil, nil);
  // Tag the bytes with their code page without converting them again.
  SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
  { alternatively:
  var enc: TEncoding := TEncoding.GetEncoding(Codepage);
  try
    SetLength(NewStr, enc.GetByteCount(UTF));
    enc.GetBytes(PChar(UTF), Length(UTF), PByte(PAnsiChar(NewStr)), Length(NewStr));
  finally
    enc.Free;
  end;
  SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
  }
  { alternatively:
  var raw: RawByteString := PRawByteString(@UTF)^;
  SetCodePage(raw, Codepage, True);
  NewStr := PAnsiString(@raw)^;
  }
  Self.AsAnsiString := NewStr;
end;

function TUTFStringField.GetAsUTF(Codepage: Word): string;
var
  txt: AnsiString;
begin
  txt := Self.AsAnsiString;
  // First call sizes the buffer, second call performs the conversion.
  SetLength(Result, UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), nil, 0));
  UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), PWideChar(Result), Length(Result));
  { alternatively:
  var enc: TEncoding := TEncoding.GetEncoding(Codepage);
  try
    SetLength(Result, enc.GetCharCount(PByte(PAnsiChar(txt)), Length(txt)));
    enc.GetChars(PByte(PAnsiChar(txt)), Length(txt), PChar(Result), Length(Result));
  finally
    enc.Free;
  end;
  }
  { alternatively:
  SetCodePage(PRawByteString(@txt)^, Codepage, False);
  Result := string(txt);
  }
end;

end.
And then, you can call them like this (just make sure UTFStringField
is in the uses
clause):
MyString := UniQuery1.FieldByName('TEXT').GetAsUTF(1252);
UniQuery1.FieldByName('TEXT').SetAsUTF(MyString, 1252);
英文:
FYI, your translation functions are unnecessary. The RTL has its own ways of converting to/from Ansi strings with codepages, such as System.LocaleCharsFromUnicode()
and System.UnicodeFromLocaleChars()
, or SysUtils.TEncoding
, or RawByteString
with System.SetCodePage()
.
You should use the built-in functionality instead of rolling your own, especially since you are not handling UTF-16 surrogates, whereas the RTL will.
Try something more like this instead:
unit UTFStringField;

interface

uses
  Data.DB;

type
  // Class helper that adds code-page-aware accessors to every TField, so they
  // can be called on the result of FieldByName() without subclassing.
  // Note the keyword order: Delphi spells this `class helper for`.
  TUTFStringField = class helper for TField
  public
    // Converts UTF to ANSI text in the given code page and stores it in the field.
    procedure SetAsUTF(const UTF: string; Codepage: Word);
    // Reads the field as ANSI text and decodes it from the given code page.
    function GetAsUTF(Codepage: Word): string;
  end;

implementation

procedure TUTFStringField.SetAsUTF(const UTF: string; Codepage: Word);
var
  NewStr: AnsiString;
begin
  // First call sizes the buffer, second call performs the conversion.
  SetLength(NewStr, LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), nil, 0, nil, nil));
  LocaleCharsFromUnicode(Codepage, 0, PChar(UTF), Length(UTF), PAnsiChar(NewStr), Length(NewStr), nil, nil);
  // Tag the bytes with their code page without converting them again.
  SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
  { alternatively:
  var enc: TEncoding := TEncoding.GetEncoding(Codepage);
  try
    SetLength(NewStr, enc.GetByteCount(UTF));
    enc.GetBytes(PChar(UTF), Length(UTF), PByte(PAnsiChar(NewStr)), Length(NewStr));
  finally
    enc.Free;
  end;
  SetCodePage(PRawByteString(@NewStr)^, Codepage, False);
  }
  { alternatively:
  var raw: RawByteString := PRawByteString(@UTF)^;
  SetCodePage(raw, Codepage, True);
  NewStr := PAnsiString(@raw)^;
  }
  Self.AsAnsiString := NewStr;
end;

function TUTFStringField.GetAsUTF(Codepage: Word): string;
var
  txt: AnsiString;
begin
  txt := Self.AsAnsiString;
  // First call sizes the buffer, second call performs the conversion.
  SetLength(Result, UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), nil, 0));
  UnicodeFromLocaleChars(Codepage, 0, PAnsiChar(txt), Length(txt), PWideChar(Result), Length(Result));
  { alternatively:
  var enc: TEncoding := TEncoding.GetEncoding(Codepage);
  try
    SetLength(Result, enc.GetCharCount(PByte(PAnsiChar(txt)), Length(txt)));
    enc.GetChars(PByte(PAnsiChar(txt)), Length(txt), PChar(Result), Length(Result));
  finally
    enc.Free;
  end;
  }
  { alternatively:
  SetCodePage(PRawByteString(@txt)^, Codepage, False);
  Result := string(txt);
  }
end;

end.
And then, you can call them like this (just make sure UTFStringField
is in the uses
clause):
MyString := UniQuery1.FieldByName('TEXT').GetAsUTF(1252);
UniQuery1.FieldByName('TEXT').SetAsUTF(MyString, 1252);
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论