英文:
Write UTF-16 encoded CSV using Golang
问题
我需要编写一个UTF-16编码的CSV文件,但我无法弄清楚如何做到这一点。我找到了很多关于读取UTF-16 CSV文件的问题和答案,但没有关于写入的内容。
这是我到目前为止尝试过的代码:
package main
import (
"encoding/csv"
"fmt"
"os"
"golang.org/x/text/encoding/unicode"
)
// main creates test.csv, converts a sample message with
// convertUtf8ToUtf16LE, prints the converted value, and writes it as two
// single-field CSV records.
func main() {
csvFile, err := os.Create("test.csv")
if err != nil {
panic(err)
}
defer csvFile.Close()
message := "weird characters: дгодг"
// From here on, message holds the encoder's output rather than the original text.
message, err = convertUtf8ToUtf16LE(message)
if err != nil {
panic(err)
}
fmt.Println(message)
// The CSV writer is attached directly to the file, so the already-converted
// string is handed to encoding/csv as an ordinary field value.
csvWriter := csv.NewWriter(csvFile)
// Deferred calls run last-in-first-out: Flush runs before csvFile.Close.
defer csvWriter.Flush()
// The errors returned by Write are not checked here.
csvWriter.Write([]string{message})
csvWriter.Write([]string{message})
}
// convertUtf8ToUtf16LE runs message through a little-endian UTF-16 encoder
// configured with unicode.UseBOM and returns the encoder's output as a
// string, together with any encoding error.
func convertUtf8ToUtf16LE(message string) (string, error) {
encoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder()
return encoder.String(message)
}
但是我在CSV文件中得到了以下输出:
weird characters: дгодг*矾攀椀爀搀 挀栀愀爀愀挀琀攀爀猀㨀 㐀㌄㸄㐄㌄
我做错了什么?
英文:
I need to write a UTF-16 encoded CSV and I can't figure out how to.
I found a lot of questions and answers about reading UTF-16 csvs but nothing about writing.
This is what I've tried so far:
package main
import (
"encoding/csv"
"fmt"
"os"
"golang.org/x/text/encoding/unicode"
)
// main creates test.csv, converts a sample message with
// convertUtf8ToUtf16LE, prints the converted value, and writes it as two
// single-field CSV records.
func main() {
csvFile, err := os.Create("test.csv")
if err != nil {
panic(err)
}
defer csvFile.Close()
message := "weird characters: дгодг"
// From here on, message holds the encoder's output rather than the original text.
message, err = convertUtf8ToUtf16LE(message)
if err != nil {
panic(err)
}
fmt.Println(message)
// The CSV writer is attached directly to the file, so the already-converted
// string is handed to encoding/csv as an ordinary field value.
csvWriter := csv.NewWriter(csvFile)
// Deferred calls run last-in-first-out: Flush runs before csvFile.Close.
defer csvWriter.Flush()
// The errors returned by Write are not checked here.
csvWriter.Write([]string{message})
csvWriter.Write([]string{message})
}
// convertUtf8ToUtf16LE runs message through a little-endian UTF-16 encoder
// configured with unicode.UseBOM and returns the encoder's output as a
// string, together with any encoding error.
func convertUtf8ToUtf16LE(message string) (string, error) {
encoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder()
return encoder.String(message)
}
But I get the following output in the csv:
weird characters: дгодг*矾攀椀爀搀 挀栀愀爀愀挀琀攀爀猀㨀 㐀㌄㸄㐄㌄
What am I doing wrong?
答案1
得分: 2
我对编码转换还不太熟悉,但我认为这段代码实现了你自己那段代码想要实现的功能,并且使用了相同的text包。
从你的问题来看,你只是缺少了transform.NewWriter()
组件。
我参考了这个答案,并将其改为了编码为UTF16LE(哦,我的天啊,Go让这些事情变得如此简单):
package main
import (
"encoding/csv"
"os"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// main writes a small table of records to utf16le.csv as UTF-16LE (with a
// byte-order mark) by routing encoding/csv output through a transform.Writer
// that re-encodes the UTF-8 bytes on the fly.
func main() {
	records := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	f, err := os.Create("utf16le.csv")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// UseBOM makes the encoder emit the FF FE mark at the start of the stream.
	t := transform.NewWriter(f, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder())

	w := csv.NewWriter(t)
	for _, record := range records {
		if err := w.Write(record); err != nil {
			panic(err)
		}
	}

	// csv.Writer buffers its output; errors from the underlying writer are
	// only reported through Error after Flush.
	w.Flush()
	if err := w.Error(); err != nil {
		panic(err)
	}

	// Close writes out anything the transformer is still holding. It does not
	// close f, which is handled by the deferred f.Close above.
	if err := t.Close(); err != nil {
		panic(err)
	}
}
查看生成的CSV文件:
hexdump -C utf16le.csv
00000000 ff fe 66 00 69 00 72 00 73 00 74 00 5f 00 6e 00 |��f.i.r.s.t._.n.|
00000010 61 00 6d 00 65 00 2c 00 6c 00 61 00 73 00 74 00 |a.m.e.,.l.a.s.t.|
00000020 5f 00 6e 00 61 00 6d 00 65 00 2c 00 75 00 73 00 |_.n.a.m.e.,.u.s.|
00000030 65 00 72 00 6e 00 61 00 6d 00 65 00 0a 00 52 00 |e.r.n.a.m.e...R.|
00000040 6f 00 62 00 2c 00 50 00 69 00 6b 00 65 00 2c 00 |o.b.,.P.i.k.e.,.|
00000050 72 00 6f 00 62 00 0a 00 4b 00 65 00 6e 00 2c 00 |r.o.b...K.e.n.,.|
00000060 54 00 68 00 6f 00 6d 00 70 00 73 00 6f 00 6e 00 |T.h.o.m.p.s.o.n.|
00000070 2c 00 6b 00 65 00 6e 00 0a 00 52 00 6f 00 62 00 |,.k.e.n...R.o.b.|
00000080 65 00 72 00 74 00 2c 00 47 00 72 00 69 00 65 00 |e.r.t.,.G.r.i.e.|
00000090 73 00 65 00 6d 00 65 00 72 00 2c 00 67 00 72 00 |s.e.m.e.r.,.g.r.|
000000a0 69 00 0a 00 |i...|
000000a4
英文:
I'm new to transforming, but I think this does the same thing you wrote for yourself, and it uses the same text package.
From your question, it looks like you were just missing the transform.NewWriter()
component.
I took this answer, and just turned it around to make it encode UTF16LE (oh. my. stars. Go makes these things so easy):
package main
import (
"encoding/csv"
"os"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// main writes a small table of records to utf16le.csv as UTF-16LE (with a
// byte-order mark) by routing encoding/csv output through a transform.Writer
// that re-encodes the UTF-8 bytes on the fly.
func main() {
	records := [][]string{
		{"first_name", "last_name", "username"},
		{"Rob", "Pike", "rob"},
		{"Ken", "Thompson", "ken"},
		{"Robert", "Griesemer", "gri"},
	}

	f, err := os.Create("utf16le.csv")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// UseBOM makes the encoder emit the FF FE mark at the start of the stream.
	t := transform.NewWriter(f, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder())

	w := csv.NewWriter(t)
	for _, record := range records {
		if err := w.Write(record); err != nil {
			panic(err)
		}
	}

	// csv.Writer buffers its output; errors from the underlying writer are
	// only reported through Error after Flush.
	w.Flush()
	if err := w.Error(); err != nil {
		panic(err)
	}

	// Close writes out anything the transformer is still holding. It does not
	// close f, which is handled by the deferred f.Close above.
	if err := t.Close(); err != nil {
		panic(err)
	}
}
And viewing that CSV file:
hexdump -C utf16le.csv
00000000 ff fe 66 00 69 00 72 00 73 00 74 00 5f 00 6e 00 |��f.i.r.s.t._.n.|
00000010 61 00 6d 00 65 00 2c 00 6c 00 61 00 73 00 74 00 |a.m.e.,.l.a.s.t.|
00000020 5f 00 6e 00 61 00 6d 00 65 00 2c 00 75 00 73 00 |_.n.a.m.e.,.u.s.|
00000030 65 00 72 00 6e 00 61 00 6d 00 65 00 0a 00 52 00 |e.r.n.a.m.e...R.|
00000040 6f 00 62 00 2c 00 50 00 69 00 6b 00 65 00 2c 00 |o.b.,.P.i.k.e.,.|
00000050 72 00 6f 00 62 00 0a 00 4b 00 65 00 6e 00 2c 00 |r.o.b...K.e.n.,.|
00000060 54 00 68 00 6f 00 6d 00 70 00 73 00 6f 00 6e 00 |T.h.o.m.p.s.o.n.|
00000070 2c 00 6b 00 65 00 6e 00 0a 00 52 00 6f 00 62 00 |,.k.e.n...R.o.b.|
00000080 65 00 72 00 74 00 2c 00 47 00 72 00 69 00 65 00 |e.r.t.,.G.r.i.e.|
00000090 73 00 65 00 6d 00 65 00 72 00 2c 00 67 00 72 00 |s.e.m.e.r.,.g.r.|
000000a0 69 00 0a 00 |i...|
000000a4
答案2
得分: 1
我最终做的是创建了一个实现了io.Writer接口的结构体,用于文件写入,但在写入之前将输入转换为UTF-16LE编码:
// UTF16LEWriter pairs a destination file with a text encoder; its Write
// method passes incoming bytes through the encoder before writing them to
// the file.
type UTF16LEWriter struct {
// file is the destination that receives the encoded bytes.
file *os.File
// encoder converts each Write call's input before it reaches file.
encoder *encoding.Encoder
}
// NewUTF16LEWriter writes the UTF-16LE byte-order mark (FF FE) to file and
// returns a writer that encodes everything subsequently written to it as
// UTF-16LE before passing it on to file.
//
// If the BOM cannot be written, it returns a nil writer and the error.
func NewUTF16LEWriter(file *os.File) (*UTF16LEWriter, error) {
	// The BOM is written by hand and the encoder is configured with
	// IgnoreBOM, so the encoder itself never emits one.
	if _, err := file.Write([]byte{0xFF, 0xFE}); err != nil {
		return nil, err
	}
	return &UTF16LEWriter{
		file:    file,
		encoder: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder(),
	}, nil
}
// Write encodes b as UTF-16LE and writes the result to the underlying file.
// It implements io.Writer.
//
// On success it returns len(b), the number of input bytes consumed, as the
// io.Writer contract requires, not the number of encoded bytes (which can be
// larger or smaller than the input). On failure it returns 0 and the error
// from the encoder or from the file.
//
// NOTE: every call is encoded on its own, so a multi-byte UTF-8 sequence
// that is split across two calls is not reassembled. transform.NewWriter
// handles that case and is preferable for large outputs.
func (w *UTF16LEWriter) Write(b []byte) (int, error) {
	encoded, err := w.encoder.Bytes(b)
	if err != nil {
		return 0, err
	}
	if _, err := w.file.Write(encoded); err != nil {
		return 0, err
	}
	return len(b), nil
}
然后,我只需要用我的io.Writer替换os.Create提供的接口,并将其传递给csvWriter:
package main
import (
"encoding/csv"
"fmt"
"os"
"golang.org/x/text/encoding/unicode"
)
// main writes two single-field CSV records to test.csv, encoded as UTF-16LE
// by a UTF16LEWriter wrapped around the file.
func main() {
	csvFile, err := os.Create("test.csv")
	if err != nil {
		panic(err)
	}
	defer csvFile.Close()

	utf16Writer, err := NewUTF16LEWriter(csvFile)
	if err != nil {
		panic(err)
	}

	csvWriter := csv.NewWriter(utf16Writer)
	message := "weird characters: дгодг"
	for i := 0; i < 2; i++ {
		if err := csvWriter.Write([]string{message}); err != nil {
			panic(fmt.Errorf("writing record %d: %w", i+1, err))
		}
	}

	// csv.Writer buffers its output; errors from the underlying writer are
	// only reported through Error after Flush, so flush explicitly instead of
	// deferring it and losing the result.
	csvWriter.Flush()
	if err := csvWriter.Error(); err != nil {
		panic(fmt.Errorf("flushing test.csv: %w", err))
	}
}
英文:
What I ended up doing is I created a struct that implements io.Writer for a file but converts the input to UTF-16LE before writing:
// UTF16LEWriter pairs a destination file with a text encoder; its Write
// method passes incoming bytes through the encoder before writing them to
// the file.
type UTF16LEWriter struct {
// file is the destination that receives the encoded bytes.
file *os.File
// encoder converts each Write call's input before it reaches file.
encoder *encoding.Encoder
}
// NewUTF16LEWriter writes the UTF-16LE byte-order mark (FF FE) to file and
// returns a writer that encodes everything subsequently written to it as
// UTF-16LE before passing it on to file.
//
// If the BOM cannot be written, it returns a nil writer and the error.
func NewUTF16LEWriter(file *os.File) (*UTF16LEWriter, error) {
	// The BOM is written by hand and the encoder is configured with
	// IgnoreBOM, so the encoder itself never emits one.
	if _, err := file.Write([]byte{0xFF, 0xFE}); err != nil {
		return nil, err
	}
	return &UTF16LEWriter{
		file:    file,
		encoder: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder(),
	}, nil
}
// Write encodes b as UTF-16LE and writes the result to the underlying file.
// It implements io.Writer.
//
// On success it returns len(b), the number of input bytes consumed, as the
// io.Writer contract requires, not the number of encoded bytes (which can be
// larger or smaller than the input). On failure it returns 0 and the error
// from the encoder or from the file.
//
// NOTE: every call is encoded on its own, so a multi-byte UTF-8 sequence
// that is split across two calls is not reassembled. transform.NewWriter
// handles that case and is preferable for large outputs.
func (w *UTF16LEWriter) Write(b []byte) (int, error) {
	encoded, err := w.encoder.Bytes(b)
	if err != nil {
		return 0, err
	}
	if _, err := w.file.Write(encoded); err != nil {
		return 0, err
	}
	return len(b), nil
}
Then I only need to replace the io.Writer provided by os.Create with mine and pass that to the csvWriter:
package main
import (
"encoding/csv"
"fmt"
"os"
"golang.org/x/text/encoding/unicode"
)
// main writes two single-field CSV records to test.csv, encoded as UTF-16LE
// by a UTF16LEWriter wrapped around the file.
func main() {
	csvFile, err := os.Create("test.csv")
	if err != nil {
		panic(err)
	}
	defer csvFile.Close()

	utf16Writer, err := NewUTF16LEWriter(csvFile)
	if err != nil {
		panic(err)
	}

	csvWriter := csv.NewWriter(utf16Writer)
	message := "weird characters: дгодг"
	for i := 0; i < 2; i++ {
		if err := csvWriter.Write([]string{message}); err != nil {
			panic(fmt.Errorf("writing record %d: %w", i+1, err))
		}
	}

	// csv.Writer buffers its output; errors from the underlying writer are
	// only reported through Error after Flush, so flush explicitly instead of
	// deferring it and losing the result.
	csvWriter.Flush()
	if err := csvWriter.Error(); err != nil {
		panic(fmt.Errorf("flushing test.csv: %w", err))
	}
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论