How to create multiple XML files if single XML file matches the requirements?
Question
Below is my client code, which streams all the customer URLs from a golang grpc server, and it works fine. It takes a Request input parameter and streams customer URLs based on a particular clientId. In the code below, I am streaming all customer URLs for ClientId 12345 and it works fine.
I am also creating an XML file with all the URLs in it for a particular clientId, as shown below. For example, the code below will create an XML file named 12345_abc.xml with all the URLs in it in a particular format.
func main() {
    // this "clientId" will be configurable in future
    clientId := 12345
    timeout := time.Duration(1000) * time.Millisecond
    ctx, _ := context.WithTimeout(context.Background(), timeout)
    conn, err := grpc.DialContext(ctx, "localhost:50005", grpc.WithInsecure())
    if err != nil {
        log.Fatalf("can not connect with server %v", err)
    }

    // create stream
    client := pb.NewCustomerServiceClient(conn)
    req := &pb.Request{ClientId: clientId}
    stream, err := client.FetchResponse(context.Background(), req)
    if err != nil {
        log.Fatalf("open stream error %v", err)
    }

    // create new object to populate all URL data in memory
    urlHolder := NewClient()
    t := time.Unix(0, 0).UTC()
    done := make(chan bool)
    go func() {
        for {
            resp, err := stream.Recv()
            if err == io.EOF {
                done <- true
                return
            }
            if err != nil {
                log.Fatalf("can not receive %v", err)
            }
            log.Printf("Resp received: %s", resp.GetCustomerUrl())
            // populate URL object with all the required fields in it
            urlHolder.Add(&URL{
                Loc:        resp.GetCustomerUrl(),
                LastMod:    &t,
                ChangeFreq: Daily,
                Priority:   10.2,
            })
        }
    }()
    <-done
    log.Printf("finished")

    // create an XML file with all the URLs in it and then save it on disk
    // for a particular clientId. This will create "12345_abc.xml"
    file, _ := os.Create(fmt.Sprintf("%d_abc.xml", clientId))
    urlHolder.WriteTo(file)
}
Here is my urlholder.go file:
type URL struct {
    Loc        string     `xml:"loc"`
    LastMod    *time.Time `xml:"lastmod"`
    ChangeFreq ChangeFreq `xml:"changefreq"`
    Priority   float32    `xml:"priority"`
}

type UrlMap struct {
    XMLName xml.Name `xml:"urlset"`
    Xmlns   string   `xml:"xmlns,attr"`
    URLs    []*URL   `xml:"url"`
    Minify  bool     `xml:"-"`
}

func NewClient() *UrlMap {
    return &UrlMap{
        Xmlns: "http://www.sitemaps.org/schemas/sitemap/0.9",
        URLs:  make([]*URL, 0),
    }
}

func (s *UrlMap) Add(u *URL) {
    s.URLs = append(s.URLs, u)
}

// WriteTo writes XML encoded urlMap to given io.Writer.
func (s *UrlMap) WriteTo(w io.Writer) (n int64, err error) {
    cw := NewCounterWriter(w)
    _, err = cw.Write([]byte(xml.Header))
    if err != nil {
        return cw.Count(), err
    }
    en := xml.NewEncoder(cw)
    if !s.Minify {
        en.Indent("", " ")
    }
    err = en.Encode(s)
    cw.Write([]byte{'\n'})
    return cw.Count(), err
}
Here is my CounterWriter type:
// CounterWriter implements io.Writer. Count of bytes written is tracked.
type CounterWriter struct {
    writer io.Writer
    count  int64
}

var _ io.Writer = (*CounterWriter)(nil)

// NewCounterWriter wraps io.Writer and returns CounterWriter.
func NewCounterWriter(w io.Writer) (cw *CounterWriter) {
    return &CounterWriter{
        writer: w,
    }
}

// Write calls Write on the wrapped io.Writer and adds the number of bytes
// written to the counter.
func (cw *CounterWriter) Write(p []byte) (n int, err error) {
    n, err = cw.writer.Write(p)
    cw.count = cw.count + int64(n)
    return n, err
}

// Count returns the number of bytes written to the Writer.
func (cw *CounterWriter) Count() (n int64) {
    return cw.count
}
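To illustrate how this byte counting behaves, here is a tiny usage sketch (not part of my program, just an illustration; the name exampleCount is made up, and it assumes bytes and fmt are imported):

func exampleCount() {
    buf := &bytes.Buffer{}
    cw := NewCounterWriter(buf)
    cw.Write([]byte("<loc>https://example.com</loc>"))
    fmt.Println(cw.Count()) // prints 30, the number of bytes written so far
}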
Problem Statement
The above code works fine, but I need to split one XML file into multiple XML files for the same clientId if it matches the below requirements:
- A single XML file should not be more than 50MB max. It can be approximate, doesn't have to be accurate.
- A single XML file should not have more than 50K URLs max.
I know it's weird that the 50K URL limit will be reached sooner than the 50MB limit, but that is the requirement I was given. Based on the above logic, I need to create multiple XML files for a particular clientId. Those files could be named like 12345_abc_1.xml, 12345_abc_2.xml, or any other better naming format. I am a bit confused about how I should proceed with this.
I can add the logic for the 50K URL limit by using a for loop, but I am confused about the size logic, and I also want to make this generic for each clientId, so I am having difficulties doing this.
Answer 1
Score: 1
Inside your WriteTo function, you should be calling something like w.Write(myBytes).
The size of myBytes inside that function is the size that you are looking for. You can get it using len(myBytes) or from the first return value of w.Write(myBytes). This is important because there is no way of "estimating" the size that a file would have, other than directly counting the information that you will write.
You are converting UrlMap into bytes somewhere inside your WriteTo function. That means you can do the same with any URL variable.
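As a quick sanity check of that idea, you can measure how big a single entry becomes once encoded. The helper below is only a sketch (the name approxEntrySize is made up, and it assumes it sits in the same package as urlholder.go with encoding/xml imported):

// approxEntrySize reports roughly how many bytes one <url> element
// takes once XML-encoded; indentation and the surrounding <urlset>
// wrapper add a little on top of this, so it is an approximation.
func approxEntrySize(u *URL) int {
    b, err := xml.Marshal(u) // the same conversion the encoder performs per entry
    if err != nil {
        return 0 // for a rough size estimate, count a failed marshal as zero bytes
    }
    return len(b)
}

That is essentially the measurement the modified Add function below returns for every URL it appends.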
The way I would solve this problem is to have a sizeCounter and add the number of bytes that would be stored every time I create a new URL variable inside the for { loop. In the same place I would also count the number of URLs created. With both counters, the rest is easy.
I would add the transformation from URL to bytes inside the .Add function and return it, so that everything is easier to understand. You are going to have to move some variables into the go routine.
func (s *UrlMap) Add(u *URL) int { // Modify this function to count the size and return it
    s.URLs = append(s.URLs, u)
    var urlBytes []byte
    var err error
    urlBytes, err = xml.Marshal(u) // Transform to bytes using xml.Marshal or xml.MarshalIndent
    if err != nil {
        panic(err) // or return the error if you want
    }
    return len(urlBytes)
}

t := time.Unix(0, 0).UTC()
done := make(chan bool)
go func() {
    // create new object to populate all URL data in memory
    urlHolder := NewClient()
    urlCounter := 0
    byteCounter := 0
    fileCounter := 0
    for {
        resp, err := stream.Recv()
        if err == io.EOF {
            // write the remaining URLs before signalling done, so that main
            // cannot exit while this last file is still being written
            file, _ := os.Create(fmt.Sprintf("%d_abc_%d.xml", clientId, fileCounter))
            urlHolder.WriteTo(file)
            done <- true
            return
        }
        if err != nil {
            log.Fatalf("can not receive %v", err)
        }
        log.Printf("Resp received: %s", resp.GetCustomerUrl())
        // I add the bytes of the URL here as a return
        urlBytes := urlHolder.Add(&URL{
            Loc:        resp.GetCustomerUrl(),
            LastMod:    &t,
            ChangeFreq: Daily,
            Priority:   10.2,
        })
        byteCounter += urlBytes
        urlCounter += 1
        if byteCounter > 49000000 || urlCounter >= 50000 {
            file, _ := os.Create(fmt.Sprintf("%d_abc_%d.xml", clientId, fileCounter))
            urlHolder.WriteTo(file)
            urlHolder = NewClient() // create a new object for next loop
            fileCounter += 1        // prepare fileCounter for next loop
            byteCounter = 0         // restart count variables
            urlCounter = 0
        }
    }
}()
<-done
log.Printf("finished")
// No longer write the files here.
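One more thing worth tightening up, separate from the splitting logic: the snippet above keeps the original pattern of ignoring the error from os.Create and never closing the files. A small helper could centralize that handling; this is only a sketch, and the name writeChunk is made up:

// writeChunk creates one numbered XML file for a client, writes the
// collected URLs into it, closes the file, and returns any error it hit.
func writeChunk(clientId int, fileCounter int, urlHolder *UrlMap) error {
    file, err := os.Create(fmt.Sprintf("%d_abc_%d.xml", clientId, fileCounter))
    if err != nil {
        return err
    }
    defer file.Close()
    _, err = urlHolder.WriteTo(file)
    return err
}

Both places that currently call os.Create and WriteTo directly could then call writeChunk(clientId, fileCounter, urlHolder) and log the returned error.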