在Golang中过滤字节流的正确方法是什么?

huangapple go评论85阅读模式
英文:

Correct way to filter a byte stream in Golang?

问题

我想要过滤一个命令的标准输出,只保留每个连续的以\r结尾的行块的第一行和最后一行(以忽略大部分的进度指示器)。

这是我的尝试(原始代码做了更多的事情,这是一个简化版本,但基本上过滤必须在输入进来时进行,而不是在最后):

package main

import (
	"bytes"
	"fmt"
	"os/exec"
)

var cr = []byte("\r")
var lf = []byte("\n")

func main() {
	input1 := []byte("a\nb\n\nprogress 98%\r")
	input2 := []byte("progress 99%\r")
	input3 := []byte("progress 100%\r")
	input4 := []byte("\n\nc\n")

	var stream []byte
	stream = append(stream, input1...)
	stream = append(stream, input2...)
	stream = append(stream, input3...)
	stream = append(stream, input4...)

	fmt.Printf("stream:\n%s\n", stream)

	streamer := &myFilter{}
	streamer.Write(input1)
	streamer.Write(input2)
	streamer.Write(input3)
	streamer.Write(input4)
	final := streamer.Bytes()

	fmt.Printf("streamer:\n%s\n\n", final)

	cmd := exec.Command("bash", "-c", "perl -e '$|++; print qq[a\nb\n\nprogress: 98%\r]; for (99..100) { print qq[progess: $_%\r]; sleep(1); } print qq[\n\nc\n]'")
	cmd.Stdout = &myFilter{}
	cmd.Start()
	cmd.Wait()
	fromCmd := cmd.Stdout.(*myFilter).Bytes()

	fmt.Printf("fromCmd:\n%s\n", fromCmd)
}

type myFilter struct {
	partialLine []byte
	storage     []byte
}

func (w *myFilter) Write(p []byte) (n int, err error) {
	// in order to filter out all but the first and last line of a set of \r
	// terminated lines (a progress bar), we need to collect whole \n terminated
	// lines
	lines := bytes.SplitAfter(p, lf)

	if len(w.partialLine) > 0 || (len(lines) == 1 && !bytes.HasSuffix(p, lf)) {
		w.partialLine = append(w.partialLine, lines[0]...)

		partialComplete := false
		if len(lines) > 1 {
			lines = lines[1:]
			partialComplete = true

		} else {
			lines = nil
			if bytes.HasSuffix(p, lf) {
				partialComplete = true
			}
		}

		if partialComplete {
			w.filterCR(w.partialLine)
			w.partialLine = nil
		}
	}

	lastLineIndex := len(lines) - 1
	if lastLineIndex > -1 && !bytes.HasSuffix(p, lf) {
		w.partialLine, lines = lines[lastLineIndex], lines[:lastLineIndex]
	}

	for _, line := range lines {
		w.filterCR(line)
	}

	return len(p), nil
}

func (w *myFilter) filterCR(p []byte) {
	if bytes.Contains(p, cr) {
		lines := bytes.Split(p, cr)
		w.store(lines[0])
		w.store(lf)

		if len(lines) > 2 {
			w.store(lines[len(lines)-2])
			w.store(lf)
		}
	} else {
		w.store(p)
	}
}

func (w *myFilter) store(p []byte) {
	w.storage = append(w.storage, p...)
}

func (w *myFilter) Bytes() []byte {
	if len(w.partialLine) > 0 {
		w.filterCR(w.partialLine)
	}
	return w.storage
}

我的输出是:

stream:
a
b
progress 100%
c
streamer:
a
b
progress 98%
progress 100%
c
fromCmd:
a
b
ss: 100%
progess: 100%
c

我想要的是,从"fromCmd"看到的输出与我从"streamer"得到的输出相匹配。

我做错了什么?为什么我的实际输出似乎"损坏"了?为什么真正的命令运行行为与我的"streamer"测试不同?有什么更好的方法来过滤标准输出?

英文:

I want to filter the STDOUT from a command such that I only keep the first and last line of any contiguous block of \r terminated lines (to largely ignore progress indicators).

Here's my attempt (orig code does more, this is a simplified version, but basically the filtering has to happen as the input comes in, not at the end):

package main
import (
"bytes"
"fmt"
"os/exec"
)
var cr = []byte("\r")
var lf = []byte("\n")
func main() {
input1 := []byte("a\nb\n\nprogress 98%\r")
input2 := []byte("progress 99%\r")
input3 := []byte("progress 100%\r")
input4 := []byte("\n\nc\n")
var stream []byte
stream = append(stream, input1...)
stream = append(stream, input2...)
stream = append(stream, input3...)
stream = append(stream, input4...)
fmt.Printf("stream:\n%s\n", stream)
streamer := &myFilter{}
streamer.Write(input1)
streamer.Write(input2)
streamer.Write(input3)
streamer.Write(input4)
final := streamer.Bytes()
fmt.Printf("streamer:\n%s\n\n", final)
cmd := exec.Command("bash", "-c", "perl -e '$|++; print qq[a\nb\n\nprogress: 98%\r]; for (99..100) { print qq[progess: $_%\r]; sleep(1); } print qq[\n\nc\n]'")
cmd.Stdout = &myFilter{}
cmd.Start()
cmd.Wait()
fromCmd := cmd.Stdout.(*myFilter).Bytes()
fmt.Printf("fromCmd:\n%s\n", fromCmd)
}
type myFilter struct {
partialLine []byte
storage     []byte
}
func (w *myFilter) Write(p []byte) (n int, err error) {
// in order to filter out all but the first and last line of a set of \r
// terminated lines (a progress bar), we need to collect whole \n terminated
// lines
lines := bytes.SplitAfter(p, lf)
if len(w.partialLine) > 0 || (len(lines) == 1 && !bytes.HasSuffix(p, lf)) {
w.partialLine = append(w.partialLine, lines[0]...)
partialComplete := false
if len(lines) > 1 {
lines = lines[1:]
partialComplete = true
} else {
lines = nil
if bytes.HasSuffix(p, lf) {
partialComplete = true
}
}
if partialComplete {
w.filterCR(w.partialLine)
w.partialLine = nil
}
}
lastLineIndex := len(lines) - 1
if lastLineIndex > -1 && !bytes.HasSuffix(p, lf) {
w.partialLine, lines = lines[lastLineIndex], lines[:lastLineIndex]
}
for _, line := range lines {
w.filterCR(line)
}
return len(p), nil
}
func (w *myFilter) filterCR(p []byte) {
if bytes.Contains(p, cr) {
lines := bytes.Split(p, cr)
w.store(lines[0])
w.store(lf)
if len(lines) > 2 {
w.store(lines[len(lines)-2])
w.store(lf)
}
} else {
w.store(p)
}
}
func (w *myFilter) store(p []byte) {
w.storage = append(w.storage, p...)
}
func (w *myFilter) Bytes() []byte {
if len(w.partialLine) > 0 {
w.filterCR(w.partialLine)
}
return w.storage
}

My output is:

stream:
a
b
progress 100%
c
streamer:
a
b
progress 98%
progress 100%
c
fromCmd:
a
b
ss: 100%
progess: 100%
c

What I want is the output you see from "fromCmd" to match the output I got from "streamer".

What am I doing wrong, why does my actual output seem "corrupt", why does the real command run behave differently to my "streamer" test, and what's a better way to filter STDOUT?

答案1

得分: 3

你的部分行算法对于所有输入都不正确。

你可以用 bufio.Scanner 替换 myFilter,它会正确处理部分行缓冲,并使用 []bytebytes.Buffer 来累积输出。

var out bytes.Buffer
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
    p := scanner.Bytes()
    lines := bytes.Split(p, cr)
    out.Write(lines[0])
    out.Write(lf)
    if len(lines) > 1 {
        out.Write(lines[len(lines)-1])
        out.Write(lf)
    }
}
英文:

Your partial line algorithm isn't correct for all inputs.

You can replace myFilter with a bufio.Scanner, which will handle the partial line buffering correctly for you, and a []byte or bytes.Buffer to accumulate the output.

var out bytes.Buffer
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
p := scanner.Bytes()
lines := bytes.Split(p, cr)
out.Write(lines[0])
out.Write(lf)
if len(lines) > 1 {
out.Write(lines[len(lines)-1])
out.Write(lf)
}
}

huangapple
  • 本文由 发表于 2017年2月13日 22:43:16
  • 转载请务必保留本文链接:https://go.coder-hub.com/42206739.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定