英文:
Calculating the Work Rate in parallel computing Golang
问题
我目前正在处理一个同时顺序和并行运行图像处理的工作。作为我的度量标准的一部分,我正在尝试计算工作速率,但是我在网上找不到计算工作速率的公式或者太多的信息。
有人知道计算工作速率所需的方程吗?
编辑:
这是我的主要函数,其中包含度量计算。如果这有帮助的话,我知道可能有更好的方法来获取某些数据等,但是通过一些试错,我已经到达了这个点。
runtime.GOMAXPROCS(4)
file, err := os.Open("space.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img, err := jpeg.Decode(file)
if err != nil {
log.Fatal(os.Stderr, "%s: %v\n", "space.jpg", err)
}
for i := 0; i < 10; i++ {
TSeqStart := time.Now()
b := img.Bounds()
imgSet := image.NewRGBA(b)
for y := 0; y < b.Max.Y; y++ {
for x := 0; x < b.Max.X; x++ {
oldPixel := img.At(x, y)
r, g, b, _ := oldPixel.RGBA()
lum := 0.299*float64(r) + 0.587*float64(g) + 0.114*float64(b)
pixel := color.Gray{uint8(lum / 256)}
imgSet.Set(x, y, pixel)
}
}
TSeq := time.Since(TSeqStart)
//ns := TSeq.Nanoseconds()
avgSeq = avgSeq + TSeq
fmt.Printf("\nTime in ns (Sequential): ", TSeq)
outFile, err := os.Create("changed.jpg")
if err != nil {
log.Fatal(err)
}
defer outFile.Close()
jpeg.Encode(outFile, imgSet, nil)
}
avgSeq = avgSeq / 10
fmt.Print("\n\nAverage sequential time for 10 runs: ", avgSeq)
//parallel version
file2, err := os.Open("space.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img2, err := jpeg.Decode(file2)
if err != nil {
log.Fatal(os.Stderr, "%s: %v\n", "space.jpg", err)
}
for j := 1; j <= 4; j++ {
runtime.GOMAXPROCS(j)
for i := 0; i < 10; i++ {
TParStart := time.Now()
imgSet2 := imgprocess(img2, runtime.NumCPU(), splitVert(1024), rgbtogrey)
TPar := time.Since(TParStart)
//ns2 := TPar.Nanoseconds()
avgPar = avgPar + TPar
fmt.Print("\nTime in Nanoseconds (Parallel) with GOMAXPROCS set at ", j, ": ", TPar)
outFile2, err := os.Create("changed2.jpg")
if err != nil {
log.Fatal(err)
}
defer outFile2.Close()
jpeg.Encode(outFile2, imgSet2, nil)
if err != nil {
log.Fatalf("encoding image: %v", err)
}
}
avgPar = avgPar / 10
fmt.Print("\n\nAverage time for 10 runs in parallel (GOMAXPROCS:", j, "): ", avgPar)
var j64 time.Duration
j64 = time.Duration(j)
totalPar := j64 * avgPar
fmt.Print("\n\nTotal Parallel time: ", totalPar)
speedup := avgSeq.Seconds() / avgPar.Seconds()
fmt.Printf("\n\nSpeed up: %f", speedup)
var jfloat float64
jfloat = float64(j)
theoreticalMin := avgSeq.Seconds() / jfloat
fmt.Print("\n\nTheoretical Minimum: ", theoreticalMin, "ms")
var tPFloat float64
tPFloat = float64(totalPar)
efficiency := avgSeq.Seconds() / tPFloat
fmt.Print("\n\n Efficiency: ", efficiency, "%")
overhead := totalPar - avgSeq
fmt.Print("\n\nOverhead time: ", overhead, "\n")
}
英文:
I'm currently working on a piece of work that runs image manipulation both sequentially and in parallel. I'm trying to work out the work rate as part of my metrics; however, I can't find a formula online or much information about it.
Does anyone have the equation required to calculate work rate?
Edit:
This is my main function, which contains the metric calculations, in case it helps. I know there are probably better ways for me to obtain certain data etc., but a bit of trial and error has got me to this point.
runtime.GOMAXPROCS(4)
file, err := os.Open("space.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img, err := jpeg.Decode(file)
if err != nil {
log.Fatal(os.Stderr, "%s: %v\n", "space.jpg", err)
}
for i:= 0; i<10; i++{
TSeqStart := time.Now()
b := img.Bounds()
imgSet := image.NewRGBA(b)
for y := 0; y < b.Max.Y; y++ {
for x := 0; x < b.Max.X; x++ {
oldPixel := img.At(x, y)
r, g, b, _ := oldPixel.RGBA()
lum := 0.299*float64(r) + 0.587*float64(g) + 0.114*float64(b)
pixel := color.Gray{uint8(lum / 256)}
imgSet.Set(x, y, pixel)
}
}
TSeq := time.Since(TSeqStart)
//ns := TSeq.Nanoseconds()
avgSeq = avgSeq +TSeq
fmt.Printf("\nTime in ns (Sequential): " , TSeq)
outFile, err := os.Create("changed.jpg")
if err != nil {
log.Fatal(err)
}
defer outFile.Close()
jpeg.Encode(outFile, imgSet, nil)
}
avgSeq = avgSeq/10
fmt.Print("\n\nAverage sequential time for 10 runs: ", avgSeq)
//parallel version
file2, err := os.Open("space.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img2, err := jpeg.Decode(file2)
if err != nil {
log.Fatal(os.Stderr, "%s: %v\n", "space.jpg", err)
}
for j:=1;j<=4;j++{
runtime.GOMAXPROCS(j)
for i:= 0; i<10; i++{
TParStart:= time.Now()
imgSet2 := imgprocess(img2, runtime.NumCPU(), splitVert(1024), rgbtogrey)
TPar := time.Since(TParStart)
//ns2 := TPar.Nanoseconds()
avgPar = avgPar +TPar
fmt.Print("\nTime in Nanoseconds (Parallel) with GOMAXPROCS set at ",j ,": " , TPar)
outFile2, err := os.Create("changed2.jpg")
if err != nil {
log.Fatal(err)
}
defer outFile2.Close()
jpeg.Encode(outFile2, imgSet2, nil)
if err != nil {
log.Fatalf("encoding image: %v", err)
}
}
avgPar = avgPar/10
fmt.Print("\n\nAverage time for 10 runs in parallel (GOMAXPROCS:",j,"): ", avgPar)
var j64 time.Duration
j64 = time.Duration(j)
totalPar := j64*avgPar
fmt.Print("\n\nTotal Parallel time: ", totalPar)
speedup := avgSeq.Seconds()/avgPar.Seconds()
fmt.Printf("\n\nSpeed up: %f", speedup)
var jfloat float64
jfloat = float64(j)
theoreticalMin := avgSeq.Seconds()/jfloat
fmt.Print("\n\nTheoretical Minimum: ", theoreticalMin,"ms")
var tPFloat float64
tPFloat = float64(totalPar)
efficiency := avgSeq.Seconds()/tPFloat
fmt.Print("\n\n Efficiency: ", efficiency,"%")
overhead := totalPar - avgSeq
fmt.Print("\n\nOverhead time: ", overhead ,"\n")
答案1
得分: 2
首先,我将尝试解释这段代码的作用(NumCPU=4):
package main
import (
"image"
"image/color"
"image/jpeg"
"os"
"testing"
)
// changeImage is the benchmark baseline: it converts img to greyscale into a
// freshly allocated RGBA image and discards the result. It deliberately
// mirrors the code from the question, so it scans from (0, 0) up to
// Bounds().Max and writes pixels through the generic Set method.
func changeImage(img image.Image) {
	bounds := img.Bounds()
	out := image.NewRGBA(bounds)
	maxX, maxY := bounds.Max.X, bounds.Max.Y
	for y := 0; y < maxY; y++ {
		for x := 0; x < maxX; x++ {
			red, green, blue, _ := img.At(x, y).RGBA()
			// Weighted sum of the 16-bit channels; /256 scales it to 8 bits.
			lum := 0.299*float64(red) + 0.587*float64(green) + 0.114*float64(blue)
			out.Set(x, y, color.Gray{uint8(lum / 256)})
		}
	}
}
// BenchmarkParallel measures changeImage when it is called concurrently from
// the goroutines started by b.RunParallel. The source image is decoded once
// and shared by all goroutines.
func BenchmarkParallel(b *testing.B) {
	file, err := os.Open("space.jpg")
	if err != nil {
		b.Fatal(err)
	}
	defer file.Close()
	img, err := jpeg.Decode(file)
	if err != nil {
		b.Fatal(err)
	}
	b.ReportAllocs()
	// Exclude opening and decoding the file from the measurement, as
	// BenchmarkSingle does.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			changeImage(img)
		}
	})
}
// BenchmarkSingle measures changeImage called b.N times from a single
// goroutine. Opening and decoding space.jpg happens before the timer is
// reset, so it is not part of the measurement.
func BenchmarkSingle(b *testing.B) {
	src, openErr := os.Open("space.jpg")
	if openErr != nil {
		b.Fatal(openErr)
	}
	defer src.Close()

	decoded, decodeErr := jpeg.Decode(src)
	if decodeErr != nil {
		b.Fatal(decodeErr)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		changeImage(decoded)
	}
}
输出结果:
$ go test -bench=. -cpu=1,2,4,6
goos: linux
goarch: amd64
pkg: so/space
BenchmarkParallel 50 22901501 ns/op 2296662 B/op 571021 allocs/op
BenchmarkParallel-2 100 11599582 ns/op 2290637 B/op 571021 allocs/op
BenchmarkParallel-4 200 10631362 ns/op 2287631 B/op 571021 allocs/op
BenchmarkParallel-6 200 10916331 ns/op 2287629 B/op 571021 allocs/op
BenchmarkSingle 50 23645522 ns/op 2284582 B/op 571021 allocs/op
BenchmarkSingle-2 50 23158899 ns/op 2284584 B/op 571021 allocs/op
BenchmarkSingle-4 50 31069104 ns/op 2284589 B/op 571021 allocs/op
BenchmarkSingle-6 50 28026326 ns/op 2284586 B/op 571021 allocs/op
PASS
ok so/space 14.047s
附加说明:
接下来,阅读相关文档。
修复错误并对优化版本进行改进。将优化版本与之前建立的基准进行比较。
package main
import (
"image"
"image/color"
"image/jpeg"
"os"
"testing"
)
// changeImageOpt returns a greyscale copy of img as a new *image.RGBA with
// the same bounds. It walks the full rectangle from Bounds().Min to
// Bounds().Max and stores pixels with SetRGBA rather than the generic Set.
// Every output pixel is fully opaque.
func changeImageOpt(img image.Image) *image.RGBA {
	bounds := img.Bounds()
	out := image.NewRGBA(bounds)
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			red, green, blue, _ := img.At(x, y).RGBA()
			// Weighted sum of the 16-bit channels; /256 scales it to 8 bits.
			lum := 0.299*float64(red) + 0.587*float64(green) + 0.114*float64(blue)
			grey := uint8(lum / 256)
			// A grey pixel has equal R, G and B and full alpha.
			out.SetRGBA(x, y, color.RGBA{R: grey, G: grey, B: grey, A: 0xff})
		}
	}
	return out
}
// BenchmarkSingleOpt measures changeImageOpt called b.N times from a single
// goroutine. Opening and decoding space.jpg happens before the timer is
// reset, so it is not part of the measurement.
func BenchmarkSingleOpt(b *testing.B) {
	src, openErr := os.Open("space.jpg")
	if openErr != nil {
		b.Fatal(openErr)
	}
	defer src.Close()

	decoded, decodeErr := jpeg.Decode(src)
	if decodeErr != nil {
		b.Fatal(decodeErr)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		changeImageOpt(decoded)
	}
}
输出结果:
$ go test -bench=Single -cpu=2
goos: linux
goarch: amd64
pkg: so/space
BenchmarkSingle-2 20 84970866 ns/op 2284584 B/op 571021 allocs/op
BenchmarkSingleOpt-2 30 48353165 ns/op 1371010 B/op 190342 allocs/op
PASS
ok so/space 4.648s
按照文档中的说明,我们在 CPU 时间和内存分配方面取得了显著的减少。
对于并行基准测试(NumCPU=4),也有相应的改进。
$ go test -bench=Parallel -cpu=1,2,3,4,6
goos: linux
goarch: amd64
pkg: so/space
BenchmarkParallel 20 87135554 ns/op 2314774 B/op 571021 allocs/op
BenchmarkParallel-2 30 46567417 ns/op 2304732 B/op 571021 allocs/op
BenchmarkParallel-3 30 43262344 ns/op 2304736 B/op 571021 allocs/op
BenchmarkParallel-4 30 42593397 ns/op 2304763 B/op 571021 allocs/op
BenchmarkParallel-6 30 40803415 ns/op 2304804 B/op 571021 allocs/op
BenchmarkParallelOpt 30 47932887 ns/op 1391139 B/op 190342 allocs/op
BenchmarkParallelOpt-2 50 25216902 ns/op 1383094 B/op 190342 allocs/op
BenchmarkParallelOpt-3 50 23723356 ns/op 1383099 B/op 190342 allocs/op
BenchmarkParallelOpt-4 50 22400713 ns/op 1383101 B/op 190342 allocs/op
BenchmarkParallelOpt-6 50 22250405 ns/op 1383100 B/op 190342 allocs/op
PASS
ok so/space 19.662s
英文:
First, I would try to explain what was going on here (NumCPU=4):
package main
import (
"image"
"image/color"
"image/jpeg"
"os"
"testing"
)
// changeImage is the benchmark baseline: it converts img to greyscale into a
// freshly allocated RGBA image and discards the result. It deliberately
// mirrors the code from the question, so it scans from (0, 0) up to
// Bounds().Max and writes pixels through the generic Set method.
func changeImage(img image.Image) {
	bounds := img.Bounds()
	out := image.NewRGBA(bounds)
	maxX, maxY := bounds.Max.X, bounds.Max.Y
	for y := 0; y < maxY; y++ {
		for x := 0; x < maxX; x++ {
			red, green, blue, _ := img.At(x, y).RGBA()
			// Weighted sum of the 16-bit channels; /256 scales it to 8 bits.
			lum := 0.299*float64(red) + 0.587*float64(green) + 0.114*float64(blue)
			out.Set(x, y, color.Gray{uint8(lum / 256)})
		}
	}
}
// BenchmarkParallel measures changeImage when it is called concurrently from
// the goroutines started by b.RunParallel. The source image is decoded once
// and shared by all goroutines.
func BenchmarkParallel(b *testing.B) {
	file, err := os.Open("space.jpg")
	if err != nil {
		b.Fatal(err)
	}
	defer file.Close()
	img, err := jpeg.Decode(file)
	if err != nil {
		b.Fatal(err)
	}
	b.ReportAllocs()
	// Exclude opening and decoding the file from the measurement, as
	// BenchmarkSingle does.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			changeImage(img)
		}
	})
}
// BenchmarkSingle measures changeImage called b.N times from a single
// goroutine. Opening and decoding space.jpg happens before the timer is
// reset, so it is not part of the measurement.
func BenchmarkSingle(b *testing.B) {
	src, openErr := os.Open("space.jpg")
	if openErr != nil {
		b.Fatal(openErr)
	}
	defer src.Close()

	decoded, decodeErr := jpeg.Decode(src)
	if decodeErr != nil {
		b.Fatal(decodeErr)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		changeImage(decoded)
	}
}
Output:
$ go test -bench=. -cpu=1,2,4,6
goos: linux
goarch: amd64
pkg: so/space
BenchmarkParallel 50 22901501 ns/op 2296662 B/op 571021 allocs/op
BenchmarkParallel-2 100 11599582 ns/op 2290637 B/op 571021 allocs/op
BenchmarkParallel-4 200 10631362 ns/op 2287631 B/op 571021 allocs/op
BenchmarkParallel-6 200 10916331 ns/op 2287629 B/op 571021 allocs/op
BenchmarkSingle 50 23645522 ns/op 2284582 B/op 571021 allocs/op
BenchmarkSingle-2 50 23158899 ns/op 2284584 B/op 571021 allocs/op
BenchmarkSingle-4 50 31069104 ns/op 2284589 B/op 571021 allocs/op
BenchmarkSingle-6 50 28026326 ns/op 2284586 B/op 571021 allocs/op
PASS
ok so/space 14.047s
ADDENDUM:
Next, read the relevant documentation.
Fix the bugs and make improvements for an optimized version. Compare the optimized version to the baseline that we established earlier.
package main
import (
"image"
"image/color"
"image/jpeg"
"os"
"testing"
)
// changeImageOpt returns a greyscale copy of img as a new *image.RGBA with
// the same bounds. It walks the full rectangle from Bounds().Min to
// Bounds().Max and stores pixels with SetRGBA rather than the generic Set.
// Every output pixel is fully opaque.
func changeImageOpt(img image.Image) *image.RGBA {
	bounds := img.Bounds()
	out := image.NewRGBA(bounds)
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			red, green, blue, _ := img.At(x, y).RGBA()
			// Weighted sum of the 16-bit channels; /256 scales it to 8 bits.
			lum := 0.299*float64(red) + 0.587*float64(green) + 0.114*float64(blue)
			grey := uint8(lum / 256)
			// A grey pixel has equal R, G and B and full alpha.
			out.SetRGBA(x, y, color.RGBA{R: grey, G: grey, B: grey, A: 0xff})
		}
	}
	return out
}
// BenchmarkSingleOpt measures changeImageOpt called b.N times from a single
// goroutine. Opening and decoding space.jpg happens before the timer is
// reset, so it is not part of the measurement.
func BenchmarkSingleOpt(b *testing.B) {
	src, openErr := os.Open("space.jpg")
	if openErr != nil {
		b.Fatal(openErr)
	}
	defer src.Close()

	decoded, decodeErr := jpeg.Decode(src)
	if decodeErr != nil {
		b.Fatal(decodeErr)
	}

	b.ReportAllocs()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		changeImageOpt(decoded)
	}
}
Output:
$ go test -bench=Single -cpu=2
goos: linux
goarch: amd64
pkg: so/space
BenchmarkSingle-2 20 84970866 ns/op 2284584 B/op 571021 allocs/op
BenchmarkSingleOpt-2 30 48353165 ns/op 1371010 B/op 190342 allocs/op
PASS
ok so/space 4.648s
Following the instructions in the documentation, we have a significant reduction in CPU time and memory allocations.
There are corresponding improvements for parallel benchmarks (NumCPU=4).
$ go test -bench=Parallel -cpu=1,2,3,4,6
goos: linux
goarch: amd64
pkg: so/space
BenchmarkParallel 20 87135554 ns/op 2314774 B/op 571021 allocs/op
BenchmarkParallel-2 30 46567417 ns/op 2304732 B/op 571021 allocs/op
BenchmarkParallel-3 30 43262344 ns/op 2304736 B/op 571021 allocs/op
BenchmarkParallel-4 30 42593397 ns/op 2304763 B/op 571021 allocs/op
BenchmarkParallel-6 30 40803415 ns/op 2304804 B/op 571021 allocs/op
BenchmarkParallelOpt 30 47932887 ns/op 1391139 B/op 190342 allocs/op
BenchmarkParallelOpt-2 50 25216902 ns/op 1383094 B/op 190342 allocs/op
BenchmarkParallelOpt-3 50 23723356 ns/op 1383099 B/op 190342 allocs/op
BenchmarkParallelOpt-4 50 22400713 ns/op 1383101 B/op 190342 allocs/op
BenchmarkParallelOpt-6 50 22250405 ns/op 1383100 B/op 190342 allocs/op
PASS
ok so/space 19.662s
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论