使用并发后程序并没有运行得更快

huangapple go评论73阅读模式
英文:

Concurrency not running any faster

问题

我已经写了一段代码,尝试使用并发来提高运行速度,但并没有起到加速的作用。我该如何改进呢?

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
	"sync"
)

var wg sync.WaitGroup

// checkerr reports a fatal error and stops the program.
// The previous version only printed the error and carried on, which let
// readFile keep using a nil *os.File after a failed Open/Create and
// panic later; exiting here fails fast instead.
func checkerr(e error) {
	if e != nil {
		fmt.Println(e)
		os.Exit(1)
	}
}

// readFile reads data.txt line by line, sorts the words of each line
// concurrently (one goroutine per line), and writes the sorted lines to
// resdef.txt.
//
// Unlike the previous version, only the main goroutine touches the output
// file: workers send their result over a channel, so concurrent calls to
// writeToFile can no longer interleave or garble the output. The scanner
// error is also checked once scanning finishes. Note that the order of
// output lines is still completion order, as before.
func readFile() {
	file, err := os.Open("data.txt")
	checkerr(err)
	fres, err := os.Create("resdef.txt")
	checkerr(err)

	defer file.Close()
	defer fres.Close()

	results := make(chan []string)

	// Producer: fan each line out to its own sorting goroutine, then
	// close results once every worker is done so the writer loop ends.
	go func() {
		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			wg.Add(1)
			go func(text string) {
				defer wg.Done()
				words := strings.Fields(text)
				shellsort(words)
				results <- words
			}(scanner.Text())
		}
		checkerr(scanner.Err())
		wg.Wait()
		close(results)
	}()

	// Single consumer: the only goroutine writing to fres.
	for words := range results {
		writeToFile(fres, words)
	}
}

// shellsort sorts words in place in case-insensitive ascending order
// using Shell sort, shrinking the gap by a factor of roughly 2.2 each
// pass (inc = (inc+1)*5/11). A nil or empty slice is a no-op.
func shellsort(words []string) {
	for inc := len(words) / 2; inc > 0; inc = (inc + 1) * 5 / 11 {
		for i := inc; i < len(words); i++ {
			j, temp := i, words[i]
			// The lowercase form of the value being inserted is
			// loop-invariant; compute it once instead of on every
			// comparison in the inner loop.
			lowered := strings.ToLower(temp)
			for ; j >= inc && strings.ToLower(words[j-inc]) > lowered; j -= inc {
				words[j] = words[j-inc]
			}
			words[j] = temp
		}
	}
}

func writeToFile(f *os.File, words []string) {
	datawriter := bufio.NewWriter(f)
	for _, s := range words {
		datawriter.WriteString(s + " ")
	}
	datawriter.WriteString("\n")
	datawriter.Flush()
}

// main is the program entry point; all work (reading data.txt, sorting
// each line's words, writing resdef.txt) happens in readFile.
func main() {
	readFile()
}

除了并发之外,其他都正常工作,但是使用并发后,执行时间并没有减少。

英文:

I have written a code, tried to use concurrency but it's not helping to run any faster. How can I improve that?

package main
import (
&quot;bufio&quot;
&quot;fmt&quot;
&quot;os&quot;
&quot;strings&quot;
&quot;sync&quot;
)
var wg sync.WaitGroup
func checkerr(e error) {
if e != nil {
fmt.Println(e)
}
}
func readFile() {
file, err := os.Open(&quot;data.txt&quot;)
checkerr(err)
fres, err := os.Create(&quot;resdef.txt&quot;)
checkerr(err)
defer file.Close()
defer fres.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
wg.Add(1)
go func() {
words := strings.Fields(scanner.Text())
shellsort(words)
writeToFile(fres, words)
wg.Done()
}()
wg.Wait()
}
}
func shellsort(words []string) {
for inc := len(words) / 2; inc &gt; 0; inc = (inc + 1) * 5 / 11 {
for i := inc; i &lt; len(words); i++ {
j, temp := i, words[i]
for ; j &gt;= inc &amp;&amp; strings.ToLower(words[j-inc]) &gt; strings.ToLower(temp); j -= inc {
words[j] = words[j-inc]
}
words[j] = temp
}
}
}
func writeToFile(f *os.File, words []string) {
datawriter := bufio.NewWriter(f)
for _, s := range words {
datawriter.WriteString(s + &quot; &quot;)
}
datawriter.WriteString(&quot;\n&quot;)
datawriter.Flush()
}
func main() {
readFile()
}

Everything works well except that it take the same time to do everything as without concurrency.

答案1

得分: 3

你必须在for循环之后放置wg.Wait()

    for condition {
        wg.Add(1)
        go func() {
            // 并发任务在这里
            wg.Done()
        }()       
    }
    wg.Wait()

注意:工作本身应该具有并发的特性。

这是我测试过的解决方案 - 从输入文件中顺序读取,然后执行n并发任务,最后按顺序将结果写入输出文件,可以尝试这个

package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"runtime"
	"sort"
	"strings"
	"sync"
)

// sortQueue carries one input line through the pipeline. The original
// line index travels with the word slice so the writer can restore the
// input order after the lines are sorted concurrently, out of order.
type sortQueue struct {
	index int // zero-based position of this line in the input file
	data  []string
}

// main wires up a three-stage pipeline: a reader goroutine feeds lines of
// data.txt (tagged with their index) into channel a, n worker goroutines
// sort each line's words and forward the result on channel b, and the
// main goroutine writes the results to resdef.txt in input order.
func main() {
	// One sorting worker per CPU; channel buffers sized to match.
	n := runtime.NumCPU()
	a := make(chan sortQueue, n)
	b := make(chan sortQueue, n)
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go parSort(a, b, &wg)
	}
	// Reader: streams indexed lines into a, then closes a so the
	// workers' range loops terminate.
	go func() {
		file, err := os.Open("data.txt")
		if err != nil {
			log.Fatal(err)
		}
		defer file.Close()
		scanner := bufio.NewScanner(file)
		i := 0
		for scanner.Scan() {
			a <- sortQueue{index: i, data: strings.Fields(scanner.Text())}
			i++
		}
		close(a)
		err = scanner.Err()
		if err != nil {
			log.Fatal(err)
		}
	}()

	fres, err := os.Create("resdef.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer fres.Close()
	// Close b only after every worker has finished sending, so the
	// writer's range loop ends cleanly and no send hits a closed channel.
	go func() {
		wg.Wait()
		close(b)
	}()
	writeToFile(fres, b, n)
}

// writeToFile drains sorted lines from b and writes them to f in their
// original input order. Results that arrive out of order are parked in a
// map keyed by line index until every earlier line has been written. It
// returns when b is closed and drained. Each line is also echoed to
// stdout. I/O errors terminate the program.
func writeToFile(f *os.File, b chan sortQueue, n int) {
	pending := make(map[int][]string, n)
	next := 0
	for item := range b {
		pending[item.index] = item.data
		// Flush the longest run of consecutive indices now available.
		for {
			line, ok := pending[next]
			if !ok {
				break
			}
			delete(pending, next)
			next++
			s := strings.Join(line, " ")
			fmt.Println(s)
			if _, err := f.WriteString(s + "\n"); err != nil {
				log.Fatal(err)
			}
		}
	}
}

// parSort is one sorting worker: it drains jobs from a, sorts each job's
// word slice in ascending (byte-wise) order, and forwards the job on b.
// It signals wg once a is closed and fully drained.
func parSort(a, b chan sortQueue, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range a {
		sort.Strings(job.data)
		b <- job
	}
}

data.txt 文件内容:

1 2 0 3
a 1 b d 0 c 
aa cc bb

输出结果:

0 1 2 3
0 1 a b c d
aa bb cc
英文:

You must place wg.Wait() after the for loop:

    for condition {
wg.Add(1)
go func() {
// a concurrent job here
wg.Done()
}()       
}
wg.Wait()

Note: the work itself should have a concurrent nature.

Here is my tested solution - read from the input file sequentially then do n concurrent tasks and finally write to the output file sequentially in order, try this:

package main
import (
&quot;bufio&quot;
&quot;fmt&quot;
&quot;log&quot;
&quot;os&quot;
&quot;runtime&quot;
&quot;sort&quot;
&quot;strings&quot;
&quot;sync&quot;
)
type sortQueue struct {
index int
data  []string
}
func main() {
n := runtime.NumCPU()
a := make(chan sortQueue, n)
b := make(chan sortQueue, n)
var wg sync.WaitGroup
for i := 0; i &lt; n; i++ {
wg.Add(1)
go parSort(a, b, &amp;wg)
}
go func() {
file, err := os.Open(&quot;data.txt&quot;)
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
i := 0
for scanner.Scan() {
a &lt;- sortQueue{index: i, data: strings.Fields(scanner.Text())}
i++
}
close(a)
err = scanner.Err()
if err != nil {
log.Fatal(err)
}
}()
fres, err := os.Create(&quot;resdef.txt&quot;)
if err != nil {
log.Fatal(err)
}
defer fres.Close()
go func() {
wg.Wait()
close(b)
}()
writeToFile(fres, b, n)
}
func writeToFile(f *os.File, b chan sortQueue, n int) {
m := make(map[int][]string, n)
order := 0
for v := range b {
m[v.index] = v.data
var slice []string
exist := true
for exist {
slice, exist = m[order]
if exist {
delete(m, order)
order++
s := strings.Join(slice, &quot; &quot;)
fmt.Println(s)
_, err := f.WriteString(s + &quot;\n&quot;)
if err != nil {
log.Fatal(err)
}
}
}
}
}
func parSort(a, b chan sortQueue, wg *sync.WaitGroup) {
defer wg.Done()
for q := range a {
sort.Slice(q.data, func(i, j int) bool { return q.data[i] &lt; q.data[j] })
b &lt;- q
}
}

data.txt file:

1 2 0 3
a 1 b d 0 c 
aa cc bb

Output:

0 1 2 3
0 1 a b c d
aa bb cc

答案2

得分: 2

你并没有真正实现并行,因为每一次 wg.Add(1) 调用后都紧跟着一次对应的 wg.Wait() 调用。两者一一对应:你每生成一个 Go 协程,就立即阻塞主协程,等待这个刚生成的协程完成。

WaitGroup的目的是等待多个任务完成,当所有Go协程都已生成时,通过单个调用wg.Wait()等待。

然而,除了修复对wg.Wait的调用之外,你还需要控制对扫描器的并发访问。一种方法是使用通道将扫描器发出的文本行传递给等待的Go协程:

// One channel decouples the scanner (single producer) from the workers.
lines := make(chan string)

// Dispatcher: fans each incoming line out to its own goroutine.
// NOTE(review): goroutine count is unbounded, and every worker calls
// writeToFile on the same fres concurrently — as the surrounding text
// says, the output can come out garbled/interleaved.
go func() {
    for line := range lines {
        go func(line string) {
            words := strings.Fields(line)
            shellsort(words)
            writeToFile(fres, words)
        }(line)
    }
}()

// Only this loop touches the scanner; closing lines ends the dispatcher.
scanner := bufio.NewScanner(file)
for scanner.Scan() {
    lines <- scanner.Text()
}
close(lines)

请注意,这可能会导致文件输出混乱,因为你有许多并发的Go协程同时写入它们的结果。你可以通过第二个通道来控制输出:

// lines feeds work to the sorters; out funnels results to a single writer.
lines := make(chan string)
out := make(chan []string)

// Dispatcher: one goroutine per line, sending sorted words on out.
go func() {
    for line := range lines {
        go func(line string) {
            words := strings.Fields(line)
            shellsort(words)
            out <- words
        }(line)
    }
}()

// Single consumer of out — the only goroutine writing to fres.
go func() {
    for words := range out {
        writeToFile(fres, words)
    }
}()

scanner := bufio.NewScanner(file)
for scanner.Scan() {
    lines <- scanner.Text()
}
close(lines)
// NOTE(review): close(out) runs immediately after close(lines), but the
// per-line worker goroutines spawned above may still be sorting and may
// send on out afterwards — sending on a closed channel panics. The close
// should happen only after all workers finish (e.g. via a WaitGroup).
close(out)

此时,你可以重构为一个“读取器”、一个“处理器”和一个“写入器”,它们通过通道进行通信。

读取器和写入器使用单个Go协程来防止对资源的并发访问,而处理器生成许多Go协程(当前是无限制的)来将工作“扇出”到许多处理器上:

package main

import (
	"bufio"
	"os"
	"strings"
	"sync"
)

// main wires the pipeline: reader -> processor -> writer. Each stage
// communicates over a channel; main returns when the writer has drained
// every result.
func main() {
	lines := reader()
	out := processor(lines)
	writer(out)
}

// reader opens data.txt and streams its lines on the returned channel,
// closing the channel (and the file) once the input is exhausted.
//
// The return type is receive-only (<-chan): the original declared
// chan<- string, which the consumer cannot range over and does not
// compile.
func reader() <-chan string {
	lines := make(chan string)

	file, err := os.Open("data.txt")
	checkerr(err)
	go func() {
		defer file.Close() // was leaked in the original
		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			lines <- scanner.Text()
		}
		close(lines)
	}()

	return lines
}

// processor fans each line out to its own goroutine, which splits the
// line into words, sorts them, and sends the result on the returned
// channel.
//
// Fixes over the original: the parameter is receive-only (it is ranged
// over), and out is closed only after every worker has finished —
// closing it right after the range loop could make a still-running
// worker panic by sending on a closed channel.
func processor(lines <-chan string) <-chan []string {
	out := make(chan []string)

	go func() {
		var wg sync.WaitGroup
		for line := range lines {
			wg.Add(1)
			go func(line string) {
				defer wg.Done()
				words := strings.Fields(line)
				shellsort(words)
				out <- words
			}(line)
		}
		wg.Wait()
		close(out)
	}()
	return out
}

// writer is the single consumer: it appends each sorted line to
// resdef.txt and returns when out is closed and drained. The parameter
// is receive-only (the original chan<- []string cannot be ranged over).
func writer(out <-chan []string) {
	fres, err := os.Create("resdef.txt")
	checkerr(err)
	defer fres.Close() // was leaked in the original
	for words := range out {
		writeToFile(fres, words)
	}
}
英文:

You're not parallelizing anything, because for every call to wg.Add(1) you have matching call to wg.Wait(). It's one-to-one: You spawn a Go routine, and then immediately block the main Go routine waiting for the newly spawned routine to finish.

The point of a WaitGroup is to wait for many things to finish, with a single call to wg.Wait() when all the Go routines have been spawned.

However, in addition to fixing your call to wg.Wait, you need to control concurrent access to your scanner. One approach to this might be to use a channel for your scanner to emit lines of text to waiting Go routines:

	lines := make(chan string)
go func() {
for line := range lines {
go func(line string) {
words := strings.Fields(line)
shellsort(words)
writeToFile(fres, words)
}(line)
}
}()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines &lt;- scanner.Text()
}
close(lines)

Note that this may lead to garbled output in your file, as you have many concurrent Go routines all writing their results at the same time. You can control output through a second channel:

	lines := make(chan string)
out := make(chan []string)
go func() {
for line := range lines {
go func(line string) {
words := strings.Fields(line)
shellsort(words)
out &lt;- words
}(line)
}
}()
go func() {
for words := range out {
writeToFile(fres, words)
}
}()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines &lt;- scanner.Text()
}
close(lines)
close(out)

At this point, you can refactor into a "reader", a "processor" and a "writer", which form a pipeline that communicates via channels.

The reader and writer use a single go routine to prevent concurrent access to a resource, while the processor spawns many go routines (currently unbounded) to "fan out" the work across many processors:

package main
import (
&quot;bufio&quot;
&quot;os&quot;
&quot;strings&quot;
)
func main() {
lines := reader()
out := processor(lines)
writer(out)
}
func reader() chan&lt;- string {
lines := make(chan string)
file, err := os.Open(&quot;data.txt&quot;)
checkerr(err)
go func() {
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines &lt;- scanner.Text()
}
close(lines)
}()
return lines
}
func processor(lines chan&lt;- string) chan []string {
out := make(chan []string)
go func() {
for line := range lines {
go func(line string) {
words := strings.Fields(line)
shellsort(words)
out &lt;- words
}(line)
}
close(out)
}()
return out
}
func writer(out chan&lt;- []string) {
fres, err := os.Create(&quot;resdef.txt&quot;)
checkerr(err)
for words := range out {
writeToFile(fres, words)
}
}

答案3

得分: 1

正如其他答案所说,通过在每次循环迭代中等待WaitGroup,你将并发限制为1(没有并发)。有许多解决方法,但正确的解决方法完全取决于哪些操作需要时间,而这在问题中没有显示出来。并发并不能神奇地使事情变得更快;它只是让事情同时发生,只有在那些需要很长时间的操作可以同时进行时,才能加快速度。

假设在你的代码中,需要很长时间的是排序操作。如果是这样的话,你可以这样做:

// Workers sort concurrently; results are funneled through one channel so
// only a single goroutine ever writes to the output file.
results := make(chan []string)
for scanner.Scan() {
    wg.Add(1)
    go func(line string) {
        words := strings.Fields(line)
        shellsort(words)
        results <- words
        wg.Done()
    }(scanner.Text())
}

// Closer: waits for every worker, then closes results so the consumer
// loop below terminates. Must run in its own goroutine, or the
// unbuffered sends above would deadlock against wg.Wait().
go func() {
    wg.Wait()
    close(results)
}()

// Single writer; lines come out in completion order, not input order.
for words := range results {
    writeToFile(fres, words)
}

这将把Wait移到了应该在的位置,并避免了并发使用扫描器和写入器。如果排序操作需要相当长的处理时间,这应该比串行处理更快。

英文:

As other answers have said, by waiting on the WaitGroup each loop iteration, you're limiting your concurrency to 1 (no concurrency). There are a number of ways to solve this, but what's correct depends entirely on what is taking time, and that hasn't been shown in the question. Concurrency doesn't magically make things faster; it just lets things happen at the same time, which only makes things faster if things that take a lot of time can happen concurrently.

Presumably, in your code, the thing that takes a long time is the sort. If that is the case, you could do something like this:

results := make(chan []string)
for scanner.Scan() {
wg.Add(1)
go func(line string) {
words := strings.Fields(line)
shellsort(words)
result &lt;- words
}(scanner.Text())
}
go func() {
wg.Wait()
close(results)
}()
for words := range results {
writeToFile(fres, words)
}

This moves the Wait to where it should be, and avoids concurrent use of the scanner and writer. This should be faster than serial processing, if the sort is taking a significant amount of processing time.

huangapple
  • 本文由 发表于 2021年6月28日 22:58:53
  • 转载请务必保留本文链接:https://go.coder-hub.com/68165645.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定