英文:
Reading text file and printing the vowel count
问题
我正在尝试从文本文件中打印元音字母的计数。我没有得到预期的计数结果。我可以使用常规方法来计算字符数,例如使用strings.Count()函数。但是我想使用并发来看看它的工作原理。这是文本文件的链接-> "https://www.gutenberg.org/cache/epub/2600/pg2600.txt"
文件大小-> 3.36 MB
以下是我的代码。
非常感谢您的帮助。
使用以下代码得到的结果:
A - 145310
E - 188280
I - 116195
O - 131900
U - 47560
预期结果:
A - 202712
E - 313609
I - 172232
O - 190103
U - 64401
package main
import (
	"bufio"
	"fmt"
	"io"
	"os"
	"strings"
	"sync"
)
const mb = 1024 * 1024
func main() {
	aCount, eCount, iCount, oCount, uCount := 0, 0, 0, 0, 0
	wg := &sync.WaitGroup{}
	channel := make(chan string)
	done := make(chan bool, 1)
	go func() {
		for s := range channel {
			if strings.Contains(strings.ToUpper(s), "A") {
				aCount++
			}
			if strings.Contains(strings.ToUpper(s), "E") {
				eCount++
			}
			if strings.Contains(strings.ToUpper(s), "I") {
				iCount++
			}
			if strings.Contains(strings.ToUpper(s), "O") {
				oCount++
			}
			if strings.Contains(strings.ToUpper(s), "U") {
				uCount++
			}
		}
		done <- true
	}()
	var current int64
	var limit int64 = 0.5 * mb
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			read(current, limit, "interview.txt", channel)
			fmt.Printf("%d thread has been completed", i)
			wg.Done()
		}()
		current += limit + 1
	}
	// Wait for all go routines to complete.
	wg.Wait()
	close(channel)
	<-done
	close(done)
	fmt.Println("A - ", aCount)
	fmt.Println("E - ", eCount)
	fmt.Println("I - ", iCount)
	fmt.Println("O - ", oCount)
	fmt.Println("U - ", uCount)
}
func read(offset int64, limit int64, fileName string, channel chan string) {
	file, err := os.Open(fileName)
	defer file.Close()
	if err != nil {
		panic(err)
	}
	file.Seek(offset, 0)
	reader := bufio.NewReader(file)
	if offset != 0 {
		_, err = reader.ReadBytes(' ')
		if err == io.EOF {
			fmt.Println("EOF")
			return
		}
		if err != nil {
			panic(err)
		}
	}
	var cummulativeSize int64
	for {
		// Break if read size has exceed the chunk size.
		if cummulativeSize > limit {
			break
		}
		b, err := reader.ReadBytes(' ')
		// Break if end of file is encountered.
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		cummulativeSize += int64(len(b))
		s := strings.TrimSpace(string(b))
		if s != "" {
			channel <- s
		}
	}
}
英文:
I am trying to print vowel count from a text file. I am not getting the expected count. I can do this using a regular approach of just counting the character count using strings.Count(). But I want to use concurrency to see how it works. This is the link to text file -> "https://www.gutenberg.org/cache/epub/2600/pg2600.txt"
File size -> 3.36 MB
Below is my code.
Any help is appreciated.
Result I am getting using below code:
A -  145310
E -  188280
I -  116195
O -  131900
U -  47560
Expected result :
A -  202712
E -  313609
I -  172232
O -  190103
U -  64401
package main
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"sync"
)
const mb = 1024 * 1024
func main() {
aCount, eCount, iCount, oCount, uCount := 0, 0, 0, 0, 0
wg := &sync.WaitGroup{}
channel := make(chan (string))
done := make(chan (bool), 1)
go func() {
for s := range channel {
if strings.Contains(strings.ToUpper(s), "A") {
aCount++
}
if strings.Contains(strings.ToUpper(s), "E") {
eCount++
}
if strings.Contains(strings.ToUpper(s), "I") {
iCount++
}
if strings.Contains(strings.ToUpper(s), "O") {
oCount++
}
if strings.Contains(strings.ToUpper(s), "U") {
uCount++
}
}
done <- true
}()
var current int64
var limit int64 = 0.5 * mb
for i := 0; i < 5; i++ {
wg.Add(1)
go func() {
read(current, limit, "interview.txt", channel)
fmt.Printf("%d thread has been completed", i)
wg.Done()
}()
current += limit + 1
}
// Wait for all go routines to complete.
wg.Wait()
close(channel)
<-done
close(done)
fmt.Println("A - ", aCount)
fmt.Println("E - ", eCount)
fmt.Println("I - ", iCount)
fmt.Println("O - ", oCount)
fmt.Println("U - ", uCount)
}
func read(offset int64, limit int64, fileName string, channel chan (string)) {
file, err := os.Open(fileName)
defer file.Close()
if err != nil {
panic(err)
}
file.Seek(offset, 0)
reader := bufio.NewReader(file)
if offset != 0 {
_, err = reader.ReadBytes(' ')
if err == io.EOF {
fmt.Println("EOF")
return
}
if err != nil {
panic(err)
}
}
var cummulativeSize int64
for {
// Break if read size has exceed the chunk size.
if cummulativeSize > limit {
break
}
b, err := reader.ReadBytes(' ')
// Break if end of file is encountered.
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
cummulativeSize += int64(len(b))
s := strings.TrimSpace(string(b))
if s != "" {
channel <- s
}
}
}
答案1
得分: 1
你有一个数据竞争。结果是未定义的。
$ go run -race vowels.go
==================
警告:数据竞争
由goroutine 11在0x00c0000bc068处读取:
main.main.func2()
vowels.go:55 +0x58
之前由主goroutine在0x00c0000bc068处写入:
main.main()
vowels.go:60 +0x459
Goroutine 11(正在运行)创建于:
main.main()
vowels.go:53 +0x434
==================
==================
警告:数据竞争
由goroutine 8在0x00c0000bc078处读取:
main.main.func2()
vowels.go:56 +0x8b
之前由主goroutine在0x00c0000bc078处写入:
main.main()
vowels.go:51 +0x492
Goroutine 8(正在运行)创建于:
main.main()
vowels.go:53 +0x434
==================
英文:
You have a data race. The results are undefined.
$ go run -race vowels.go
==================
WARNING: DATA RACE
Read at 0x00c0000bc068 by goroutine 11:
main.main.func2()
vowels.go:55 +0x58
Previous write at 0x00c0000bc068 by main goroutine:
main.main()
vowels.go:60 +0x459
Goroutine 11 (running) created at:
main.main()
vowels.go:53 +0x434
==================
==================
WARNING: DATA RACE
Read at 0x00c0000bc078 by goroutine 8:
main.main.func2()
vowels.go:56 +0x8b
Previous write at 0x00c0000bc078 by main goroutine:
main.main()
vowels.go:51 +0x492
Goroutine 8 (running) created at:
main.main()
vowels.go:53 +0x434
==================
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。


评论