识别golang死锁。5个哲学家问题

huangapple go评论91阅读模式
英文:

Identifying golang deadlock. 5 philosophers problem

问题

我在wg.Wait()这一行上遇到了fatal error: all goroutines are asleep - deadlock!的错误。大约30%的运行会出现这个错误,其余的运行没有错误。我猜我可能没有正确使用WaitGroup,但不确定我做错了什么。也许有人可以帮我找出错误?谢谢!

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

const (
	numOfPhilosophers = 5
	numOfMeals        = 3
	maxEaters         = 2
)

var doOnce sync.Once

// main sets up the dining-philosophers run: a slice of chopstick mutexes, a
// host goroutine that hands out eating permissions, and one goroutine per
// philosopher. It then waits until every philosopher has called wg.Done.
func main() {
	chopsticks := make([]sync.Mutex, 5)
	// Both channels are unbuffered, so every send blocks until the other side
	// is ready to receive.
	permissionChannel := make(chan bool)
	finishEating := make(chan bool)
	go permissionFromHost(permissionChannel, finishEating)
	var wg sync.WaitGroup
	wg.Add(numOfPhilosophers)
	for i := 1; i <= numOfPhilosophers; i++ {
		// Philosopher i gets chopstick i-1 as left and chopstick
		// i%numOfPhilosophers as right, so the last one wraps around to index 0.
		go eat(i, &chopsticks[i-1], &chopsticks[i%numOfPhilosophers], &wg, permissionChannel, finishEating)
	}
	// The reported "all goroutines are asleep - deadlock!" error surfaces here.
	wg.Wait()
}

// eat runs one philosopher for numOfMeals rounds. Each round locks both
// chopsticks, waits for a permission from the host, prints the start/finish
// messages, unlocks the chopsticks and reports completion to the host.
// wg.Done is called when the function returns.
func eat(philosopherId int, left *sync.Mutex, right *sync.Mutex, wg *sync.WaitGroup, permissionChannel <-chan bool, finishEatingChannel chan<- bool) {
	defer wg.Done()
	for i := 1; i <= numOfMeals; i++ {
		// Lock the two chopsticks in an order chosen at random for this round.
		if RandBool() {
			left.Lock()
			right.Lock()
		} else {
			right.Lock()
			left.Lock()
		}

		// Both chopsticks stay locked while this goroutine waits for the host.
		fmt.Printf("waiting for permission from host %d\n", philosopherId)
		<-permissionChannel

		fmt.Printf("starting to eat %d (time %d)\n", philosopherId, i)
		fmt.Printf("finish to eat %d (time %d)\n", philosopherId, i)
		// Release the chopsticks.
		left.Unlock()
		right.Unlock()

		// Tell the host this meal is over. The channel created in main is
		// unbuffered, so this send blocks until the host receives from it.
		finishEatingChannel <- true
	}
}

// permissionFromHost is the host loop. ctr counts permissions that have been
// granted and not yet matched by a message on finishEating; a new permission
// is only offered while ctr is below maxEaters. The loop never returns.
func permissionFromHost(permissionChannel chan<- bool, finishEating <-chan bool) {
	ctr := 0
	for {
		select {
		case <-finishEating:
			ctr--
		default:
			// Taken whenever no philosopher is sending on finishEating at this
			// instant. When ctr has reached maxEaters nothing happens here and
			// the loop spins back to the select.
			if ctr < maxEaters {
				ctr++
				// Blocking send made outside the select: while it waits for a
				// receiver, this goroutine cannot receive from finishEating.
				// If the only remaining philosopher is itself blocked sending
				// on finishEating, neither side can proceed.
				permissionChannel <- true
			}
		}
	}
}

// RandBool reseeds the global math/rand generator from the current wall-clock
// time in nanoseconds and then reports whether a draw from {0, 1} came up 1.
func RandBool() bool {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
	coin := rand.Intn(2)
	return coin == 1
}

编辑1:我修复了互斥锁的传递方式,但问题没有解决。

编辑2:我尝试使用带缓冲的通道permissionChannel := make(chan bool, numOfPhilosophers),这样可以解决问题。

编辑3:@Jaroslaw的示例也可以解决问题。

英文:

I am getting fatal error: all goroutines are asleep - deadlock!
on the line wg.Wait()
It happens in about 30% of the runs; the rest finish with no error. I guess I am using WaitGroup the wrong way, but I am not sure what I am doing wrong.
Maybe someone can help me identify my bug? Thanks!

package main
import (
&quot;fmt&quot;
&quot;math/rand&quot;
&quot;sync&quot;
&quot;time&quot;
)
const (
numOfPhilosophers = 5
numOfMeals = 3
maxEaters = 2
)
var doOnce sync.Once
func main() {
chopsticks := make([]sync.Mutex, 5)
permissionChannel := make(chan bool)
finishEating := make(chan bool)
go permissionFromHost(permissionChannel,finishEating)
var wg sync.WaitGroup
wg.Add(numOfPhilosophers)
for i:=1 ; i&lt;=numOfPhilosophers ; i++ {
go eat(i, chopsticks[i-1], chopsticks[i%numOfPhilosophers], &amp;wg, permissionChannel, finishEating)
}
wg.Wait()
}
func eat(philosopherId int, left sync.Mutex, right sync.Mutex, wg *sync.WaitGroup, permissionChannel &lt;-chan bool, finishEatingChannel chan&lt;- bool) {
defer wg.Done()
for i:=1 ; i&lt;=numOfMeals ; i++ {
//lock chopsticks in random order
if RandBool() {
left.Lock()
right.Lock()
} else {
right.Lock()
left.Lock()
}
fmt.Printf(&quot;waiting for permission from host %d\n&quot;,philosopherId)
&lt;-permissionChannel
fmt.Printf(&quot;starting to eat %d (time %d)\n&quot;, philosopherId, i)
fmt.Printf(&quot;finish to eat %d (time %d)\n&quot;, philosopherId, i)
//release chopsticks
left.Unlock()
right.Unlock()
//let host know I am done eating
finishEatingChannel&lt;-true
}
}
func permissionFromHost(permissionChannel chan&lt;-bool, finishEating &lt;-chan bool) {
ctr := 0
for {
select {
case &lt;-finishEating:
ctr--
default:
if ctr&lt;maxEaters {
ctr++
permissionChannel&lt;-true
}
}
}
}
// RandBool reseeds the global math/rand generator from the current wall-clock
// time in nanoseconds and then reports whether a draw from {0, 1} came up 1.
func RandBool() bool {
	now := time.Now()
	rand.Seed(now.UnixNano())
	if rand.Intn(2) == 1 {
		return true
	}
	return false
}

Edit 1: I fixed the mutex to be passed by reference. It didn't solve the problem.

Edit 2: I tried to use buffered channel permissionChannel:=make(chan bool, numOfPhilosophers) which makes it work

Edit 3: also @Jaroslaw example makes it work

答案1

得分: 1

go vet命令的输出如下:

./main.go:26:13: 调用 eat 复制了 lock 值:sync.Mutex
./main.go:26:30: 调用 eat 复制了 lock 值:sync.Mutex
./main.go:31:34: eat 通过值传递了 lock:sync.Mutex
./main.go:31:52: eat 通过值传递了 lock:sync.Mutex

另一个问题是,有时候当goroutine(哲学家)尝试在finishEatingChannel上发送确认时会被阻塞,因为负责从这个无缓冲通道读取数据的goroutine(主机)正忙于尝试发送一个许可。以下是代码的确切部分:

            if ctr&lt;maxEaters {
ctr++
// 这个goroutine被卡住了,因为最后一个哲学家没有从permissionChannel中读取。
// 哲学家没有从这个通道中读取,而是忙于尝试写入finishEating通道,而这个goroutine没有读取。
// 因此发生了死锁。
permissionChannel&lt;-true 
}

当只剩下一个需要吃两次的哲学家时,死锁是100%可重现的。

修复后的代码如下:

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

const (
	numOfPhilosophers = 5
	numOfMeals        = 3
	maxEaters         = 2
)

func main() {
	chopsticks := make([]sync.Mutex, 5)
	permissionChannel := make(chan bool)
	finishEating := make(chan bool)
	go permissionFromHost(permissionChannel, finishEating)
	var wg sync.WaitGroup
	wg.Add(numOfPhilosophers)
	for i := 1; i <= numOfPhilosophers; i++ {
		go eat(i, &chopsticks[i-1], &chopsticks[i%numOfPhilosophers], &wg, permissionChannel, finishEating)
	}
	wg.Wait()
}

// eat runs one philosopher for numOfMeals rounds. Each round waits for a
// permission from the host, locks both chopsticks, prints the start/finish
// messages, unlocks the chopsticks and reports completion to the host.
// wg.Done is called when the function returns.
func eat(philosopherId int, left *sync.Mutex, right *sync.Mutex, wg *sync.WaitGroup, permissionChannel <-chan bool, finishEatingChannel chan<- bool) {
	defer wg.Done()
	for i := 1; i <= numOfMeals; i++ {
		// Ask the host before touching any chopstick. If the chopsticks were
		// taken first, every philosopher could end up holding exactly one and
		// waiting for a neighbour's, which no amount of host permissions can
		// untangle. As long as the host admits fewer philosophers than there
		// are seats, only admitted philosophers hold chopsticks and at least
		// one of them can always pick up both.
		fmt.Printf("等待主机许可 %d\n", philosopherId)
		<-permissionChannel

		// Lock the two chopsticks in an order chosen at random for this round.
		if RandBool() {
			left.Lock()
			right.Lock()
		} else {
			right.Lock()
			left.Lock()
		}

		fmt.Printf("开始吃饭 %d (第 %d 次)\n", philosopherId, i)
		fmt.Printf("结束吃饭 %d (第 %d 次)\n", philosopherId, i)
		// Release the chopsticks.
		left.Unlock()
		right.Unlock()

		// Tell the host this meal is over so it can admit someone else.
		finishEatingChannel <- true
	}
}

// permissionFromHost is the host loop. It tracks how many granted permissions
// have not yet been matched by a message on finishEating, and offers a new
// permission only while that number is below maxEaters. It is always ready to
// receive on finishEating. The loop never returns.
func permissionFromHost(permissionChannel chan<- bool, finishEating <-chan bool) {
	eating := 0
	for {
		// A nil channel is never ready in a select, so leaving grant nil while
		// the table is full reduces the select to a receive on finishEating.
		var grant chan<- bool
		if eating < maxEaters {
			grant = permissionChannel
		}
		select {
		case <-finishEating:
			eating--
		case grant <- true:
			eating++
		}
	}
}

// randMu guards rng: a *rand.Rand built with rand.New is not safe for
// concurrent use, and RandBool is called from several goroutines.
var randMu sync.Mutex

// rng is seeded once at start-up. Reseeding from the clock on every call, as
// the previous version did, yields identical results for calls that observe
// the same timestamp and relies on the deprecated rand.Seed.
var rng = rand.New(rand.NewSource(time.Now().UnixNano()))

// RandBool returns true or false with equal probability. It is safe to call
// from multiple goroutines.
func RandBool() bool {
	randMu.Lock()
	defer randMu.Unlock()
	return rng.Intn(2) == 1
}
英文:

The go vet command says

./main.go:26:13: call of eat copies lock value: sync.Mutex
./main.go:26:30: call of eat copies lock value: sync.Mutex
./main.go:31:34: eat passes lock by value: sync.Mutex
./main.go:31:52: eat passes lock by value: sync.Mutex

Another problem is that there are times when goroutines (philosophers) get blocked when trying to send an acknowledgement on finishEatingChannel, because the goroutine (host) responsible for reading data from this unbuffered channel is busy trying to send a permission. Here is the exact part of code:

            if ctr&lt;maxEaters {
ctr++
// This goroutine gets stuck because the last philosopher is not reading from permissionChannel.
// The philosopher is not reading from this channel because it is busy trying to write to the finishEating channel, which this goroutine is not reading.
// Thus the deadlock happens.
permissionChannel&lt;-true 
}

Deadlock is 100% reproducible when there is only one philosopher left who needs to eat twice.

Fixed version of code:

package main
import (
&quot;fmt&quot;
&quot;math/rand&quot;
&quot;sync&quot;
&quot;time&quot;
)
const (
numOfPhilosophers = 5
numOfMeals        = 3
maxEaters         = 2
)
func main() {
chopsticks := make([]sync.Mutex, 5)
permissionChannel := make(chan bool)
finishEating := make(chan bool)
go permissionFromHost(permissionChannel, finishEating)
var wg sync.WaitGroup
wg.Add(numOfPhilosophers)
for i := 1; i &lt;= numOfPhilosophers; i++ {
go eat(i, &amp;chopsticks[i-1], &amp;chopsticks[i%numOfPhilosophers], &amp;wg, permissionChannel, finishEating)
}
wg.Wait()
}
func eat(philosopherId int, left *sync.Mutex, right *sync.Mutex, wg *sync.WaitGroup, permissionChannel &lt;-chan bool, finishEatingChannel chan&lt;- bool) {
defer wg.Done()
for i := 1; i &lt;= numOfMeals; i++ {
//lock chopsticks in random order
if RandBool() {
left.Lock()
right.Lock()
} else {
right.Lock()
left.Lock()
}
fmt.Printf(&quot;waiting for permission from host %d\n&quot;, philosopherId)
&lt;-permissionChannel
fmt.Printf(&quot;starting to eat %d (time %d)\n&quot;, philosopherId, i)
fmt.Printf(&quot;finish to eat %d (time %d)\n&quot;, philosopherId, i)
//release chopsticks
left.Unlock()
right.Unlock()
//let host know I am done eating
finishEatingChannel &lt;- true
}
}
func permissionFromHost(permissionChannel chan&lt;- bool, finishEating &lt;-chan bool) {
ctr := 0
for {
if ctr &lt; maxEaters {
select {
case &lt;-finishEating:
ctr--
case permissionChannel &lt;- true:
ctr++
}
} else {
&lt;-finishEating
ctr--
}
}
}
// randMu guards rng: a *rand.Rand built with rand.New is not safe for
// concurrent use, and RandBool is called from several goroutines.
var randMu sync.Mutex

// rng is seeded once at start-up. Reseeding from the clock on every call, as
// the previous version did, yields identical results for calls that observe
// the same timestamp and relies on the deprecated rand.Seed.
var rng = rand.New(rand.NewSource(time.Now().UnixNano()))

// RandBool returns true or false with equal probability. It is safe to call
// from multiple goroutines.
func RandBool() bool {
	randMu.Lock()
	defer randMu.Unlock()
	return rng.Intn(2) == 1
}

答案2

得分: 1

最后一个goroutine不会退出,当它写入finishEatingChannel通道时,它会在最后一次迭代中被阻塞,因为没有消费者。

finishEatingChannel没有消费者的原因是:permissionFromHost函数正阻塞在select的default分支里的发送操作permissionChannel <- true上,而此时没有任何goroutine从permissionChannel读取,于是主机无法回到select去接收finishEatingChannel,因此发生了死锁。

你可以将permissionChannel通道设置为缓冲通道,这样可以解决该问题。

你的代码中还有一个bug,你正在按值传递互斥锁,这是不允许的。

英文:

The last goroutine will not exit, it will get blocked in its last iteration when it is writing to the finishEatingChannel channel as there are no consumers for it.
The reason there are no consumers for finishEatingChannel is that permissionFromHost is blocked in the default branch of its select on the send permissionChannel <- true. No goroutine is receiving from permissionChannel at that point, so the host never gets back to receiving from finishEatingChannel, and we have a deadlock.

You can make permissionChannel a buffered channel; that will resolve the issue.

There is also a bug in your code, you are passing mutex by value which is not allowed

huangapple
  • 本文由 发表于 2021年6月27日 00:54:01
  • 转载请务必保留本文链接:https://go.coder-hub.com/68144512.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定