如何使这段包含多个for循环的Go代码运行更快?

huangapple go评论86阅读模式
英文:

How can i make this go code with multiple for loops faster?

问题

我最近开始着手这个项目。这是一个关于集体运动的模型。
在这段代码中,8192个粒子1000000个步骤中移动,每一步中每个粒子的位置和角度都会更新。在每一步中,每个粒子的邻居都会与该特定粒子同步角度。
我用Python写了一段代码,但是运行得很慢。然后我用Go语言写了这段代码,但速度还是不够快。每一步平均需要1.3秒,这不太好。
你们有没有任何关于如何加快速度的想法?

package main

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"math"
	"math/rand"
	"os"
	"runtime"
	"time"
)

const (
	n_particles int     = 8192
	n_steps     int     = 1000000
	dt          float64 = 1.0
	v0          float64 = 0.5
	radius      float64 = 1.0
	f_intensity float64 = 1.2
	scale       float64 = 128.0

	alpha float64 = 1 / 36
)

var (
	x      [n_particles + 1]float64
	y      [n_particles + 1]float64
	angles [n_particles + 1]float64
	vx     [n_particles + 1]float64
	vy     [n_particles + 1]float64
	order  [n_steps + 1]float64
	fxk    float64
	fyk    float64
	fxi    float64
	fyi    float64
)

func main() {

	vstart := time.Now()
	rsource := rand.NewSource(time.Now().UnixNano())
	randomizer := rand.New(rsource)
	//定义随机数据
	for i := 0; i <= n_particles; {
		x[i] = (randomizer.Float64()) * scale
		y[i] = (randomizer.Float64()) * scale
		angles[i] = (randomizer.Float64()) * math.Pi * 2
		vx[i] = v0 * float64(math.Cos(angles[i]))
		vy[i] = v0 * float64(math.Sin(angles[i]))

		i = i + 1

	}

	for i := 0; i <= n_steps; {

		start := time.Now()
		cosangles := 0.0
		sinangles := 0.0

		for j := 0; j <= n_particles; {

			x[j] = x[j] + (vx[j] * dt)
			x[j] = math.Mod(x[j], scale)
			y[j] = y[j] + (vy[j] * dt)
			y[j] = math.Mod(x[j], scale)

			j = j + 1
		}

		m_angles := angles

		for j := 0; j <= n_particles; {
			sumanglesx := 0.0
			sumanglesy := 0.0
			fxi = math.Floor(x[j])
			fyi = math.Floor(y[j])

			for k := 0; k <= n_particles; {
				if k != j {
					fxk = math.Floor(x[k])
					fyk = math.Floor(y[k])

					if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk) || (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk) || (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk) || (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk) || (fxi+1 == fxk && fyi-1 == fyk) {

						dist := math.Pow((x[k]-x[j]), 2) + math.Pow((y[k]-y[j]), 2)
						if dist < radius {
							if k > j {
								sx := math.Cos(angles[k])
								sy := math.Sin(angles[k])
								sumanglesx = sumanglesx + sx
								sumanglesy = sumanglesy + sy

							} else if k < j {
								sx := alpha * math.Cos(angles[k])
								sy := alpha * math.Sin(angles[k])
								sumanglesx = sumanglesx + sx
								sumanglesy = sumanglesy + sy

							}

						}

					}

				}

				k = k + 1

			}

			m_angles[j] = math.Atan2(sumanglesx, sumanglesy) + (f_intensity*randomizer.Float64() - f_intensity)

			j = j + 1
		}

		angles = m_angles
		for f := 0; f <= n_particles; {

			vx[f] = v0 * float64(math.Cos(angles[f]))
			vy[f] = v0 * float64(math.Sin(angles[f]))
			f = f + 1
		}

		for h := 0; h <= n_particles; {
			cosangles = cosangles + (math.Cos(angles[h]))
			sinangles = sinangles + (math.Sin(angles[h]))
			h = h + 1
		}

		core := (math.Sqrt(((math.Pow(cosangles, 2)) + (math.Pow(cosangles, 2))))) / float64(n_particles)
		order[i] = core
		duration := time.Since(start)
		fmt.Println(i)
		fmt.Println(duration)
		i = i + 1

	}
	vduration := time.Since(vstart)
	log.Printf("It took : %s", vduration)
	f, _ := os.Create("order.txt")
	for i := 0; i <= n_steps; {

		s := fmt.Sprintf("%f", order[i])

		w := bufio.NewWriter(f)
		w.WriteString(s + "\n")
		w.Flush()

		i = i + 1

	}

}
英文:

I recently started to work on this project . It's a model about collective motion.
In this code 8192 particle move in 1000000 steps and in each step each particles position and angle
updates. in each step neighbors of each particle sync their angle with that specific particle.
I wrote a code in python but that was so slow . then I wrote this code in go but it's still not fast . each step takes average 1.3 s which is not good .
do u guys have any idea about making it faster ?

package main
import (
&quot;bufio&quot;
&quot;flag&quot;
&quot;fmt&quot;
&quot;log&quot;
&quot;math&quot;
&quot;math/rand&quot;
&quot;os&quot;
&quot;runtime&quot;
&quot;time&quot;
)
const (
n_particles int     = 8192
n_steps     int     = 1000000
dt          float64 = 1.0
v0          float64 = 0.5
radius      float64 = 1.0
f_intensity float64 = 1.2
scale       float64 = 128.0
alpha float64 = 1 / 36
)
var (
x      [n_particles + 1]float64
y      [n_particles + 1]float64
angles [n_particles + 1]float64
vx     [n_particles + 1]float64
vy     [n_particles + 1]float64
order  [n_steps + 1]float64
fxk    float64
fyk    float64
fxi    float64
fyi    float64
)
func main() {
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
//defining random data
for i := 0; i &lt;= n_particles; {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
vx[i] = v0 * float64(math.Cos(angles[i]))
vy[i] = v0 * float64(math.Sin(angles[i]))
i = i + 1
}
for i := 0; i &lt;= n_steps; {
start := time.Now()
cosangles := 0.0
sinangles := 0.0
for j := 0; j &lt;= n_particles; {
x[j] = x[j] + (vx[j] * dt)
x[j] = math.Mod(x[j], scale)
y[j] = y[j] + (vy[j] * dt)
y[j] = math.Mod(x[j], scale)
j = j + 1
}
m_angles := angles
for j := 0; j &lt;= n_particles; {
sumanglesx := 0.0
sumanglesy := 0.0
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k &lt;= n_particles; {
if k != j {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk &amp;&amp; fyi == fyk) || (fxi+1 == fxk &amp;&amp; fyi == fyk) || (fxi-1 == fxk &amp;&amp; fyi == fyk) || (fxi == fxk &amp;&amp; fyi+1 == fyk) || (fxi == fxk &amp;&amp; fyi-1 == fyk) || (fxi+1 == fxk &amp;&amp; fyi+1 == fyk) || (fxi-1 == fxk &amp;&amp; fyi-1 == fyk) || (fxi-1 == fxk &amp;&amp; fyi+1 == fyk) || (fxi+1 == fxk &amp;&amp; fyi-1 == fyk) {
dist := math.Pow((x[k]-x[j]), 2) + math.Pow((y[k]-y[j]), 2)
if dist &lt; radius {
if k &gt; j {
sx := math.Cos(angles[k])
sy := math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else if k &lt; j {
sx := alpha * math.Cos(angles[k])
sy := alpha * math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
k = k + 1
}
m_angles[j] = math.Atan2(sumanglesx, sumanglesy) + (f_intensity*randomizer.Float64() - f_intensity)
j = j + 1
}
angles = m_angles
for f := 0; f &lt;= n_particles; {
vx[f] = v0 * float64(math.Cos(angles[f]))
vy[f] = v0 * float64(math.Sin(angles[f]))
f = f + 1
}
for h := 0; h &lt;= n_particles; {
cosangles = cosangles + (math.Cos(angles[h]))
sinangles = sinangles + (math.Sin(angles[h]))
h = h + 1
}
core := (math.Sqrt(((math.Pow(cosangles, 2)) + (math.Pow(cosangles, 2))))) / float64(n_particles)
order[i] = core
duration := time.Since(start)
fmt.Println(i)
fmt.Println(duration)
i = i + 1
}
vduration := time.Since(vstart)
log.Printf(&quot;It took : %s&quot;, vduration)
f, _ := os.Create(&quot;order.txt&quot;)
for i := 0; i &lt;= n_steps; {
s := fmt.Sprintf(&quot;%f&quot;, order[i])
w := bufio.NewWriter(f)
w.WriteString(s + &quot;\n&quot;)
w.Flush()
i = i + 1
}
}

答案1

得分: 0

首先是一些小的快速优化:

  • math.Pow(x, 2) => x * x

  • math.Sinmath.Cos => math.Sincos

但更大的问题是在n_particles2上进行循环,这是浪费的,因为显然只有相邻的粒子之间有相互作用:

for j := 0; j <= n_particles; j++ {
    fxi = math.Floor(x[j])
    fyi = math.Floor(y[j])
    for k := 0; k <= n_particles; k++ {
        fxk = math.Floor(x[k])
        fyk = math.Floor(y[k])
        if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk)
            || (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk)
            || (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk)
            || (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk)
            || (fxi+1 == fxk && fyi-1 == fyk) {
            // ...

如何在没有N2嵌套循环的情况下循环遍历相邻的粒子?也许如果你维护一个帮助器的邻接索引,将整数坐标映射到粒子,你可以在一次遍历中完成?

for i := 0; i <= n_steps; i++ {
    // ...
	type intpos struct {
		x, y int64
	}
	adjacencyIndex := make(map[intpos][]int)

	for j := 0; j <= n_particles; j++ {
        // ...
		ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
		adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j)
	}
    // ...
	for j := 0; j <= n_particles; j++ {
        // ...
		ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
		for dx := -1; dx <= 1; dx++ {
			for dy := -1; dy <= 1; dy++ {
				adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
				for _, k := range adjacentParticles {
					if j < k {
						// 处理粒子j和k之间的相互作用
					}
				}
			}
		}
        // ...

请注意,上述代码将导致每对相邻粒子之间只有一次相互作用。如果你需要双向的相互作用(即j <=> kk <=> j),将j < k更改为j != k

进一步改进的方法是在粒子移动时更新邻接索引,而不是在每次迭代中重新构建它。

英文:

First some minor quick wins:

  • math.Pow(x, 2) => x * x

  • math.Sin, math.Cos => math.Sincos

But a bigger issue is looping over n_particles<sup>2</sup>, which is wasteful given that apparently only adjacent particles interact:

for j := 0; j &lt;= n_particles; j++ {
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k &lt;= n_particles; k++ {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk &amp;&amp; fyi == fyk) || (fxi+1 == fxk &amp;&amp; fyi == fyk)
|| (fxi-1 == fxk &amp;&amp; fyi == fyk) || (fxi == fxk &amp;&amp; fyi+1 == fyk)
|| (fxi == fxk &amp;&amp; fyi-1 == fyk) || (fxi+1 == fxk &amp;&amp; fyi+1 == fyk)
|| (fxi-1 == fxk &amp;&amp; fyi-1 == fyk) || (fxi-1 == fxk &amp;&amp; fyi+1 == fyk)
|| (fxi+1 == fxk &amp;&amp; fyi-1 == fyk) {
// ...

How can you loop over adjacent particles without a N<sup>2</sup> nested loop? Perhaps if you maintain a helper adjacency index mapping integral x/y coordinates to particles, you can loop over it in a single pass?

for i := 0; i &lt;= n_steps; i++ {
// . . .
type intpos struct {
x, y int64
}
adjacencyIndex := make(map[intpos][]int)
for j := 0; j &lt;= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j)
}
// . . .
for j := 0; j &lt;= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
for dx := -1; dx &lt;= 1; dx++ {
for dy := -1; dy &lt;= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
if j &lt; k {
// process interaction between particles j and k
}
}
}
}
// . . .

Note that the above will result in a single interaction between each pair of adjacent particles. If you need a bidirectional interaction (i.e. j &lt;=&gt; k and k &lt;=&gt; j), change j &lt; k to j != k.

As a further improvement you can update the adjacency index as particles move around instead of rebuilding it in each iteration.

huangapple
  • 本文由 发表于 2021年7月14日 15:43:29
  • 转载请务必保留本文链接:https://go.coder-hub.com/68373999.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定