英文:
How can i make this go code with multiple for loops faster?
问题
我最近开始着手这个项目。这是一个关于集体运动的模型。
在这段代码中,8192个粒子在1000000个步骤中移动,每一步中每个粒子的位置和角度都会更新。在每一步中,每个粒子的邻居都会与该特定粒子同步角度。
我用Python写了一段代码,但是运行得很慢。然后我用Go语言写了这段代码,但速度还是不够快。每一步平均需要1.3秒,这不太好。
你们有没有任何关于如何加快速度的想法?
package main
import (
"bufio"
"flag"
"fmt"
"log"
"math"
"math/rand"
"os"
"runtime"
"time"
)
const (
n_particles int = 8192
n_steps int = 1000000
dt float64 = 1.0
v0 float64 = 0.5
radius float64 = 1.0
f_intensity float64 = 1.2
scale float64 = 128.0
alpha float64 = 1 / 36
)
var (
x [n_particles + 1]float64
y [n_particles + 1]float64
angles [n_particles + 1]float64
vx [n_particles + 1]float64
vy [n_particles + 1]float64
order [n_steps + 1]float64
fxk float64
fyk float64
fxi float64
fyi float64
)
func main() {
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
//定义随机数据
for i := 0; i <= n_particles; {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
vx[i] = v0 * float64(math.Cos(angles[i]))
vy[i] = v0 * float64(math.Sin(angles[i]))
i = i + 1
}
for i := 0; i <= n_steps; {
start := time.Now()
cosangles := 0.0
sinangles := 0.0
for j := 0; j <= n_particles; {
x[j] = x[j] + (vx[j] * dt)
x[j] = math.Mod(x[j], scale)
y[j] = y[j] + (vy[j] * dt)
y[j] = math.Mod(x[j], scale)
j = j + 1
}
m_angles := angles
for j := 0; j <= n_particles; {
sumanglesx := 0.0
sumanglesy := 0.0
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; {
if k != j {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk) || (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk) || (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk) || (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk) || (fxi+1 == fxk && fyi-1 == fyk) {
dist := math.Pow((x[k]-x[j]), 2) + math.Pow((y[k]-y[j]), 2)
if dist < radius {
if k > j {
sx := math.Cos(angles[k])
sy := math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else if k < j {
sx := alpha * math.Cos(angles[k])
sy := alpha * math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
k = k + 1
}
m_angles[j] = math.Atan2(sumanglesx, sumanglesy) + (f_intensity*randomizer.Float64() - f_intensity)
j = j + 1
}
angles = m_angles
for f := 0; f <= n_particles; {
vx[f] = v0 * float64(math.Cos(angles[f]))
vy[f] = v0 * float64(math.Sin(angles[f]))
f = f + 1
}
for h := 0; h <= n_particles; {
cosangles = cosangles + (math.Cos(angles[h]))
sinangles = sinangles + (math.Sin(angles[h]))
h = h + 1
}
core := (math.Sqrt(((math.Pow(cosangles, 2)) + (math.Pow(cosangles, 2))))) / float64(n_particles)
order[i] = core
duration := time.Since(start)
fmt.Println(i)
fmt.Println(duration)
i = i + 1
}
vduration := time.Since(vstart)
log.Printf("It took : %s", vduration)
f, _ := os.Create("order.txt")
for i := 0; i <= n_steps; {
s := fmt.Sprintf("%f", order[i])
w := bufio.NewWriter(f)
w.WriteString(s + "\n")
w.Flush()
i = i + 1
}
}
英文:
I recently started to work on this project . It's a model about collective motion.
In this code 8192 particle move in 1000000 steps and in each step each particles position and angle
updates. in each step neighbors of each particle sync their angle with that specific particle.
I wrote a code in python but that was so slow . then I wrote this code in go but it's still not fast . each step takes average 1.3 s which is not good .
do u guys have any idea about making it faster ?
package main
import (
"bufio"
"flag"
"fmt"
"log"
"math"
"math/rand"
"os"
"runtime"
"time"
)
const (
n_particles int = 8192
n_steps int = 1000000
dt float64 = 1.0
v0 float64 = 0.5
radius float64 = 1.0
f_intensity float64 = 1.2
scale float64 = 128.0
alpha float64 = 1 / 36
)
var (
x [n_particles + 1]float64
y [n_particles + 1]float64
angles [n_particles + 1]float64
vx [n_particles + 1]float64
vy [n_particles + 1]float64
order [n_steps + 1]float64
fxk float64
fyk float64
fxi float64
fyi float64
)
func main() {
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
//defining random data
for i := 0; i <= n_particles; {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
vx[i] = v0 * float64(math.Cos(angles[i]))
vy[i] = v0 * float64(math.Sin(angles[i]))
i = i + 1
}
for i := 0; i <= n_steps; {
start := time.Now()
cosangles := 0.0
sinangles := 0.0
for j := 0; j <= n_particles; {
x[j] = x[j] + (vx[j] * dt)
x[j] = math.Mod(x[j], scale)
y[j] = y[j] + (vy[j] * dt)
y[j] = math.Mod(x[j], scale)
j = j + 1
}
m_angles := angles
for j := 0; j <= n_particles; {
sumanglesx := 0.0
sumanglesy := 0.0
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; {
if k != j {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk) || (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk) || (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk) || (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk) || (fxi+1 == fxk && fyi-1 == fyk) {
dist := math.Pow((x[k]-x[j]), 2) + math.Pow((y[k]-y[j]), 2)
if dist < radius {
if k > j {
sx := math.Cos(angles[k])
sy := math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else if k < j {
sx := alpha * math.Cos(angles[k])
sy := alpha * math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
k = k + 1
}
m_angles[j] = math.Atan2(sumanglesx, sumanglesy) + (f_intensity*randomizer.Float64() - f_intensity)
j = j + 1
}
angles = m_angles
for f := 0; f <= n_particles; {
vx[f] = v0 * float64(math.Cos(angles[f]))
vy[f] = v0 * float64(math.Sin(angles[f]))
f = f + 1
}
for h := 0; h <= n_particles; {
cosangles = cosangles + (math.Cos(angles[h]))
sinangles = sinangles + (math.Sin(angles[h]))
h = h + 1
}
core := (math.Sqrt(((math.Pow(cosangles, 2)) + (math.Pow(cosangles, 2))))) / float64(n_particles)
order[i] = core
duration := time.Since(start)
fmt.Println(i)
fmt.Println(duration)
i = i + 1
}
vduration := time.Since(vstart)
log.Printf("It took : %s", vduration)
f, _ := os.Create("order.txt")
for i := 0; i <= n_steps; {
s := fmt.Sprintf("%f", order[i])
w := bufio.NewWriter(f)
w.WriteString(s + "\n")
w.Flush()
i = i + 1
}
}
答案1
得分: 0
首先是一些小的快速优化:
-
math.Pow(x, 2)
=>x * x
-
math.Sin
、math.Cos
=>math.Sincos
但更大的问题是在n_particles2上进行循环,这是浪费的,因为显然只有相邻的粒子之间有相互作用:
for j := 0; j <= n_particles; j++ {
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; k++ {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk)
|| (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk)
|| (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk)
|| (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk)
|| (fxi+1 == fxk && fyi-1 == fyk) {
// ...
如何在没有N2嵌套循环的情况下循环遍历相邻的粒子?也许如果你维护一个帮助器的邻接索引,将整数坐标映射到粒子,你可以在一次遍历中完成?
for i := 0; i <= n_steps; i++ {
// ...
type intpos struct {
x, y int64
}
adjacencyIndex := make(map[intpos][]int)
for j := 0; j <= n_particles; j++ {
// ...
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j)
}
// ...
for j := 0; j <= n_particles; j++ {
// ...
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
for dx := -1; dx <= 1; dx++ {
for dy := -1; dy <= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
if j < k {
// 处理粒子j和k之间的相互作用
}
}
}
}
// ...
请注意,上述代码将导致每对相邻粒子之间只有一次相互作用。如果你需要双向的相互作用(即j <=> k
和k <=> j
),将j < k
更改为j != k
。
进一步改进的方法是在粒子移动时更新邻接索引,而不是在每次迭代中重新构建它。
英文:
First some minor quick wins:
-
math.Pow(x, 2)
=>x * x
-
math.Sin
,math.Cos
=>math.Sincos
But a bigger issue is looping over n_particles<sup>2</sup>, which is wasteful given that apparently only adjacent particles interact:
for j := 0; j <= n_particles; j++ {
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; k++ {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk)
|| (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk)
|| (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk)
|| (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk)
|| (fxi+1 == fxk && fyi-1 == fyk) {
// ...
How can you loop over adjacent particles without a N<sup>2</sup> nested loop? Perhaps if you maintain a helper adjacency index mapping integral x/y coordinates to particles, you can loop over it in a single pass?
for i := 0; i <= n_steps; i++ {
// . . .
type intpos struct {
x, y int64
}
adjacencyIndex := make(map[intpos][]int)
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j)
}
// . . .
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
for dx := -1; dx <= 1; dx++ {
for dy := -1; dy <= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
if j < k {
// process interaction between particles j and k
}
}
}
}
// . . .
Note that the above will result in a single interaction between each pair of adjacent particles. If you need a bidirectional interaction (i.e. j <=> k
and k <=> j
), change j < k
to j != k
.
As a further improvement you can update the adjacency index as particles move around instead of rebuilding it in each iteration.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论