Golang:数组的索引效率

huangapple go评论84阅读模式
英文:

golang: index efficiency of array

问题

这是一个简单的程序。

测试环境:Debian 8,Go 1.4.2

union.go:

package main

import "fmt"

// A is the element type of the benchmark array: a 32-bit tag followed by a
// 64-bit value.
type A struct {
    t int32 // set by the array literal in test, never read by the loop
    u int64 // the value that test accumulates
}

// test adds up the u field of a two-element array five billion times,
// alternating between the two elements via i%2, and returns the sum through
// the named result total.
//
// NOTE(review): i is a plain int, so the constant 5000000000 is only
// representable where int is 64 bits wide; on a 32-bit target this loop is
// expected not to compile -- confirm the target platform.
func test() (total int64) {
    a := [...]A{{1, 100}, {2, 3}}

    for i := 0; i < 5000000000; i++ {
        // The indexed element access below is the operation whose generated
        // code is being compared with the C version.
        p := &a[i%2]
        total += p.u
    }
    return // naked return of the named result total
}

// main runs the benchmark loop once and prints the resulting sum.
func main() {
    total := test()
    fmt.Println(total)
}

union.c:

#include <stdio.h>

/* Element type of the benchmark array: an int tag followed by a long value. */
struct A {
    int t; /* assigned in test() but never read by the loop */
    long u; /* the value that test() accumulates */
};

/*
 * Adds up the u field of a two-element array five billion times, alternating
 * between the two elements via i % 2, and returns the sum.
 *
 * NOTE(review): the loop bound 5000000000 needs a 64-bit long; where long is
 * 32 bits wide the counter cannot reach it -- confirm the target platform.
 */
long test()
{
    struct A a[2];
    a[0].t = 1;
    a[0].u = 100;
    a[1].t = 2;
    a[1].u = 3;

    long total = 0;
    long i;
    for (i = 0; i < 5000000000; i++) {
        /* Indexed element access: the operation being compared with Go. */
        struct A* p = &a[i % 2];
        total += p->u;
    }
    return total;
}

/* Runs the benchmark loop once and prints the resulting sum. */
int main()
{
    long total = test();
    printf("%ld\n", total);
}

结果比较:

Go:

257500000000

real    0m9.167s
user    0m9.196s
sys     0m0.012s

C:

257500000000

real    0m3.585s
user    0m3.560s
sys     0m0.008s

看起来Go编译生成了很多奇怪的汇编代码(你可以使用objdump -D来查看)。

例如,为什么movabs $0x12a05f200,%rbp出现了两次?

400c60:       31 c0                   xor    %eax,%eax
400c62:       48 bd 00 f2 05 2a 01    movabs $0x12a05f200,%rbp
400c69:       00 00 00 
400c6c:       48 39 e8                cmp    %rbp,%rax
400c6f:       7d 46                   jge    400cb7 <main.test+0xb7>
400c71:       48 89 c1                mov    %rax,%rcx
400c74:       48 c1 f9 3f             sar    $0x3f,%rcx
400c78:       48 89 c3                mov    %rax,%rbx
400c7b:       48 29 cb                sub    %rcx,%rbx
400c7e:       48 83 e3 01             and    $0x1,%rbx
400c82:       48 01 cb                add    %rcx,%rbx
400c85:       48 8d 2c 24             lea    (%rsp),%rbp
400c89:       48 83 fb 02             cmp    $0x2,%rbx
400c8d:       73 2d                   jae    400cbc <main.test+0xbc>
400c8f:       48 6b db 10             imul   $0x10,%rbx,%rbx
400c93:       48 01 dd                add    %rbx,%rbp
400c96:       48 8b 5d 08             mov    0x8(%rbp),%rbx
400c9a:       48 01 f3                add    %rsi,%rbx
400c9d:       48 89 de                mov    %rbx,%rsi
400ca0:       48 89 5c 24 28          mov    %rbx,0x28(%rsp)
400ca5:       48 ff c0                inc    %rax
400ca8:       48 bd 00 f2 05 2a 01    movabs $0x12a05f200,%rbp
400caf:       00 00 00 
400cb2:       48 39 e8                cmp    %rbp,%rax
400cb5:       7c ba                   jl     400c71 <main.test+0x71>
400cb7:       48 83 c4 20             add    $0x20,%rsp
400cbb:       c3                      retq   
400cbc:       e8 6f e0 00 00          callq  40ed30 <runtime.panicindex>
400cc1:       0f 0b                   ud2    
      ...

而C的汇编代码更加简洁:

0000000000400570 <test>:
  400570:       48 c7 44 24 e0 64 00    movq   $0x64,-0x20(%rsp)
  400577:       00 00 
  400579:       48 c7 44 24 f0 03 00    movq   $0x3,-0x10(%rsp)
  400580:       00 00 
  400582:       b9 64 00 00 00          mov    $0x64,%ecx
  400587:       31 d2                   xor    %edx,%edx
  400589:       31 c0                   xor    %eax,%eax
  40058b:       48 be 00 f2 05 2a 01    movabs $0x12a05f200,%rsi
  400592:       00 00 00 
  400595:       eb 18                   jmp    4005af <test+0x3f>
  400597:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  40059e:       00 00 
  4005a0:       48 89 d1                mov    %rdx,%rcx
  4005a3:       83 e1 01                and    $0x1,%ecx
  4005a6:       48 c1 e1 04             shl    $0x4,%rcx
  4005aa:       48 8b 4c 0c e0          mov    -0x20(%rsp,%rcx,1),%rcx
  4005af:       48 83 c2 01             add    $0x1,%rdx
  4005b3:       48 01 c8                add    %rcx,%rax
  4005b6:       48 39 f2                cmp    %rsi,%rdx
  4005b9:       75 e5                   jne    4005a0 <test+0x30>
  4005bb:       f3 c3                   repz retq 
  4005bd:       0f 1f 00                nopl   (%rax)

有人能解释一下吗?谢谢!

英文:

It's a simple program.
Test environment: Debian 8, Go 1.4.2

union.go:

package main

import "fmt"

type A struct {
	t int32
	u int64
}

func test() (total int64) {
	a := [...]A{{1, 100}, {2, 3}}

	for i := 0; i < 5000000000; i++ {
		p := &a[i%2]
		total += p.u
	}
	return
}
func main() {
	total := test()
	fmt.Println(total)
}

union.c:

#include <stdio.h>

struct A {
	int t;
	long u;
};

long test()
{
	struct A a[2];
	a[0].t = 1;
	a[0].u = 100;
	a[1].t = 2;
	a[1].u = 3;
	
	long total = 0;
	long i;
	for (i = 0; i < 5000000000; i++) {
		struct A* p = &a[i % 2];
		total += p->u;
	}
	return total;
}
int main()
{
	long total = test();
	printf("%ld\n", total);
}

Result comparison:

go:

257500000000

real	0m9.167s
user	0m9.196s
sys	0m0.012s

C:

257500000000

real	0m3.585s
user	0m3.560s
sys	0m0.008s

It seems that the Go compiler generates a lot of weird assembly code (you can use objdump -D to check it).

For example, why does movabs $0x12a05f200,%rbp appear twice?

  400c60:       31 c0                   xor    %eax,%eax
  400c62:       48 bd 00 f2 05 2a 01    movabs $0x12a05f200,%rbp
  400c69:       00 00 00 
  400c6c:       48 39 e8                cmp    %rbp,%rax
  400c6f:       7d 46                   jge    400cb7 <main.test+0xb7>
  400c71:       48 89 c1                mov    %rax,%rcx
  400c74:       48 c1 f9 3f             sar    $0x3f,%rcx
  400c78:       48 89 c3                mov    %rax,%rbx
  400c7b:       48 29 cb                sub    %rcx,%rbx
  400c7e:       48 83 e3 01             and    $0x1,%rbx
  400c82:       48 01 cb                add    %rcx,%rbx
  400c85:       48 8d 2c 24             lea    (%rsp),%rbp
  400c89:       48 83 fb 02             cmp    $0x2,%rbx
  400c8d:       73 2d                   jae    400cbc <main.test+0xbc>
  400c8f:       48 6b db 10             imul   $0x10,%rbx,%rbx
  400c93:       48 01 dd                add    %rbx,%rbp
  400c96:       48 8b 5d 08             mov    0x8(%rbp),%rbx
  400c9a:       48 01 f3                add    %rsi,%rbx
  400c9d:       48 89 de                mov    %rbx,%rsi
  400ca0:       48 89 5c 24 28          mov    %rbx,0x28(%rsp)
  400ca5:       48 ff c0                inc    %rax
  400ca8:       48 bd 00 f2 05 2a 01    movabs $0x12a05f200,%rbp
  400caf:       00 00 00 
  400cb2:       48 39 e8                cmp    %rbp,%rax
  400cb5:       7c ba                   jl     400c71 <main.test+0x71>
  400cb7:       48 83 c4 20             add    $0x20,%rsp
  400cbb:       c3                      retq   
  400cbc:       e8 6f e0 00 00          callq  40ed30 <runtime.panicindex>
  400cc1:       0f 0b                   ud2    
        ...

while the C assembly is much cleaner:

0000000000400570 <test>:
  400570:       48 c7 44 24 e0 64 00    movq   $0x64,-0x20(%rsp)
  400577:       00 00 
  400579:       48 c7 44 24 f0 03 00    movq   $0x3,-0x10(%rsp)
  400580:       00 00 
  400582:       b9 64 00 00 00          mov    $0x64,%ecx
  400587:       31 d2                   xor    %edx,%edx
  400589:       31 c0                   xor    %eax,%eax
  40058b:       48 be 00 f2 05 2a 01    movabs $0x12a05f200,%rsi
  400592:       00 00 00 
  400595:       eb 18                   jmp    4005af <test+0x3f>
  400597:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
  40059e:       00 00 
  4005a0:       48 89 d1                mov    %rdx,%rcx
  4005a3:       83 e1 01                and    $0x1,%ecx
  4005a6:       48 c1 e1 04             shl    $0x4,%rcx
  4005aa:       48 8b 4c 0c e0          mov    -0x20(%rsp,%rcx,1),%rcx
  4005af:       48 83 c2 01             add    $0x1,%rdx
  4005b3:       48 01 c8                add    %rcx,%rax
  4005b6:       48 39 f2                cmp    %rsi,%rdx
  4005b9:       75 e5                   jne    4005a0 <test+0x30>
  4005bb:       f3 c3                   repz retq 
  4005bd:       0f 1f 00                nopl   (%rax)

Could somebody explain it? Thanks!

答案1

得分: 3

主要区别在于数组边界检查。在Go程序的反汇编转储中,可以看到以下内容:

400c89:       48 83 fb 02             cmp    $0x2,%rbx
400c8d:       73 2d                   jae    400cbc <main.test+0xbc>
...
400cbc:       e8 6f e0 00 00          callq  40ed30 <runtime.panicindex>
400cc1:       0f 0b                   ud2    

因此,如果 %rbx 大于或等于2,则跳转到调用 runtime.panicindex 的位置。考虑到你正在使用大小为2的数组,这显然是边界检查。你可以认为编译器应该足够聪明,在可以静态确定索引范围的特定情况下跳过边界检查,但似乎它还不够聪明。

虽然在这个微基准测试中你看到了明显的性能差异,但值得考虑的是,这是否真正代表了你实际代码的情况。如果在循环中还有其他操作,差异可能不太明显。

虽然边界检查确实会带来一些开销,但在许多情况下,这比程序继续执行未定义行为的替代方案要好。

英文:

The main difference is the array bounds checking. In the disassembly dump for the Go program, there is:

400c89:       48 83 fb 02             cmp    $0x2,%rbx
400c8d:       73 2d                   jae    400cbc <main.test+0xbc>
...
400cbc:       e8 6f e0 00 00          callq  40ed30 <runtime.panicindex>
400cc1:       0f 0b                   ud2    

So if %rbx is greater than or equal to 2, then it jumps down to a call to runtime.panicindex. Given you're working with an array of size 2, that is clearly the bounds check. You could make the argument that the compiler should be smart enough to skip the bounds check in this particular case where the range of the index can be determined statically, but it seems that it isn't smart enough to do so yet.

While you're seeing a noticeable performance difference for this micro-benchmark, it might be worth considering whether this is actually representative of your actual code. If you're doing other stuff in your loop, the difference is likely to be less noticeable.

And while bounds checking does have a cost, in many cases it is better than the alternative of the program continuing with undefined behaviour.

huangapple
  • 本文由 发表于 2015年7月22日 14:40:56
  • 转载请务必保留本文链接:https://go.coder-hub.com/31555672.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定