Golang:make 切片的性能

huangapple go评论121阅读模式
英文:

Golang: make slice performance

问题

为什么这些基准测试结果差异如此之大?

// Benchmark1 allocates an 8-byte slice with a constant size on every one of
// the b.N iterations and discards it; the results below report 3.37 ns/op.
func Benchmark1(b *testing.B) {
    for n := 0; n < b.N; n++ {
        _ = make([]byte, 8)
    }
}

// Benchmark2 allocates a slice whose size, 7+length, is computed from a local
// variable on every one of the b.N iterations and discards it; the results
// below report 50.6 ns/op.
func Benchmark2(b *testing.B) {
    length := 1
    for n := 0; n < b.N; n++ {
        _ = make([]byte, 7+length)
    }
}

基准测试结果如下:

Benchmark1-8                   	500000000	         3.37 ns/op
Benchmark2-8                   	30000000	        50.6 ns/op
英文:

Why are these benchmark results so different?

// Benchmark1 allocates an 8-byte slice with a constant size on every one of
// the b.N iterations and discards it; the results below report 3.37 ns/op.
func Benchmark1(b *testing.B) {
	for n := 0; n < b.N; n++ {
		_ = make([]byte, 8)
	}
}

// Benchmark2 allocates a slice whose size, 7+length, is computed from a local
// variable on every one of the b.N iterations and discards it; the results
// below report 50.6 ns/op.
func Benchmark2(b *testing.B) {
	length := 1
	for n := 0; n < b.N; n++ {
		_ = make([]byte, 7+length)
	}
}

Benchmark results:

Benchmark1-8                   	500000000	         3.37 ns/op
Benchmark2-8                   	30000000	        50.6 ns/op

答案1

得分: 14

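常量表达式 8 在编译时求值,因此 make 在 goroutine 的栈上分配(开销小)。变量表达式 7 + length 在运行时求值,因此 make 在程序的堆上分配(开销大)。如果 make 的大小超出了栈分配的上限(例如,常量 (64*1024) 和变量 (64*1024-1)+length),那么两次分配都会在堆上进行,基准测试的耗时也就相同。

$ go tool compile -m a_test.go
a_test.go:5: Benchmark1 b does not escape
a_test.go:7: Benchmark1 make([]byte, 8) does not escape
a_test.go:14: make([]byte, 7 + length) escapes to heap
a_test.go:11: Benchmark2 b does not escape
$ 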

a_test.go:

package a

import "testing"

func Benchmark1(b *testing.B) { // allocates a constant-size slice b.N times and discards it
    for n := 0; n < b.N; n++ {
        _ = make([]byte, 8) // constant size 8; the Benchmark1 listing below zeroes the 8 bytes at autotmp_0001(SP) with no runtime call
    }
}

func Benchmark2(b *testing.B) { // allocates a slice sized from a local variable b.N times and discards it
    length := 1 // a variable, so 7+length below is not a constant expression
    for n := 0; n < b.N; n++ {
        _ = make([]byte, 7+length) // the Benchmark2 listing below shows this as CALL runtime.makeslice
    }
}

Go伪汇编代码:

Benchmark1:

"".Benchmark1 t=1 size=112 value=0 args=0x8 locals=0x20
    0x0000 00000 (a_test.go:5)    TEXT    "".Benchmark1(SB), $32-8
    0x0000 00000 (a_test.go:5)    SUBQ    $32, SP
    0x0004 00004 (a_test.go:5)    MOVQ    "".b+40(FP), CX
    0x0009 00009 (a_test.go:5)    FUNCDATA    $0, gclocals·87d20ce1b58390b294df80b886db78bf(SB)
    0x0009 00009 (a_test.go:5)    FUNCDATA    $1, gclocals·790e5cc5051fc0affc980ade09e929ec(SB)
    0x0009 00009 (a_test.go:6)    MOVQ    $0, AX
    0x000b 00011 (a_test.go:6)    NOP
    0x000b 00011 (a_test.go:6)    MOVQ    112(CX), BX
    0x000f 00015 (a_test.go:6)    CMPQ    BX, AX
    0x0012 00018 (a_test.go:6)    JLE    $0, 98
    0x0014 00020 (a_test.go:7)    MOVQ    $0, BX
    0x0016 00022 (a_test.go:7)    MOVB    BL, "".autotmp_0001(SP)
    0x0019 00025 (a_test.go:7)    MOVB    BL, "".autotmp_0001+1(SP)
    0x001d 00029 (a_test.go:7)    MOVB    BL, "".autotmp_0001+2(SP)
    0x0021 00033 (a_test.go:7)    MOVB    BL, "".autotmp_0001+3(SP)
    0x0025 00037 (a_test.go:7)    MOVB    BL, "".autotmp_0001+4(SP)
    0x0029 00041 (a_test.go:7)    MOVB    BL, "".autotmp_0001+5(SP)
    0x002d 00045 (a_test.go:7)    MOVB    BL, "".autotmp_0001+6(SP)
    0x0031 00049 (a_test.go:7)    MOVB    BL, "".autotmp_0001+7(SP)
    0x0035 00053 (a_test.go:7)    LEAQ    "".autotmp_0001(SP), BX
    0x0039 00057 (a_test.go:7)    CMPQ    BX, $0
    0x003d 00061 (a_test.go:7)    JEQ    $1, 103
    0x003f 00063 (a_test.go:7)    MOVQ    $8, "".autotmp_0002+16(SP)
    0x0048 00072 (a_test.go:7)    MOVQ    $8, "".autotmp_0002+24(SP)
    0x0051 00081 (a_test.go:7)    MOVQ    BX, "".autotmp_0002+8(SP)
    0x0056 00086 (a_test.go:6)    INCQ    AX
    0x0059 00089 (a_test.go:6)    NOP
    0x0059 00089 (a_test.go:6)    MOVQ    112(CX), BX
    0x005d 00093 (a_test.go:6)    CMPQ    BX, AX
    0x0060 00096 (a_test.go:6)    JGT    $0, 20
    0x0062 00098 (a_test.go:9)    ADDQ    $32, SP
    0x0066 00102 (a_test.go:9)    RET
    0x0067 00103 (a_test.go:7)    MOVL    AX, (BX)
    0x0069 00105 (a_test.go:7)    JMP    63

Benchmark2:

"".Benchmark2 t=1 size=144 value=0 args=0x8 locals=0x58
    0x0000 00000 (a_test.go:11)    TEXT    "".Benchmark2(SB), $88-8
    0x0000 00000 (a_test.go:11)    MOVQ    (TLS), CX
    0x0009 00009 (a_test.go:11)    CMPQ    SP, 16(CX)
    0x000d 00013 (a_test.go:11)    JLS    129
    0x000f 00015 (a_test.go:11)    SUBQ    $88, SP
    0x0013 00019 (a_test.go:11)    FUNCDATA    $0, gclocals·87d20ce1b58390b294df80b886db78bf(SB)
    0x0013 00019 (a_test.go:11)    FUNCDATA    $1, gclocals·790e5cc5051fc0affc980ade09e929ec(SB)
    0x0013 00019 (a_test.go:12)    MOVQ    $1, "".length+56(SP)
    0x001c 00028 (a_test.go:13)    MOVQ    $0, AX
    0x001e 00030 (a_test.go:13)    MOVQ    "".b+96(FP), BP
    0x0023 00035 (a_test.go:13)    NOP
    0x0023 00035 (a_test.go:13)    MOVQ    112(BP), BX
    0x0027 00039 (a_test.go:13)    MOVQ    AX, "".n+48(SP)
    0x002c 00044 (a_test.go:13)    CMPQ    BX, AX
    0x002f 00047 (a_test.go:13)    JLE    $0, 124
    0x0031 00049 (a_test.go:14)    MOVQ    "".length+56(SP), AX
    0x0036 00054 (a_test.go:14)    ADDQ    $7, AX
    0x003a 00058 (a_test.go:14)    LEAQ    type.[]uint8(SB), BX
    0x0041 00065 (a_test.go:14)    MOVQ    BX, (SP)
    0x0045 00069 (a_test.go:14)    MOVQ    AX, 8(SP)
    0x004a 00074 (a_test.go:14)    MOVQ    AX, 16(SP)
    0x004f 00079 (a_test.go:14)    PCDATA    $0, $0
    0x004f 00079 (a_test.go:14)    CALL    runtime.makeslice(SB)
    0x0054 00084 (a_test.go:14)    MOVQ    24(SP), BX
    0x0059 00089 (a_test.go:14)    MOVQ    BX, "".autotmp_0005+64(SP)
    0x005e 00094 (a_test.go:14)    MOVQ    32(SP), BX
    0x0063 00099 (a_test.go:14)    MOVQ    BX, "".autotmp_0005+72(SP)
    0x0068 00104 (a_test.go:14)    MOVQ    40(SP), BX
    0x006d 00109 (a_test.go:14)    MOVQ    BX, "".autotmp_0005+80(SP)
    0x0072 00114 (a_test.go:13)    MOVQ    "".n+48(SP), AX
    0x0077 00119 (a_test.go:13)    INCQ    AX
    0x007a 00122 (a_test.go:13)    NOP
    0x007a 00122 (a_test.go:13)    JMP    30
    0x007c 00124 (a_test.go:16)    ADDQ    $88, SP
    0x0080 00128 (a_test.go:16)    RET
    0x0081 00129 (a_test.go:11)    CALL    runtime.morestack_noctxt(SB)
    0x0086 00134 (a_test.go:11)    JMP    0
英文:

The constant expression 8 is evaluated at compile time. The make is allocated on a goroutine stack (cheap). The variable expression 7 + length is evaluated at run time. The make is allocated on the program heap (expensive). If the make size is too large for a stack allocation (for example, constant (64*1024) and variable (64*1024-1)+length) then both allocations are made on the heap and the benchmark times are the same.

$ go tool compile -m a_test.go
a_test.go:5: Benchmark1 b does not escape
a_test.go:7: Benchmark1 make([]byte, 8) does not escape
a_test.go:14: make([]byte, 7 + length) escapes to heap
a_test.go:11: Benchmark2 b does not escape
$ 

a_test.go:

package a
import "testing"
// Benchmark1 calls make with the constant size 8; the escape analysis output
// above reports that this slice does not escape.
func Benchmark1(b *testing.B) {
	for n := 0; n < b.N; n++ {
		_ = make([]byte, 8)
	}
}
// Benchmark2 calls make with the run-time size 7+length; the escape analysis output above reports that it escapes to heap.
func Benchmark2(b *testing.B) {
	length := 1 // a variable, so 7+length is evaluated at run time rather than compile time
	for n := 0; n < b.N; n++ {
		_ = make([]byte, 7+length)
	}
}

Go pseudo-assembler:

$ go tool compile -S a_test.go

Benchmark1:

"".Benchmark1 t=1 size=112 value=0 args=0x8 locals=0x20
0x0000 00000 (a_test.go:5)	TEXT	"".Benchmark1(SB), $32-8
0x0000 00000 (a_test.go:5)	SUBQ	$32, SP
0x0004 00004 (a_test.go:5)	MOVQ	"".b+40(FP), CX
0x0009 00009 (a_test.go:5)	FUNCDATA	$0, gclocals·87d20ce1b58390b294df80b886db78bf(SB)
0x0009 00009 (a_test.go:5)	FUNCDATA	$1, gclocals·790e5cc5051fc0affc980ade09e929ec(SB)
0x0009 00009 (a_test.go:6)	MOVQ	$0, AX
0x000b 00011 (a_test.go:6)	NOP
0x000b 00011 (a_test.go:6)	MOVQ	112(CX), BX
0x000f 00015 (a_test.go:6)	CMPQ	BX, AX
0x0012 00018 (a_test.go:6)	JLE	$0, 98
0x0014 00020 (a_test.go:7)	MOVQ	$0, BX
0x0016 00022 (a_test.go:7)	MOVB	BL, "".autotmp_0001(SP)
0x0019 00025 (a_test.go:7)	MOVB	BL, "".autotmp_0001+1(SP)
0x001d 00029 (a_test.go:7)	MOVB	BL, "".autotmp_0001+2(SP)
0x0021 00033 (a_test.go:7)	MOVB	BL, "".autotmp_0001+3(SP)
0x0025 00037 (a_test.go:7)	MOVB	BL, "".autotmp_0001+4(SP)
0x0029 00041 (a_test.go:7)	MOVB	BL, "".autotmp_0001+5(SP)
0x002d 00045 (a_test.go:7)	MOVB	BL, "".autotmp_0001+6(SP)
0x0031 00049 (a_test.go:7)	MOVB	BL, "".autotmp_0001+7(SP)
0x0035 00053 (a_test.go:7)	LEAQ	"".autotmp_0001(SP), BX
0x0039 00057 (a_test.go:7)	CMPQ	BX, $0
0x003d 00061 (a_test.go:7)	JEQ	$1, 103
0x003f 00063 (a_test.go:7)	MOVQ	$8, "".autotmp_0002+16(SP)
0x0048 00072 (a_test.go:7)	MOVQ	$8, "".autotmp_0002+24(SP)
0x0051 00081 (a_test.go:7)	MOVQ	BX, "".autotmp_0002+8(SP)
0x0056 00086 (a_test.go:6)	INCQ	AX
0x0059 00089 (a_test.go:6)	NOP
0x0059 00089 (a_test.go:6)	MOVQ	112(CX), BX
0x005d 00093 (a_test.go:6)	CMPQ	BX, AX
0x0060 00096 (a_test.go:6)	JGT	$0, 20
0x0062 00098 (a_test.go:9)	ADDQ	$32, SP
0x0066 00102 (a_test.go:9)	RET
0x0067 00103 (a_test.go:7)	MOVL	AX, (BX)
0x0069 00105 (a_test.go:7)	JMP	63

Benchmark2:

"".Benchmark2 t=1 size=144 value=0 args=0x8 locals=0x58
0x0000 00000 (a_test.go:11)	TEXT	"".Benchmark2(SB), $88-8
0x0000 00000 (a_test.go:11)	MOVQ	(TLS), CX
0x0009 00009 (a_test.go:11)	CMPQ	SP, 16(CX)
0x000d 00013 (a_test.go:11)	JLS	129
0x000f 00015 (a_test.go:11)	SUBQ	$88, SP
0x0013 00019 (a_test.go:11)	FUNCDATA	$0, gclocals·87d20ce1b58390b294df80b886db78bf(SB)
0x0013 00019 (a_test.go:11)	FUNCDATA	$1, gclocals·790e5cc5051fc0affc980ade09e929ec(SB)
0x0013 00019 (a_test.go:12)	MOVQ	$1, "".length+56(SP)
0x001c 00028 (a_test.go:13)	MOVQ	$0, AX
0x001e 00030 (a_test.go:13)	MOVQ	"".b+96(FP), BP
0x0023 00035 (a_test.go:13)	NOP
0x0023 00035 (a_test.go:13)	MOVQ	112(BP), BX
0x0027 00039 (a_test.go:13)	MOVQ	AX, "".n+48(SP)
0x002c 00044 (a_test.go:13)	CMPQ	BX, AX
0x002f 00047 (a_test.go:13)	JLE	$0, 124
0x0031 00049 (a_test.go:14)	MOVQ	"".length+56(SP), AX
0x0036 00054 (a_test.go:14)	ADDQ	$7, AX
0x003a 00058 (a_test.go:14)	LEAQ	type.[]uint8(SB), BX
0x0041 00065 (a_test.go:14)	MOVQ	BX, (SP)
0x0045 00069 (a_test.go:14)	MOVQ	AX, 8(SP)
0x004a 00074 (a_test.go:14)	MOVQ	AX, 16(SP)
0x004f 00079 (a_test.go:14)	PCDATA	$0, $0
0x004f 00079 (a_test.go:14)	CALL	runtime.makeslice(SB)
0x0054 00084 (a_test.go:14)	MOVQ	24(SP), BX
0x0059 00089 (a_test.go:14)	MOVQ	BX, "".autotmp_0005+64(SP)
0x005e 00094 (a_test.go:14)	MOVQ	32(SP), BX
0x0063 00099 (a_test.go:14)	MOVQ	BX, "".autotmp_0005+72(SP)
0x0068 00104 (a_test.go:14)	MOVQ	40(SP), BX
0x006d 00109 (a_test.go:14)	MOVQ	BX, "".autotmp_0005+80(SP)
0x0072 00114 (a_test.go:13)	MOVQ	"".n+48(SP), AX
0x0077 00119 (a_test.go:13)	INCQ	AX
0x007a 00122 (a_test.go:13)	NOP
0x007a 00122 (a_test.go:13)	JMP	30
0x007c 00124 (a_test.go:16)	ADDQ	$88, SP
0x0080 00128 (a_test.go:16)	RET
0x0081 00129 (a_test.go:11)	CALL	runtime.morestack_noctxt(SB)
0x0086 00134 (a_test.go:11)	JMP	0

huangapple
  • 本文由 发表于 2015年10月25日 08:41:15
  • 转载请务必保留本文链接:https://go.coder-hub.com/33324912.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定