这段Go代码如何比C版本更快?

huangapple go评论151阅读模式
英文:

How can this Go code be faster than the C version

问题

根据我所了解,以下程序是相同的。然而,对于小规模的数据(<30000),Go版本的速度要快得多。

在Go中大小为1000的时间35.417μs
在C中大小为1000的时间74.000000μs

架构:
Darwin,arm64

C代码是使用以下命令编译的:

clang -O2 -march=native -mtune=native -g main.c

Go代码如下:

package main

import (
   "fmt"
   "math/rand"
   "time"
)

func ArraySplice(array []int, index int) []int {
   return append(array[:index], array[index+1:]...)
}

func main() {
   s := 1000;
   array := make([]int, s)

   for i := 0; i < s; i++ {
      array[i] = rand.Intn(s)
   }

   start := time.Now()
   for len(array) > 0 {
      array = ArraySplice(array, array[len(array)/2] % len(array))
   }
   end := time.Now()
   fmt.Printf("大小为%d的时间:%v\n", s, end.Sub(start))
}

C代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static inline size_t Array_Splice(int* array, size_t size, size_t index) {
   if (index >= size)
      return size;
   for (size_t i = index; i < size - 1; i++) {
      array[i] = array[i + 1];
   }
   return size - 1;
}

int main(void)
{
   const size_t size = 1000;
   size_t array_size = size;
   int* array = malloc(array_size * sizeof(int));
   if (array == NULL) {
      return 1;
   }

   for (size_t i = 0; i < size; i++) {
      array[i] = rand() % size;
   }

   const clock_t start = clock();
   while (array_size > 0) {
      array_size = Array_Splice(array, array_size, array[array_size/2] % array_size);
   }
   const clock_t end = clock();
   const double time_taken = (double)(end - start) / (CLOCKS_PER_SEC / 1000000.0f);
   printf("大小为%lu的时间:%fμs\n", size, time_taken);

   free(array);
   return 0;
}

我尝试对两者的代码进行反汇编,但Go的汇编代码很难阅读。时间记录方法没有错误,因为完成时间与数组大小成线性关系。值得注意的是,对于较大的数组,C开始显著领先,这可能是由于增加的垃圾回收调用。

英文:

The following programs are, as far as I can tell, identical. However, for small sizes (&lt;30000), the go version is considerably faster.

Time taken for size 1000 in Go: 35.417&#181;s
Time taken for size 1000 in C:  74.000000us

Architecture:
Darwin, arm64

The C code was compiled with the following command:

clang -O2 -march=native -mtune=native -g main.c

Go:

package main

import (
   &quot;fmt&quot;
   &quot;math/rand&quot;
   &quot;time&quot;
)

func ArraySplice(array []int, index int) []int {
   return append(array[:index], array[index+1:]...)
}

func main() {
   s := 1000;
   array := make([]int, s)

   for i := 0; i &lt; s; i++ {
      array[i] = rand.Intn(s)
   }

   start := time.Now()
   for len(array) &gt; 0 {
      array = ArraySplice(array, array[len(array)/2] % len(array))
   }
   end := time.Now()
   fmt.Printf(&quot;Time taken for size %d: %v\n&quot;, s, end.Sub(start))
}

C:

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;time.h&gt;

static inline size_t Array_Splice(int* array, size_t size, size_t index) {
   if (index &gt;= size)
      return size;
   for (size_t i = index; i &lt; size - 1; i++) {
      array[i] = array[i + 1];
   }
   return size - 1;
}

int main(void)
{
   const size_t size = 1000;
   size_t array_size = size;
   int* array = malloc(array_size * sizeof(int));
   if (array == NULL) {
      return 1;
   }

   for (size_t i = 0; i &lt; size; i++) {
      array[i] = rand() % size;
   }

   const clock_t start = clock();
   while (array_size &gt; 0) {
      array_size = Array_Splice(array, array_size, array[array_size/2] % array_size);
   }
   const clock_t end = clock();
   const double time_taken = (double)(end - start) / (CLOCKS_PER_SEC / 1000000.0f);
   printf(&quot;Time taken for size %lu: %fus\n&quot;, size, time_taken);

   free(array);
   return 0;
}

I have attempted to disassemble the code for both, but Go's assembly is quite hard to read.
The error is not in the timekeeping method, as the time to complete scales linearly with the array size.
Of note is that for larger arrays, C starts to pull ahead significantly, presumably due to increased GC calls.

答案1

得分: 1

我对go及其特定的随机数函数不太熟悉。

但是,我怀疑gomath/rand函数rand.Intn生成的数字与libc的rand函数不会相同。

通常情况下,数据并不重要,但我们将随机值用作数组的索引,以获取我们要修改的数组的大小/长度。

也就是说,在go程序中,我们可能使用索引50,而C程序可能使用索引137。

因此,这些索引/大小数字在两个程序中必须相同。为了验证这一点,在使用它们之前,我们可以将它们打印出来。输出必须匹配。

因此,你的go程序和C程序不是等效的测试。

我们可以花费很多时间来寻找与go等效的[标准] C函数。但是,更简单的方法是编写我们自己的公共/兼容函数。

在这里进行测试,随机函数的质量并不是很重要。一个简单可靠的随机函数可以是线性同余生成器(LCG)。参见:https://en.wikipedia.org/wiki/Linear_congruential_generator

我们可以在代码中创建自己版本的LCG,因为它只是一个一行代码的函数(例如):

seed = (seed * 1103515245) + 12345;

我已经修改了你的程序,以便[可选地]使用简单的LCG而不是libc的rand函数。

你需要相应地修改你的go程序。

这是修改后的程序:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

unsigned int seed = 1;
int opt_r;
int opt_p;

int
myrand(void)
{
    int val;

    // 使用LCG算法
    if (opt_r) {
        seed = (seed * 1103515245) + 12345;
        val = seed;
    }
    else
        val = rand_r(&seed);

    return val;
}

static inline size_t
Array_Splice(int *array, size_t size, size_t index)
{
    if (index >= size)
        return size;
    for (size_t i = index; i < size - 1; i++) {
        array[i] = array[i + 1];
    }
    return size - 1;
}

int
main(int argc,char **argv)
{

    --argc;
    ++argv;

    for (;  argc > 0;  --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'r':
            opt_r = ! opt_r;
            break;
        case 'p':
            opt_p = ! opt_p;
            break;
        }
    }

    size_t size = 1000;
    if (--argc > 0)
        size = strtol(*argv++,NULL,10);
    printf("size=%zu\n",size);

    if (--argc > 0)
        seed = strtol(*argv++,NULL,10);
    printf("seed=%u\n",seed);

    size_t array_size = size;
    int *array = malloc(array_size * sizeof(int));

    if (array == NULL) {
        return 1;
    }

    for (size_t i = 0; i < size; i++) {
        array[i] = myrand() % size;
        if (opt_p)
            printf("array[%zu]=%d\n",i,array[i]);
    }

    const clock_t start = clock();

    while (array_size > 0) {
        size_t array_index = array[array_size / 2] % array_size;
        if (opt_p)
            printf("array_index=%zu\n",array_index);
        array_size = Array_Splice(array, array_size, array_index);
    }
    const clock_t end = clock();
    const double time_taken = (double) (end - start) / (CLOCKS_PER_SEC / 1000000.0f);

    printf("Time taken for size %lu: %fus\n", size, time_taken);

    free(array);
    return 0;
}

使用-p参数打印值(即计时将没有意义)。

使用-r参数使用LCG算法。在正确实现的情况下,这些数字将与此选项匹配。

之后,这两个程序将进行有意义的计时比较。

英文:

I'm not familiar with go and its random number function(s) in particular.

But, I doubt that go's math/rand function rand.Intn will produce the same numbers as libc's rand function.

Ordinarily, the data wouldn't matter but we're using the random values as an index into the array to get the size/length of the array we're modifying.

That is, (e.g.) in the go program, we might use index 50 and the C program might use index 137.

Therefore, these index/size numbers must be the same for both programs. To verify this, we can print them out before using them. The outputs must match.

So, your go program and your C program are not equivalent tests.

We could spend a lot of time trying to find a [standard] C function that is equivalent to go's. But, an easier way is to write our own common/compatible function.

For the testing purposes here, the quality of the random function isn't much of an issue. A simple/reliable random function can be an LCG (linear congruential generator). See: https://en.wikipedia.org/wiki/Linear_congruential_generator

We can create our own version(s) of this in the code as it's just a one-liner (e.g.):

seed = (seed * 1103515245) + 12345;

I've modified your program to [optionally] use a simple LCG instead of libc's rand.

You'll need to modify your go program equivalently.

Here is the modified program:

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;time.h&gt;
unsigned int seed = 1;
int opt_r;
int opt_p;
int
myrand(void)
{
int val;
// use LCG algorithm
if (opt_r) {
seed = (seed * 1103515245) + 12345;
val = seed;
}
else
val = rand_r(&amp;seed);
return val;
}
static inline size_t
Array_Splice(int *array, size_t size, size_t index)
{
if (index &gt;= size)
return size;
for (size_t i = index; i &lt; size - 1; i++) {
array[i] = array[i + 1];
}
return size - 1;
}
int
main(int argc,char **argv)
{
--argc;
++argv;
for (;  argc &gt; 0;  --argc, ++argv) {
char *cp = *argv;
if (*cp != &#39;-&#39;)
break;
cp += 2;
switch (cp[-1]) {
case &#39;r&#39;:
opt_r = ! opt_r;
break;
case &#39;p&#39;:
opt_p = ! opt_p;
break;
}
}
size_t size = 1000;
if (--argc &gt; 0)
size = strtol(*argv++,NULL,10);
printf(&quot;size=%zu\n&quot;,size);
if (--argc &gt; 0)
seed = strtol(*argv++,NULL,10);
printf(&quot;seed=%u\n&quot;,seed);
size_t array_size = size;
int *array = malloc(array_size * sizeof(int));
if (array == NULL) {
return 1;
}
for (size_t i = 0; i &lt; size; i++) {
array[i] = myrand() % size;
if (opt_p)
printf(&quot;array[%zu]=%d\n&quot;,i,array[i]);
}
const clock_t start = clock();
while (array_size &gt; 0) {
size_t array_index = array[array_size / 2] % array_size;
if (opt_p)
printf(&quot;array_index=%zu\n&quot;,array_index);
array_size = Array_Splice(array, array_size, array_index);
}
const clock_t end = clock();
const double time_taken = (double) (end - start) / (CLOCKS_PER_SEC / 1000000.0f);
printf(&quot;Time taken for size %lu: %fus\n&quot;, size, time_taken);
free(array);
return 0;
}

Invoke with -p to print values (i.e. the timing will be meaningless).

Invoke with -r to use the LCG algorithm. Properly implemented [in go], the numbers will match with this option.

After that, the two programs will be making a meaningful timing comparison.

huangapple
  • 本文由 发表于 2023年7月30日 03:20:21
  • 转载请务必保留本文链接:https://go.coder-hub.com/76795129.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定