为什么这个全局变量在不同线程之间的值不同?

huangapple go评论75阅读模式
英文:

Why is the value of this global variable different between threads?

问题

在这个程序中,我声明了一些全局变量,并创建了多个与它们交互的线程。

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <fcntl.h>
#include <assert.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Short aliases for the per-segment result record and for struct stat. */
typedef struct zip_seg zip_seg_t;
typedef struct stat stat_t;

/* Mutex that segment_handler() takes around its updates of the shared cursor variables. */
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* One mmap()ed base address per input file; allocated and filled in by main(). */
char **file_ptr;
/* Cursor into the mapped data; segment_handler() moves it and passes it to wzip(). */
char *segment_ptr;
/* One fstat() result per input file; filled in by main(), st_size is used as the mapping length. */
stat_t *stat_buf;

/*
 * Record stored in zip_list for each segment; segment_handler() assigns it
 * from the return value of wzip().
 * NOTE(review): wzip() is not visible here -- presumably `data` is the
 * compressed output buffer and `size` its length in bytes; confirm.
 */
struct zip_seg {
    char *data;
    size_t size;
};

/* Index of the file currently being handed out, and index of the next zip_list slot to fill. */
int index_cur_file = 0, index_cur_segment = 0;
/* Total number of segments across all files; computed by main(), read by segment_handler(). */
int n_segments;
/* Number of segments already handed out from the current file. */
int total_segments_cur_file = 0;
/* Segment length in bytes. */
int segment_size = 100;
/* Array of n_segments result records; allocated by main(), filled by segment_handler(). */
zip_seg_t *zip_list;

/*
 * Worker thread body: repeatedly claims the next unprocessed segment while
 * holding `lock`, then passes it to wzip() outside the lock and stores the
 * result in zip_list at the segment's global index.
 *
 * Every segment is segment_size bytes long except the last one of a file,
 * which holds whatever bytes remain.  Files that are empty or fully handed
 * out are skipped.
 *
 * Precondition: n_segments must not exceed the real number of segments in
 * the mapped files (sum of ceil(st_size / segment_size)); otherwise the file
 * cursor would run past the end of file_ptr / stat_buf.
 *
 * The argument is unused (pthread_create requires the parameter).
 * Always returns NULL.
 */
void *segment_handler(void *arg)
{
    (void)arg;

    for (;;) {
        pthread_mutex_lock(&lock);
        if (index_cur_segment >= n_segments) {
            pthread_mutex_unlock(&lock);
            break;
        }

        /* Byte offset of the next unclaimed segment inside the current file. */
        off_t offset = (off_t)total_segments_cur_file * segment_size;

        /* Move on past files that are empty or completely handed out. */
        while (offset >= stat_buf[index_cur_file].st_size) {
            index_cur_file++;
            total_segments_cur_file = 0;
            offset = 0;
        }

        off_t remaining = stat_buf[index_cur_file].st_size - offset;
        int size = remaining < segment_size ? (int)remaining : segment_size;

        /*
         * Capture the segment start while the lock is held: another worker
         * may move segment_ptr as soon as the lock is released.
         */
        segment_ptr = file_ptr[index_cur_file] + offset;
        char *start = segment_ptr;

        total_segments_cur_file++;
        int i = index_cur_segment++;
        pthread_mutex_unlock(&lock);

        /* Each worker writes a distinct slot, so no lock is needed here. */
        zip_list[i] = wzip(start, size);
    }

    return NULL;
}

int main(int argc, char *argv[])
{
    char c;
    int dummy;
    while((c = getopt(argc, argv, "s:")) != -1) {
        switch(c) {
        case 's':
            dummy = atoi(optarg);
            break;
        }
    }
    
    int n_files = argc - optind;
    int n_threads = get_nprocs();
    int fd[n_files];
    file_ptr = malloc(n_files * sizeof(*file_ptr));
    stat_buf = malloc(n_files * sizeof(*stat_buf));
    pthread_t *workers = malloc(n_threads * sizeof(*workers));
    if(!file_ptr || !stat_buf || !workers) {
        printf("内存分配错误。\n");
        return -1;
    }
    for (int i = 0; i < n_files; i++)
    {
        char *file_name = argv[optind];
        fd[i] = open(file_name, O_RDONLY);
        fstat(fd[i], &stat_buf[i]);
        file_ptr[i] = mmap(NULL, stat_buf[i].st_size, PROT_READ, MAP_PRIVATE, fd[i], 0);
        int extra_segment = (segment_size % stat_buf[i].st_size) > 0 ? 1 : 0;
        n_segments += (stat_buf[i].st_size / segment_size) + extra_segment;
    }
    zip_list = malloc(n_segments * sizeof(*zip_list));
    if(!zip_list) {
        printf("内存分配错误。\n");
        return -1;
    }

    for (int i = 0; i < n_threads; ++i)
    {
        pthread_create(&workers[i], NULL, segment_handler, NULL);        
    }
    for (int i = 0; i < n_threads; ++i)
    {        
        pthread_join(workers[i], NULL);
    }

    join();

    for (int i = 0; i < n_files; i++)
    {
        munmap(file_ptr[i], stat_buf[i].st_size);
    }
    free(file_ptr);
    free(stat_buf);
    free(workers);
    free(zip_list);

    return 0;
}

导致问题的是n_segments,在启动子线程之前我为其赋值。问题在于在segment_handler中,当我检查它的值时,这些线程中的值为0,这与在主线程中分配的值不同。

为什么这个全局变量在不同线程之间的值不同?
为什么这个全局变量在不同线程之间的值不同?
如果全局变量应该在线程之间共享,为什么在这种情况下n_segments的值会不同呢?

英文:

In this program I declare some global variables and I create several threads that interact with them.

#include &lt;stdio.h&gt;
#include &lt;stdint.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#include &lt;getopt.h&gt;
#include &lt;fcntl.h&gt;
#include &lt;assert.h&gt;
#include &lt;pthread.h&gt;
#include &lt;sys/stat.h&gt;
#include &lt;sys/mman.h&gt;
#include &lt;sys/sysinfo.h&gt;
#include &lt;sys/types.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/syscall.h&gt;
/* Short aliases for the per-segment result record and for struct stat. */
typedef struct zip_seg zip_seg_t;
typedef struct stat stat_t;
/* Mutex that segment_handler() takes around its updates of the shared cursor variables. */
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
/* One mmap()ed base address per input file; allocated and filled in by main(). */
char **file_ptr;
/* Cursor into the mapped data; segment_handler() moves it and passes it to wzip(). */
char *segment_ptr;
/* One fstat() result per input file; filled in by main(). */
stat_t *stat_buf;
/*
 * Record stored in zip_list for each segment; segment_handler() assigns it
 * from the return value of wzip().
 * NOTE(review): wzip() is not visible here -- presumably `data` is the
 * compressed output buffer and `size` its length in bytes; confirm.
 */
struct zip_seg {
char *data;
size_t size;
};
/* Index of the file currently being handed out, and index of the next zip_list slot to fill. */
int index_cur_file = 0, index_cur_segment = 0;
/* Total segment count read by segment_handler(); zero-initialized as a file-scope object. */
int n_segments;
/* Number of segments already handed out from the current file. */
int total_segments_cur_file = 0;
/* Segment length in bytes. */
int segment_size = 100;
/* Array of result records; allocated by main(), filled by segment_handler(). */
zip_seg_t *zip_list;
/*
 * Worker thread body passed to pthread_create() by main().
 * Loops until index_cur_segment reaches the file-scope n_segments: under
 * `lock` it picks a size and advances the shared cursor variables, then calls
 * wzip() and stores the result in zip_list at the claimed index.
 *
 * NOTE(review): the function is declared to return void* but has no return
 * statement, and it takes no parameter although pthread start routines
 * receive one.
 */
void* segment_handler() {	
int size;
/* Kernel thread id; only referenced by the commented-out debug prints below. */
pid_t x = syscall(__NR_gettid);
//printf(&quot;ID %d getting in...\n&quot;, x);
while(1) {
pthread_mutex_lock(&amp;lock);
/* Stop once every segment index has been handed out. */
if(index_cur_segment &gt;= n_segments) {
pthread_mutex_unlock(&amp;lock);
break;
}
/*
 * NOTE(review): this branch is taken whenever bytes remain in the file, so
 * the very first claim takes the whole file as one segment; the comparison
 * looks inverted relative to the else branch -- confirm the intent.
 */
if((total_segments_cur_file * segment_size) &lt; stat_buf[index_cur_file].st_size) {
size = stat_buf[index_cur_file].st_size - (total_segments_cur_file * segment_size);
index_cur_file++;
total_segments_cur_file = 0;
/* Reads file_ptr at the already-incremented index, i.e. the next file. */
segment_ptr = file_ptr[index_cur_file];
}
else {
size = segment_size;
total_segments_cur_file++;
segment_ptr += size;
}
int i = index_cur_segment++;
pthread_mutex_unlock(&amp;lock);
/* segment_ptr is read here after the lock was released, so other workers may have changed it. */
zip_list[i] = wzip(segment_ptr, size);
//index_cur_segment++;
}
//printf(&quot;ID %d getting out...\n&quot;, x);
}
/*
 * Program entry point: parses the -s option, maps the input files, counts
 * their segments, runs one segment_handler() thread per processor reported
 * by get_nprocs(), then calls join() and releases the mappings and buffers.
 * Returns 0 normally and -1 when an allocation fails.
 */
int main(int argc, char *argv[])
{
char c;
/* Receives the numeric value of -s; not read anywhere else in this function. */
int dummy;
while((c = getopt(argc, argv, &quot;s:&quot;)) != -1) {
switch(c) {
case &#39;s&#39;:
dummy = atoi(optarg);
break;
}
}
/*
 * NOTE: this local shadows the file-scope n_segments.  Every use of
 * n_segments inside main refers to this local, so the file-scope variable
 * that segment_handler() checks keeps its initial value 0.
 */
int n_segments = 0;
/* Everything after the parsed options is treated as an input file name. */
int n_files = argc - optind;
int n_threads = get_nprocs();
int fd[n_files];
file_ptr = malloc(n_files * sizeof(*file_ptr));
stat_buf = malloc(n_files * sizeof(*stat_buf));
pthread_t *workers = malloc(n_threads * sizeof(*workers));
if(!file_ptr || !stat_buf || !workers) {
printf(&quot;Memory allocation error.\n&quot;);
return -1;
}
for (int i = 0; i &lt; n_files; i++)
{
/* NOTE(review): the index does not depend on i, so each iteration opens the same argument. */
char *file_name = argv[optind];
fd[i] = open(file_name, O_RDONLY);
fstat(fd[i], &amp;stat_buf[i]);
/* Map the whole file read-only. */
file_ptr[i] = mmap(NULL, stat_buf[i].st_size, PROT_READ, MAP_PRIVATE, fd[i], 0);
/* NOTE(review): computes segment_size modulo the file size; the operands look swapped -- confirm. */
int extra_segment = (segment_size % stat_buf[i].st_size) &gt; 0 ? 1 : 0;
/* Updates the local n_segments declared above, not the file-scope one. */
n_segments += (stat_buf[i].st_size / segment_size) + extra_segment;
}
zip_list = malloc(n_segments * sizeof(*zip_list));
if(!zip_list) {
printf(&quot;Memory allocation error.\n&quot;);
return -1;
}
/* Start the workers, then wait for all of them to finish. */
for (int i = 0; i &lt; n_threads; ++i)
{
pthread_create(&amp;workers[i], NULL, segment_handler, NULL);		
}
for (int i = 0; i &lt; n_threads; ++i)
{		
pthread_join(workers[i], NULL);
}
/* join() is defined outside the code shown here. */
join();
for (int i = 0; i &lt; n_files; i++)
{
munmap(file_ptr[i], stat_buf[i].st_size);
}
free(file_ptr);
free(stat_buf);
free(workers);
free(zip_list);
return 0;
}

The one that is causing problems is n_segments, to which I assign a value before starting the child threads. The problem is that when I check its value in segment_handler, these threads see 0, which is not what was assigned in the main thread.
为什么这个全局变量在不同线程之间的值不同?
为什么这个全局变量在不同线程之间的值不同?
If global variables are supposed to be shared across threads, why is the value of n_segments different in this case?

答案1

得分: 1

我有两个名为 n_segments 的变量。一个是全局的,另一个是主函数中的局部变量。

这个赋值只影响了主函数中的局部变量。

n_segments += (stat_buf[i].st_size / segment_size) + extra_segment;

全局的 n_segments 一直为0。

英文:

I had two variables named n_segments. One was global, the other was local to main.

This assignment only had effect on the local one in main.

n_segments += (stat_buf[i].st_size / segment_size) + extra_segment;

The global n_segments was always 0.

答案2

得分: -1

如果全局变量应该在多个线程之间共享...

这个说法有两个问题:

(1) 摒弃 "global" 这个词。全局变量在线程之间共享时与其他类型的变量没有任何特殊之处。

(2) 变量不会自动在线程之间共享。如果你希望程序中的一个线程看到由其他线程写入某个变量的值,那么你的程序必须采取显式的操作来同步线程。

最简单的同步方法*是使用互斥锁。如果你有一些变量,比如 v、w、x 和 y,希望在线程之间共享,那么只要再准备一个互斥锁 m,并且确保任何线程都只在锁定了 m 的情况下才访问 v、w、x 或 y,事情基本上就会按你期望的方式运行。

如果你遵守这个规则,那么每当某个线程 A 锁定互斥锁时,它将看到与线程 B 在线程 B 释放互斥锁时的那一刻变量相同的值,其中线程 B 是最后一个释放互斥锁的线程。

在没有任何同步的情况下共享变量会导致未定义行为(也称为 "UB")。你不希望在你的程序中出现 UB。如果你的程序出现 UB,那么根本没有必要讨论它可能存在的其他问题,或者它行为如何的可能原因。


* 还有其他同步线程之间变量值的方法,但正确使用它们更多是一个高级话题。首先学会使用互斥锁。

英文:

> If global variables are supposed to be shared across threads...

There are two things wrong with that statement:

(1) Get rid of the word "global." Global variables are no more special than any other kind of variable when it comes to sharing between threads.

(2) Variables are not automatically shared between threads. If you want one thread in your program to see a value that was written to some variable by some other thread, then your program must take explicit action to synchronize the threads.

The simplest way* to synchronize them is to use a mutex. If you have some collection of variables; v, w, x, and y that you want to share between the threads, then things will work out pretty much the way you want if you also have some mutex, m, and you never let any of your threads access v, w, x, or y except when the thread has m locked.

If you obey that rule, then whenever some thread A locks the mutex, it will see the same values for the variables that they had in thread B at the moment when thread B released the mutex where thread B is whichever thread was the last one to release it.

Sharing variables without any synchronization leads to Undefined Behavior (a.k.a., "UB"). You don't want UB in your program. Ever. If your program has UB, then there's simply no point in even talking about what else might be wrong with it or, about possible reasons for why it behaves the way it does.


* There are other ways to synchronize variable values across threads, but using them correctly is more of an advanced topic. Learn to do it with mutexes first.

huangapple
  • 本文由 发表于 2023年6月16日 02:25:03
  • 转载请务必保留本文链接:https://go.coder-hub.com/76484524.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定