Segmentation fault在gcc -O中发生。

huangapple go评论48阅读模式
英文:

Segmentation fault occurs in gcc -O

问题

我写了这段代码来返回.txt文件中出现频率最高的前n个单词:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_WORD_LENGTH 100
#define MAX_LINE_LENGTH 1000

struct WordFrequency {
    char word[MAX_WORD_LENGTH];
    int frequency;
};

char** find_frequent_words(const char* path, int32_t n) {
    FILE* file = fopen(path, "r");
    if (file == NULL) {
        fprintf(stderr, "Failed to open file: %s\n", path);
        return NULL;
    }

    // 创建哈希表来统计单词频率
    struct WordFrequency* wordFrequencies = NULL;
    int uniqueWordCount = 0;
    int maxWordCount = 10000;

    char line[MAX_LINE_LENGTH];
    while (fgets(line, sizeof(line), file) != NULL) {
        char* word = strtok(line, " \t\n");
        while (word != NULL) {
            int existingIndex = -1;
            for (int i = 0; i < uniqueWordCount; i++) {
                if (strcmp(wordFrequencies[i].word, word) == 0) {
                    existingIndex = i;
                    break;
                }
            }

            if (existingIndex >= 0) {
                wordFrequencies[existingIndex].frequency++;
            } else {
                if (uniqueWordCount == maxWordCount) {
                    // 增加单词频率数组的大小
                    maxWordCount *= 2;
                    struct WordFrequency* newWordFrequencies = realloc(
                        wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
                    );
                    if (newWordFrequencies == NULL) {
                        fprintf(stderr, "内存分配失败。\n");
                        fclose(file);
                        free(wordFrequencies);
                        return NULL;
                    }
                    wordFrequencies = newWordFrequencies;
                }
                strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
                wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = '\0';
                wordFrequencies[uniqueWordCount].frequency = 1;
                uniqueWordCount++;
            }

            word = strtok(NULL, " \t\n");
        }
    }

    fclose(file);

    // 按降序对单词频率进行排序
    for (int i = 0; i < uniqueWordCount - 1; i++) {
        for (int j = 0; j < uniqueWordCount - i - 1; j++) {
            if (wordFrequencies[j].frequency < wordFrequencies[j + 1].frequency) {
                struct WordFrequency temp = wordFrequencies[j];
                wordFrequencies[j] = wordFrequencies[j + 1];
                wordFrequencies[j + 1] = temp;
            }
        }
    }

    // 创建包含最常见单词的结果数组
    int resultCount = (n < uniqueWordCount) ? n : uniqueWordCount;
    char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
    if (frequentWords == NULL) {
        fprintf(stderr, "内存分配失败。\n");
        free(wordFrequencies);
        return NULL;
    }

    for (int i = 0; i < resultCount; i++) {
        frequentWords[i] = strdup(wordFrequencies[i].word);
        if (frequentWords[i] == NULL) {
            fprintf(stderr, "内存分配失败。\n");
            for (int j = 0; j < i; j++) {
                free(frequentWords[j]);
            }
            free(frequentWords);
            free(wordFrequencies);
            return NULL;
        }
    }
    frequentWords[resultCount] = NULL;

    free(wordFrequencies);

    return frequentWords;
}

int main(int argc, char* argv[]) {
    if (argc < 3) {
        fprintf(stderr, "用法:%s <file_path> <n>\n", argv[0]);
        return 1;
    }

    const char* path = argv[1];
    int32_t n = atoi(argv[2]);
    if (n <= 0) {
        fprintf(stderr, "n的值无效:%s\n", argv[2]);
        return 1;
    }

    char** frequentWords = find_frequent_words(path, n);
    if (frequentWords == NULL) {
        return 1;
    }

    printf("前%d个最常见的单词:\n", n);
    for (int i = 0; frequentWords[i] != NULL; i++) {
        printf("%s\n", frequentWords[i]);
    }

    // 释放为frequentWords数组及其元素分配的内存
    for (int i = 0; frequentWords[i] != NULL; i++) {
        free(frequentWords[i]);
    }
    free(frequentWords);

    return 0;
}

这是我如何编译和运行它的方式:

gcc -o frequent_words frequent_words.c
./frequent_words tiny_shakespeare.txt 5

但是我得到了以下错误:

Segmentation fault (core dumped)

我不知道哪一行引起了这个错误。我做错了什么?

英文:

I wrote this code to return the top n most frequent words in a .txt file:

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#define MAX_WORD_LENGTH 100
#define MAX_LINE_LENGTH 1000
struct WordFrequency {
char word[MAX_WORD_LENGTH];
int frequency;
};
char** find_frequent_words(const char* path, int32_t n) {
FILE* file = fopen(path, &quot;r&quot;);
if (file == NULL) {
fprintf(stderr, &quot;Failed to open file: %s\n&quot;, path);
return NULL;
}
// Create a hash table to count word frequencies
struct WordFrequency* wordFrequencies = NULL;
int uniqueWordCount = 0;
int maxWordCount = 10000;
char line[MAX_LINE_LENGTH];
while (fgets(line, sizeof(line), file) != NULL) {
char* word = strtok(line, &quot; \t\n&quot;);
while (word != NULL) {
int existingIndex = -1;
for (int i = 0; i &lt; uniqueWordCount; i++) {
if (strcmp(wordFrequencies[i].word, word) == 0) {
existingIndex = i;
break;
}
}
if (existingIndex &gt;= 0) {
wordFrequencies[existingIndex].frequency++;
} else {
if (uniqueWordCount == maxWordCount) {
// Increase the size of the word frequencies array
maxWordCount *= 2;
struct WordFrequency* newWordFrequencies = realloc(
wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
);
if (newWordFrequencies == NULL) {
fprintf(stderr, &quot;Memory allocation failed.\n&quot;);
fclose(file);
free(wordFrequencies);
return NULL;
}
wordFrequencies = newWordFrequencies;
}
strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = &#39;\0&#39;;
wordFrequencies[uniqueWordCount].frequency = 1;
uniqueWordCount++;
}
word = strtok(NULL, &quot; \t\n&quot;);
}
}
fclose(file);
// Sort word frequencies in descending order
for (int i = 0; i &lt; uniqueWordCount - 1; i++) {
for (int j = 0; j &lt; uniqueWordCount - i - 1; j++) {
if (wordFrequencies[j].frequency &lt; wordFrequencies[j + 1].frequency) {
struct WordFrequency temp = wordFrequencies[j];
wordFrequencies[j] = wordFrequencies[j + 1];
wordFrequencies[j + 1] = temp;
}
}
}
// Create the result array with the most frequent words
int resultCount = (n &lt; uniqueWordCount) ? n : uniqueWordCount;
char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
if (frequentWords == NULL) {
fprintf(stderr, &quot;Memory allocation failed.\n&quot;);
free(wordFrequencies);
return NULL;
}
for (int i = 0; i &lt; resultCount; i++) {
frequentWords[i] = strdup(wordFrequencies[i].word);
if (frequentWords[i] == NULL) {
fprintf(stderr, &quot;Memory allocation failed.\n&quot;);
for (int j = 0; j &lt; i; j++) {
free(frequentWords[j]);
}
free(frequentWords);
free(wordFrequencies);
return NULL;
}
}
frequentWords[resultCount] = NULL;
free(wordFrequencies);
return frequentWords;
}
int main(int argc, char* argv[]) {
if (argc &lt; 3) {
fprintf(stderr, &quot;Usage: %s &lt;file_path&gt; &lt;n&gt;\n&quot;, argv[0]);
return 1;
}
const char* path = argv[1];
int32_t n = atoi(argv[2]);
if (n &lt;= 0) {
fprintf(stderr, &quot;Invalid value for n: %s\n&quot;, argv[2]);
return 1;
}
char** frequentWords = find_frequent_words(path, n);
if (frequentWords == NULL) {
return 1;
}
printf(&quot;The %d most frequent words:\n&quot;, n);
for (int i = 0; frequentWords[i] != NULL; i++) {
printf(&quot;%s\n&quot;, frequentWords[i]);
}
// Free the memory allocated for the frequentWords array and its elements
for (int i = 0; frequentWords[i] != NULL; i++) {
free(frequentWords[i]);
}
free(frequentWords);
return 0;
}

And this is how I'm compiling and running it:

gcc -o frequent_words frequent_words.c
./frequent_words tiny_shakespeare.txt 5

But this is the error I'm getting:

Segmentation fault (core dumped)

I don't know what line is causing this. Where am I going wrong?

答案1

得分: 2

It segfaults on:

strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);

as wordFrequencies == NULL because you only allocate space in the array when uniqueWordCount == maxWordCount. I suggest you initialize maxWordCount to the correct capacity:

#define INITIAL_WORD_COUNT 10000
// ...
int maxWordCount = 0;
// ...
if (uniqueWordCount == maxWordCount) {
    // Increase the size of the word frequencies array
    maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;

Consider eliminating the batch logic and just grow it one entry at a time. If it's a proven performance issue then reintroduce the batch logic.

英文:

It segfaults on:

 strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWor
dCount].word) - 1);

as wordFrequencies == NULL because you only allocate space in the array when uniqueWordCount == maxWordCount. I suggest you initialize maxWordCount to the correct capacity:

#define INITIAL_WORD_COUNT 10000
// ...
int maxWordCount = 0;
// ...
if (uniqueWordCount == maxWordCount) {
// Increase the size of the word frequencies array
maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;

Consider eliminating the batch logic and just grow it one entry at a time. If it's a proven performance issue then reintroduce the batch logic.

huangapple
  • 本文由 发表于 2023年5月25日 12:07:35
  • 转载请务必保留本文链接:https://go.coder-hub.com/76328865.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定