英文:
Segmentation fault occurs in gcc -O
问题
我写了这段代码来返回.txt文件中出现频率最高的前n个单词:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LENGTH 100
#define MAX_LINE_LENGTH 1000
/* One entry of the word-frequency table: a word and how many times it was seen. */
struct WordFrequency {
/* NUL-terminated word text; holds at most MAX_WORD_LENGTH - 1 characters. */
char word[MAX_WORD_LENGTH];
/* Number of occurrences counted so far. */
int frequency;
};
/*
 * Reads the text file at `path`, counts how often each whitespace-delimited
 * token occurs, and returns up to `n` of the most frequent tokens.
 *
 * Tokens are split on space, tab and newline only and compared with strcmp,
 * so letter case and attached punctuation are significant. Lines are read in
 * chunks of at most MAX_LINE_LENGTH - 1 characters, so a token that straddles
 * a chunk boundary is counted as two tokens.
 *
 * Returns a malloc'ed, NULL-terminated array of strdup'ed strings ordered by
 * descending frequency; the caller frees each element and then the array.
 * Returns NULL (after printing a message to stderr) if the file cannot be
 * opened or an allocation fails.
 *
 * NOTE(review): as written, the table is never allocated before its first
 * use. wordFrequencies starts as NULL and maxWordCount as 10000, so the
 * growth branch (uniqueWordCount == maxWordCount) is not taken on the first
 * insertion and the strncpy below writes through a NULL pointer.
 */
char** find_frequent_words(const char* path, int32_t n) {
FILE* file = fopen(path, "r");
if (file == NULL) {
fprintf(stderr, "Failed to open file: %s\n", path);
return NULL;
}
// Word-frequency table: a flat array searched linearly (not an actual hash table).
struct WordFrequency* wordFrequencies = NULL;
// Number of distinct words stored so far.
int uniqueWordCount = 0;
// Capacity the code believes the table has; no allocation of this size has been made yet.
int maxWordCount = 10000;
char line[MAX_LINE_LENGTH];
while (fgets(line, sizeof(line), file) != NULL) {
// strtok modifies `line` in place and keeps its position between calls.
char* word = strtok(line, " \t\n");
while (word != NULL) {
// Linear search for an existing entry for this word.
int existingIndex = -1;
for (int i = 0; i < uniqueWordCount; i++) {
if (strcmp(wordFrequencies[i].word, word) == 0) {
existingIndex = i;
break;
}
}
if (existingIndex >= 0) {
wordFrequencies[existingIndex].frequency++;
} else {
if (uniqueWordCount == maxWordCount) {
// Table is considered full: double its capacity.
maxWordCount *= 2;
// realloc into a temporary so the old block can still be freed on failure.
struct WordFrequency* newWordFrequencies = realloc(
wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
);
if (newWordFrequencies == NULL) {
fprintf(stderr, "内存分配失败。\n");
fclose(file);
free(wordFrequencies);
return NULL;
}
wordFrequencies = newWordFrequencies;
}
// Append a new entry; the word is truncated to fit and explicitly NUL-terminated.
// NOTE(review): on the first insertion wordFrequencies is still NULL here (see header comment).
strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = '\0';
wordFrequencies[uniqueWordCount].frequency = 1;
uniqueWordCount++;
}
word = strtok(NULL, " \t\n");
}
}
fclose(file);
// Sort entries by descending frequency using adjacent swaps (bubble sort).
// Entries are swapped only on a strict "<", so equal counts keep first-seen order.
for (int i = 0; i < uniqueWordCount - 1; i++) {
for (int j = 0; j < uniqueWordCount - i - 1; j++) {
if (wordFrequencies[j].frequency < wordFrequencies[j + 1].frequency) {
struct WordFrequency temp = wordFrequencies[j];
wordFrequencies[j] = wordFrequencies[j + 1];
wordFrequencies[j + 1] = temp;
}
}
}
// Build the result: the first min(n, uniqueWordCount) words plus a NULL terminator.
int resultCount = (n < uniqueWordCount) ? n : uniqueWordCount;
char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
if (frequentWords == NULL) {
fprintf(stderr, "内存分配失败。\n");
free(wordFrequencies);
return NULL;
}
for (int i = 0; i < resultCount; i++) {
frequentWords[i] = strdup(wordFrequencies[i].word);
if (frequentWords[i] == NULL) {
fprintf(stderr, "内存分配失败。\n");
// Release the copies made so far before bailing out.
for (int j = 0; j < i; j++) {
free(frequentWords[j]);
}
free(frequentWords);
free(wordFrequencies);
return NULL;
}
}
frequentWords[resultCount] = NULL;
// The result holds its own copies, so the table is no longer needed.
free(wordFrequencies);
return frequentWords;
}
/*
 * Command-line entry point.
 *
 * Expects two arguments: the path of a text file and a positive count n.
 * Prints the words returned by find_frequent_words, one per line, then frees
 * the returned array and its strings. Returns 1 on a usage error, an invalid
 * n, or when find_frequent_words returns NULL; returns 0 otherwise.
 */
int main(int argc, char* argv[]) {
    if (argc < 3) {
        fprintf(stderr, "用法:%s <file_path> <n>\n", argv[0]);
        return 1;
    }

    const char* input_path = argv[1];
    const char* count_arg = argv[2];

    /* atoi yields 0 for non-numeric text, which the check below rejects. */
    int32_t wanted = atoi(count_arg);
    if (wanted <= 0) {
        fprintf(stderr, "n的值无效:%s\n", count_arg);
        return 1;
    }

    char** top_words = find_frequent_words(input_path, wanted);
    if (top_words == NULL) {
        return 1;
    }

    printf("前%d个最常见的单词:\n", wanted);

    /* The array is NULL-terminated, so walk it with a cursor. */
    char** cursor = top_words;
    while (*cursor != NULL) {
        printf("%s\n", *cursor);
        cursor++;
    }

    /* Release every string first, then the array that held them. */
    cursor = top_words;
    while (*cursor != NULL) {
        free(*cursor);
        cursor++;
    }
    free(top_words);

    return 0;
}
这是我如何编译和运行它的方式:
gcc -o frequent_words frequent_words.c
./frequent_words tiny_shakespeare.txt 5
但是我得到了以下错误:
Segmentation fault (core dumped)
我不知道哪一行引起了这个错误。我做错了什么?
英文:
I wrote this code to return the top n most frequent words in a .txt file:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LENGTH 100
#define MAX_LINE_LENGTH 1000
/* One entry of the word-frequency table: a word and how many times it was seen. */
struct WordFrequency {
/* NUL-terminated word text; holds at most MAX_WORD_LENGTH - 1 characters. */
char word[MAX_WORD_LENGTH];
/* Number of occurrences counted so far. */
int frequency;
};
/*
 * Reads the text file at `path`, counts how often each whitespace-delimited
 * token occurs, and returns up to `n` of the most frequent tokens.
 *
 * Tokens are split on space, tab and newline only and compared with strcmp,
 * so letter case and attached punctuation are significant. Lines are read in
 * chunks of at most MAX_LINE_LENGTH - 1 characters, so a token that straddles
 * a chunk boundary is counted as two tokens.
 *
 * Returns a malloc'ed, NULL-terminated array of strdup'ed strings ordered by
 * descending frequency; the caller frees each element and then the array.
 * Returns NULL (after printing a message to stderr) if the file cannot be
 * opened or an allocation fails.
 *
 * NOTE(review): as written, the table is never allocated before its first
 * use. wordFrequencies starts as NULL and maxWordCount as 10000, so the
 * growth branch (uniqueWordCount == maxWordCount) is not taken on the first
 * insertion and the strncpy below writes through a NULL pointer.
 */
char** find_frequent_words(const char* path, int32_t n) {
FILE* file = fopen(path, "r");
if (file == NULL) {
fprintf(stderr, "Failed to open file: %s\n", path);
return NULL;
}
// Word-frequency table: a flat array searched linearly (not an actual hash table).
struct WordFrequency* wordFrequencies = NULL;
// Number of distinct words stored so far.
int uniqueWordCount = 0;
// Capacity the code believes the table has; no allocation of this size has been made yet.
int maxWordCount = 10000;
char line[MAX_LINE_LENGTH];
while (fgets(line, sizeof(line), file) != NULL) {
// strtok modifies `line` in place and keeps its position between calls.
char* word = strtok(line, " \t\n");
while (word != NULL) {
// Linear search for an existing entry for this word.
int existingIndex = -1;
for (int i = 0; i < uniqueWordCount; i++) {
if (strcmp(wordFrequencies[i].word, word) == 0) {
existingIndex = i;
break;
}
}
if (existingIndex >= 0) {
wordFrequencies[existingIndex].frequency++;
} else {
if (uniqueWordCount == maxWordCount) {
// Table is considered full: double its capacity.
maxWordCount *= 2;
// realloc into a temporary so the old block can still be freed on failure.
struct WordFrequency* newWordFrequencies = realloc(
wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
);
if (newWordFrequencies == NULL) {
fprintf(stderr, "Memory allocation failed.\n");
fclose(file);
free(wordFrequencies);
return NULL;
}
wordFrequencies = newWordFrequencies;
}
// Append a new entry; the word is truncated to fit and explicitly NUL-terminated.
// NOTE(review): on the first insertion wordFrequencies is still NULL here (see header comment).
strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = '\0';
wordFrequencies[uniqueWordCount].frequency = 1;
uniqueWordCount++;
}
word = strtok(NULL, " \t\n");
}
}
fclose(file);
// Sort entries by descending frequency using adjacent swaps (bubble sort).
// Entries are swapped only on a strict "<", so equal counts keep first-seen order.
for (int i = 0; i < uniqueWordCount - 1; i++) {
for (int j = 0; j < uniqueWordCount - i - 1; j++) {
if (wordFrequencies[j].frequency < wordFrequencies[j + 1].frequency) {
struct WordFrequency temp = wordFrequencies[j];
wordFrequencies[j] = wordFrequencies[j + 1];
wordFrequencies[j + 1] = temp;
}
}
}
// Build the result: the first min(n, uniqueWordCount) words plus a NULL terminator.
int resultCount = (n < uniqueWordCount) ? n : uniqueWordCount;
char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
if (frequentWords == NULL) {
fprintf(stderr, "Memory allocation failed.\n");
free(wordFrequencies);
return NULL;
}
for (int i = 0; i < resultCount; i++) {
frequentWords[i] = strdup(wordFrequencies[i].word);
if (frequentWords[i] == NULL) {
fprintf(stderr, "Memory allocation failed.\n");
// Release the copies made so far before bailing out.
for (int j = 0; j < i; j++) {
free(frequentWords[j]);
}
free(frequentWords);
free(wordFrequencies);
return NULL;
}
}
frequentWords[resultCount] = NULL;
// The result holds its own copies, so the table is no longer needed.
free(wordFrequencies);
return frequentWords;
}
/*
 * Command-line entry point.
 *
 * Expects two arguments: the path of a text file and a positive count n.
 * Prints the words returned by find_frequent_words, one per line, then frees
 * the returned array and its strings. Returns 1 on a usage error, an invalid
 * n, or when find_frequent_words returns NULL; returns 0 otherwise.
 */
int main(int argc, char* argv[]) {
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <file_path> <n>\n", argv[0]);
        return 1;
    }

    const char* input_path = argv[1];
    const char* count_arg = argv[2];

    /* atoi yields 0 for non-numeric text, which the check below rejects. */
    int32_t wanted = atoi(count_arg);
    if (wanted <= 0) {
        fprintf(stderr, "Invalid value for n: %s\n", count_arg);
        return 1;
    }

    char** top_words = find_frequent_words(input_path, wanted);
    if (top_words == NULL) {
        return 1;
    }

    printf("The %d most frequent words:\n", wanted);

    /* The array is NULL-terminated, so walk it with a cursor. */
    char** cursor = top_words;
    while (*cursor != NULL) {
        printf("%s\n", *cursor);
        cursor++;
    }

    /* Release every string first, then the array that held them. */
    cursor = top_words;
    while (*cursor != NULL) {
        free(*cursor);
        cursor++;
    }
    free(top_words);

    return 0;
}
And this is how I'm compiling and running it:
gcc -o frequent_words frequent_words.c
./frequent_words tiny_shakespeare.txt 5
But this is the error I'm getting:
Segmentation fault (core dumped)
I don't know what line is causing this. Where am I going wrong?
答案1
得分: 2
It segfaults on:
strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
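because wordFrequencies == NULL at that point: you only allocate space in the array when uniqueWordCount == maxWordCount, and since maxWordCount starts at 10000 that condition is false on the first insertion, so realloc is never called. I suggest you initialize maxWordCount to the actual current capacity (0) and allocate on first use: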
#define INITIAL_WORD_COUNT 10000
// ...
int maxWordCount = 0;
// ...
if (uniqueWordCount == maxWordCount) {
// Increase the size of the word frequencies array
maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;
Consider eliminating the batch logic and just grow it one entry at a time. If it's a proven performance issue then reintroduce the batch logic.
英文:
It segfaults on:
strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
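because wordFrequencies == NULL at that point: you only allocate space in the array when uniqueWordCount == maxWordCount, and since maxWordCount starts at 10000 that condition is false on the first insertion, so realloc is never called. I suggest you initialize maxWordCount to the actual current capacity (0) and allocate on first use: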
#define INITIAL_WORD_COUNT 10000
// ...
int maxWordCount = 0;
// ...
if (uniqueWordCount == maxWordCount) {
// Increase the size of the word frequencies array
maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;
Consider eliminating the batch logic and just grow it one entry at a time. If it's a proven performance issue then reintroduce the batch logic.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论