Segmentation fault在gcc -O中发生。

huangapple go评论87阅读模式
英文:

Segmentation fault occurs in gcc -O

问题

我写了这段代码来返回.txt文件中出现频率最高的前n个单词:

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #define MAX_WORD_LENGTH 100
  5. #define MAX_LINE_LENGTH 1000
  6. struct WordFrequency {
  7. char word[MAX_WORD_LENGTH];
  8. int frequency;
  9. };
  10. char** find_frequent_words(const char* path, int32_t n) {
  11. FILE* file = fopen(path, "r");
  12. if (file == NULL) {
  13. fprintf(stderr, "Failed to open file: %s\n", path);
  14. return NULL;
  15. }
  16. // 创建哈希表来统计单词频率
  17. struct WordFrequency* wordFrequencies = NULL;
  18. int uniqueWordCount = 0;
  19. int maxWordCount = 10000;
  20. char line[MAX_LINE_LENGTH];
  21. while (fgets(line, sizeof(line), file) != NULL) {
  22. char* word = strtok(line, " \t\n");
  23. while (word != NULL) {
  24. int existingIndex = -1;
  25. for (int i = 0; i < uniqueWordCount; i++) {
  26. if (strcmp(wordFrequencies[i].word, word) == 0) {
  27. existingIndex = i;
  28. break;
  29. }
  30. }
  31. if (existingIndex >= 0) {
  32. wordFrequencies[existingIndex].frequency++;
  33. } else {
  34. if (uniqueWordCount == maxWordCount) {
  35. // 增加单词频率数组的大小
  36. maxWordCount *= 2;
  37. struct WordFrequency* newWordFrequencies = realloc(
  38. wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
  39. );
  40. if (newWordFrequencies == NULL) {
  41. fprintf(stderr, "内存分配失败。\n");
  42. fclose(file);
  43. free(wordFrequencies);
  44. return NULL;
  45. }
  46. wordFrequencies = newWordFrequencies;
  47. }
  48. strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
  49. wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = '\0';
  50. wordFrequencies[uniqueWordCount].frequency = 1;
  51. uniqueWordCount++;
  52. }
  53. word = strtok(NULL, " \t\n");
  54. }
  55. }
  56. fclose(file);
  57. // 按降序对单词频率进行排序
  58. for (int i = 0; i < uniqueWordCount - 1; i++) {
  59. for (int j = 0; j < uniqueWordCount - i - 1; j++) {
  60. if (wordFrequencies[j].frequency < wordFrequencies[j + 1].frequency) {
  61. struct WordFrequency temp = wordFrequencies[j];
  62. wordFrequencies[j] = wordFrequencies[j + 1];
  63. wordFrequencies[j + 1] = temp;
  64. }
  65. }
  66. }
  67. // 创建包含最常见单词的结果数组
  68. int resultCount = (n < uniqueWordCount) ? n : uniqueWordCount;
  69. char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
  70. if (frequentWords == NULL) {
  71. fprintf(stderr, "内存分配失败。\n");
  72. free(wordFrequencies);
  73. return NULL;
  74. }
  75. for (int i = 0; i < resultCount; i++) {
  76. frequentWords[i] = strdup(wordFrequencies[i].word);
  77. if (frequentWords[i] == NULL) {
  78. fprintf(stderr, "内存分配失败。\n");
  79. for (int j = 0; j < i; j++) {
  80. free(frequentWords[j]);
  81. }
  82. free(frequentWords);
  83. free(wordFrequencies);
  84. return NULL;
  85. }
  86. }
  87. frequentWords[resultCount] = NULL;
  88. free(wordFrequencies);
  89. return frequentWords;
  90. }
  91. int main(int argc, char* argv[]) {
  92. if (argc < 3) {
  93. fprintf(stderr, "用法:%s <file_path> <n>\n", argv[0]);
  94. return 1;
  95. }
  96. const char* path = argv[1];
  97. int32_t n = atoi(argv[2]);
  98. if (n <= 0) {
  99. fprintf(stderr, "n的值无效:%s\n", argv[2]);
  100. return 1;
  101. }
  102. char** frequentWords = find_frequent_words(path, n);
  103. if (frequentWords == NULL) {
  104. return 1;
  105. }
  106. printf("前%d个最常见的单词:\n", n);
  107. for (int i = 0; frequentWords[i] != NULL; i++) {
  108. printf("%s\n", frequentWords[i]);
  109. }
  110. // 释放为frequentWords数组及其元素分配的内存
  111. for (int i = 0; frequentWords[i] != NULL; i++) {
  112. free(frequentWords[i]);
  113. }
  114. free(frequentWords);
  115. return 0;
  116. }

这是我如何编译和运行它的方式:

  1. gcc -o frequent_words frequent_words.c
  2. ./frequent_words tiny_shakespeare.txt 5

但是我得到了以下错误:

  1. Segmentation fault (core dumped)

我不知道哪一行引起了这个错误。我做错了什么?

英文:

I wrote this code to return the top n most frequent words in a .txt file:

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #define MAX_WORD_LENGTH 100
  5. #define MAX_LINE_LENGTH 1000
  6. struct WordFrequency {
  7. char word[MAX_WORD_LENGTH];
  8. int frequency;
  9. };
  10. char** find_frequent_words(const char* path, int32_t n) {
  11. FILE* file = fopen(path, "r");
  12. if (file == NULL) {
  13. fprintf(stderr, "Failed to open file: %s\n", path);
  14. return NULL;
  15. }
  16. // Create a hash table to count word frequencies
  17. struct WordFrequency* wordFrequencies = NULL;
  18. int uniqueWordCount = 0;
  19. int maxWordCount = 10000;
  20. char line[MAX_LINE_LENGTH];
  21. while (fgets(line, sizeof(line), file) != NULL) {
  22. char* word = strtok(line, " \t\n");
  23. while (word != NULL) {
  24. int existingIndex = -1;
  25. for (int i = 0; i < uniqueWordCount; i++) {
  26. if (strcmp(wordFrequencies[i].word, word) == 0) {
  27. existingIndex = i;
  28. break;
  29. }
  30. }
  31. if (existingIndex >= 0) {
  32. wordFrequencies[existingIndex].frequency++;
  33. } else {
  34. if (uniqueWordCount == maxWordCount) {
  35. // Increase the size of the word frequencies array
  36. maxWordCount *= 2;
  37. struct WordFrequency* newWordFrequencies = realloc(
  38. wordFrequencies, maxWordCount * sizeof(struct WordFrequency)
  39. );
  40. if (newWordFrequencies == NULL) {
  41. fprintf(stderr, "Memory allocation failed.\n");
  42. fclose(file);
  43. free(wordFrequencies);
  44. return NULL;
  45. }
  46. wordFrequencies = newWordFrequencies;
  47. }
  48. strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);
  49. wordFrequencies[uniqueWordCount].word[sizeof(wordFrequencies[uniqueWordCount].word) - 1] = '\0';
  50. wordFrequencies[uniqueWordCount].frequency = 1;
  51. uniqueWordCount++;
  52. }
  53. word = strtok(NULL, " \t\n");
  54. }
  55. }
  56. fclose(file);
  57. // Sort word frequencies in descending order
  58. for (int i = 0; i < uniqueWordCount - 1; i++) {
  59. for (int j = 0; j < uniqueWordCount - i - 1; j++) {
  60. if (wordFrequencies[j].frequency < wordFrequencies[j + 1].frequency) {
  61. struct WordFrequency temp = wordFrequencies[j];
  62. wordFrequencies[j] = wordFrequencies[j + 1];
  63. wordFrequencies[j + 1] = temp;
  64. }
  65. }
  66. }
  67. // Create the result array with the most frequent words
  68. int resultCount = (n < uniqueWordCount) ? n : uniqueWordCount;
  69. char** frequentWords = malloc((resultCount + 1) * sizeof(char*));
  70. if (frequentWords == NULL) {
  71. fprintf(stderr, "Memory allocation failed.\n");
  72. free(wordFrequencies);
  73. return NULL;
  74. }
  75. for (int i = 0; i < resultCount; i++) {
  76. frequentWords[i] = strdup(wordFrequencies[i].word);
  77. if (frequentWords[i] == NULL) {
  78. fprintf(stderr, "Memory allocation failed.\n");
  79. for (int j = 0; j < i; j++) {
  80. free(frequentWords[j]);
  81. }
  82. free(frequentWords);
  83. free(wordFrequencies);
  84. return NULL;
  85. }
  86. }
  87. frequentWords[resultCount] = NULL;
  88. free(wordFrequencies);
  89. return frequentWords;
  90. }
  91. int main(int argc, char* argv[]) {
  92. if (argc < 3) {
  93. fprintf(stderr, "Usage: %s <file_path> <n>\n", argv[0]);
  94. return 1;
  95. }
  96. const char* path = argv[1];
  97. int32_t n = atoi(argv[2]);
  98. if (n <= 0) {
  99. fprintf(stderr, "Invalid value for n: %s\n", argv[2]);
  100. return 1;
  101. }
  102. char** frequentWords = find_frequent_words(path, n);
  103. if (frequentWords == NULL) {
  104. return 1;
  105. }
  106. printf("The %d most frequent words:\n", n);
  107. for (int i = 0; frequentWords[i] != NULL; i++) {
  108. printf("%s\n", frequentWords[i]);
  109. }
  110. // Free the memory allocated for the frequentWords array and its elements
  111. for (int i = 0; frequentWords[i] != NULL; i++) {
  112. free(frequentWords[i]);
  113. }
  114. free(frequentWords);
  115. return 0;
  116. }

And this is how I'm compiling and running it:

  1. gcc -o frequent_words frequent_words.c
  2. ./frequent_words tiny_shakespeare.txt 5

But this is the error I'm getting:

  1. Segmentation fault (core dumped)

I don't know what line is causing this. Where am I going wrong?

答案1

得分: 2

It segfaults on:

  1. strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);

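because wordFrequencies is still NULL at that point: the array is only allocated when uniqueWordCount == maxWordCount, but maxWordCount starts at 10000 while nothing has been allocated yet, so the very first insertion writes through a null pointer. I suggest you initialize maxWordCount to the actual current capacity (0), so that the first insertion triggers the allocation: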

  1. #define INITIAL_WORD_COUNT 10000
  2. // ...
  3. int maxWordCount = 0;
  4. // ...
  5. if (uniqueWordCount == maxWordCount) {
  6. // Increase the size of the word frequencies array
  7. maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;

Consider eliminating the batch logic and just growing the array one entry at a time. If that proves to be a performance issue, reintroduce the batch logic.

英文:

It segfaults on:

  1. strncpy(wordFrequencies[uniqueWordCount].word, word, sizeof(wordFrequencies[uniqueWordCount].word) - 1);

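because wordFrequencies is still NULL at that point: the array is only allocated when uniqueWordCount == maxWordCount, but maxWordCount starts at 10000 while nothing has been allocated yet, so the very first insertion writes through a null pointer. I suggest you initialize maxWordCount to the actual current capacity (0), so that the first insertion triggers the allocation: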

  1. #define INITIAL_WORD_COUNT 10000
  2. // ...
  3. int maxWordCount = 0;
  4. // ...
  5. if (uniqueWordCount == maxWordCount) {
  6. // Increase the size of the word frequencies array
  7. maxWordCount = maxWordCount ? 2 * maxWordCount : INITIAL_WORD_COUNT;

Consider eliminating the batch logic and just growing the array one entry at a time. If that proves to be a performance issue, reintroduce the batch logic.

huangapple
  • 本文由 发表于 2023年5月25日 12:07:35
  • 转载请务必保留本文链接:https://go.coder-hub.com/76328865.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定