使用 fclose() 时发生分段错误(segmentation fault)。

huangapple go评论56阅读模式
英文:

segmentation fault occurs when using fclose()

问题

我正在编写一个函数:运行时它先读取文件 "PREVIOUSSTOP"(该文件只包含一个数字),然后根据这个数字把名为 "info" 的文件的读取位置推进到相应位置(如果 "PREVIOUSSTOP" 中的数字是 x,就在 info 文件中向前跳过 x 个句子)。当我在其中一个函数里调用 fclose() 时,出现了分段错误(Segmentation Fault)。有人能帮忙解决这个问题吗?我不明白为什么会这样,因为文件指针并不等于 NULL。我尝试用 gdb 调试,但只得到一个包含 600 多个问号的回溯。

#include <stdio.h>
#include <stdlib.h>
int stopnum = 0;

void GetInfoFromFile(FILE* info, char* Sentence1) {
    int i = 0;
    while (1) {
        char c;
        c = fgetc(info);
        Sentence1[i] = c;
        if (Sentence1[i] == '.' || Sentence1[i] == '!' || Sentence1[i] == '?') {
            break;
        }
        i++;
    }
    Sentence1[i + 1] = '
#include <stdio.h>
#include <stdlib.h>
int stopnum = 0;

/*
 * Copy characters from `info` into `Sentence1` up to and including the first
 * '.', '!' or '?', then write a terminating '\0' after that character.
 *
 * NOTE(review): the loop has no upper bound on `i` and no end-of-file test,
 * so it keeps storing bytes until one of the three terminators is read,
 * regardless of how much room the caller's buffer has.
 */
void GetInfoFromFile(FILE* info, char* Sentence1) {
    int i = 0;
    while (1) {
        char c;
        /* fgetc() returns an int; the value is narrowed to char here and is
         * never compared with EOF. */
        c = fgetc(info);
        Sentence1[i] = c;
        if (Sentence1[i] == '.' || Sentence1[i] == '!' || Sentence1[i] == '?') {
            break;
        }
        i++;
    }
    /* `i` still indexes the terminator character, so the NUL goes right after it. */
    Sentence1[i + 1] = '\0';
}

/*
 * Read a number from the file "PREVIOUSSTOP", store it in the global
 * `stopnum`, and call GetInfoFromFile() on `info` that many times so the
 * stream ends up positioned after that many sentences.
 * Prints "OOPS" when "PREVIOUSSTOP" cannot be opened.
 */
void RestartPreviousTrainingSession(FILE* info) {
    FILE* stop = fopen("PREVIOUSSTOP", "r");
    if (stop != NULL) {
        /* NOTE(review): this allocation is not checked for NULL and is never freed in this function. */
        char* stopstr = malloc(32);
        fgets(stopstr, 32, stop);
        int stopint = strtol(stopstr, NULL, 10);
        stopnum = stopint;
        /* Scratch buffer for the skipped sentences; its 400-byte size is not
         * passed to GetInfoFromFile(). */
        char dummysentence[400];
        for (int i = 0; i != stopint; i++) {
            GetInfoFromFile(info, dummysentence);
        }
        /* segmentation fault occurs here */
        fclose(stop);
    } else {
        printf("OOPS\n");
    }
}

/*
 * Open "wiki.train.tokens", skip ahead to the previously recorded stop
 * position, then close the file.
 */
int main() {
    /* NOTE(review): the fopen() result is used below without a NULL check. */
    FILE* f = fopen("wiki.train.tokens", "r");
    RestartPreviousTrainingSession(f);
    fclose(f);
}
'
;
} void RestartPreviousTrainingSession(FILE* info) { FILE* stop = fopen("PREVIOUSSTOP", "r"); if (stop != NULL) { char* stopstr = malloc(32); fgets(stopstr, 32, stop); int stopint = strtol(stopstr, NULL, 10); stopnum = stopint; char dummysentence[400]; for (int i = 0; i != stopint; i++) { GetInfoFromFile(info, dummysentence); } /* 分段错误发生在这里 */ fclose(stop); } else { printf("OOPS\n"); } } int main() { FILE* f = fopen("wiki.train.tokens", "r"); RestartPreviousTrainingSession(f); fclose(f); }

名为 "wiki.train.tokens" 的文件可以在以下链接找到:https://huggingface.co/datasets/wikitext/blob/main/wikitext.py

英文:

I am creating a function that, when run, reads the file "PREVIOUSSTOP" (the file just contains a number). The number is then used to move the file known as "info" up to the position described in "PREVIOUSSTOP" (if the number in "PREVIOUSSTOP" is x, the info file is moved up x sentences). When I use fclose() in a certain function I get a Segmentation Fault.
Can someone help fix this? I have no idea why this occurs, as the file is not equal to NULL.
I have tried to use gdb to debug this, but that just results in a backtrace with over 600 question marks.

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
int stopnum = 0;

void GetInfoFromFile(FILE* info, char* Sentence1) {
    int i = 0;
    while (1) {
        char c;
        c = fgetc(info);
        Sentence1[i] = c;
        if (Sentence1[i] == &#39;.&#39; || Sentence1[i] == &#39;!&#39; || Sentence1[i] == &#39;?&#39;) {
            break;
        }
        i++;
    }
    Sentence1[i + 1] = &#39;
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
int stopnum = 0;
void GetInfoFromFile(FILE* info, char* Sentence1) {
int i = 0;
while (1) {
char c;
c = fgetc(info);
Sentence1[i] = c;
if (Sentence1[i] == &#39;.&#39; || Sentence1[i] == &#39;!&#39; || Sentence1[i] == &#39;?&#39;) {
break;
}
i++;
}
Sentence1[i + 1] = &#39;\0&#39;;
}
void RestartPreviousTrainingSession(FILE* info) {
FILE* stop = fopen(&quot;PREVIOUSSTOP&quot;, &quot;r&quot;);
if (stop != NULL) {
char* stopstr = malloc(32);
fgets(stopstr, 32, stop);
int stopint = strtol(stopstr, NULL, 10);
stopnum = stopint;
char dummysentence[400];
for (int i = 0; i != stopint; i++) {
GetInfoFromFile(info, dummysentence);
}
/* segmentation fault occurs here */
fclose(stop);
} else {
printf(&quot;OOPS\n&quot;);
}
}
int main() {
FILE* f = fopen(&quot;wiki.train.tokens&quot;, &quot;r&quot;);
RestartPreviousTrainingSession(f);
fclose(f);
}
&#39;; } void RestartPreviousTrainingSession(FILE* info) { FILE* stop = fopen(&quot;PREVIOUSSTOP&quot;, &quot;r&quot;); if (stop != NULL) { char* stopstr = malloc(32); fgets(stopstr, 32, stop); int stopint = strtol(stopstr, NULL, 10); stopnum = stopint; char dummysentence[400]; for (int i = 0; i != stopint; i++) { GetInfoFromFile(info, dummysentence); } /* segmentation fault occurs here */ fclose(stop); } else { printf(&quot;OOPS\n&quot;); } } int main() { FILE* f = fopen(&quot;wiki.train.tokens&quot;, &quot;r&quot;); RestartPreviousTrainingSession(f); fclose(f); }

the file known as "wiki.train.tokens" can be found at https://huggingface.co/datasets/wikitext/blob/main/wikitext.py

答案1

得分: 2

  1. 我同意 @MarkAdler 的观点,你很可能在 GetInfoFromFile() 函数中向 dummysentence 写入超过 400 个字节。修复的方法是添加一个长度参数。

  2. (未修复) 在当前的实现中,RestartPreviousTrainingSession() 需要知道是否只读到了不完整的标记,这样才能避免递增 i。因此,我建议你改为编写一个跳过 stopint 个标记的函数,把这一细节封装在函数内部。之后还可以进一步优化:一次读取一块数据(比如 4k),在其中查找第 stopint 个标记,再使用 ftell() / fseek() 设置文件位置指示器。

  3. (未修复) 尽量避免使用全局变量。共享状态难以测试。

  4. fgetc() 返回一个 int;需要检查是否返回了 EOF。重新设计逻辑,使得终止的 \0 在循环终止的所有 3 种方式中都处于正确的位置。

  5. malloc() 可能返回 NULL。

  6. strtol() 可能无法解析 PREVIOUSSTOP 中的数字。它还可能返回一个超出你期望的 int 范围的值。

  7. 使用符号常量而不是魔法值。

  8. 更喜欢将 * 放在变量旁边,而不是类型。FILE* a, b 在你期望它表示 FILE *a, *b 时会出错。

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TOKEN_LEN 32
int stopnum = 0;
#define MAX_SENTENCE_LEN 400

void GetInfoFromFile(FILE *info, size_t n, char Sentence1[n]) {
    int i = 0;
    for (; (!i || !strchr(".!?", Sentence1[i-1])) && i < n - 1; i++) {
        int c = fgetc(info);
        if (c == EOF)
            break;
        Sentence1[i] = c;
    }
    Sentence1[i] = '
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TOKEN_LEN 32
int stopnum = 0;
#define MAX_SENTENCE_LEN 400

/*
 * Read one sentence from `info` into `Sentence1`.
 *
 * Characters are copied up to and including the first '.', '!' or '?'.
 * Reading also stops at end of file / read error, or once n - 1 characters
 * have been stored. The result is always NUL-terminated when n > 0; an empty
 * string means nothing could be read.
 *
 * info      - stream to read from
 * n         - capacity of Sentence1 in bytes, including the terminator
 * Sentence1 - destination buffer
 */
void GetInfoFromFile(FILE *info, size_t n, char Sentence1[n]) {
    /* With n == 0 there is no room even for the terminator, and n - 1 would
     * wrap around to SIZE_MAX. */
    if (n == 0)
        return;

    size_t i = 0; /* size_t so the comparison with n is not mixed-sign */
    while (i < n - 1) {
        int c = fgetc(info); /* int, so EOF stays distinguishable from data */
        if (c == EOF)
            break;
        Sentence1[i++] = (char)c;
        /* Compare explicitly: strchr(".!?", c) would also match a NUL byte
         * in the input, because the terminator counts as part of the string. */
        if (c == '.' || c == '!' || c == '?')
            break;
    }
    Sentence1[i] = '\0';
}

/*
 * Resume a previous session: read the sentence count stored in the file
 * "PREVIOUSSTOP", publish it in the global `stopnum`, and advance `info`
 * past that many sentences.
 *
 * info - open stream to fast-forward; nothing is done when it is NULL.
 *
 * A diagnostic is printed and `info` is left untouched when "PREVIOUSSTOP"
 * is missing, empty, or does not hold a number in the range 0..INT_MAX.
 */
void RestartPreviousTrainingSession(FILE *info) {
    if (!info) {
        printf("input stream is not open\n");
        return;
    }
    FILE *stop = fopen("PREVIOUSSTOP", "r");
    if (!stop) {
        printf("OOPS\n");
        return;
    }

    /* A small fixed-size buffer needs no heap allocation, so there is
     * nothing to leak on any exit path. */
    char stopstr[TOKEN_LEN];
    if (!fgets(stopstr, TOKEN_LEN, stop)) {
        /* On failure the buffer contents are indeterminate; do not parse them. */
        printf("PREVIOUSSTOP was empty or unreadable\n");
        goto out;
    }
    char *endptr;
    long stopint = strtol(stopstr, &endptr, 10);
    if (stopint < 0 || stopint > INT_MAX || stopstr == endptr) {
        printf("PREVIOUSSTOP data was invalid at %s", stopstr);
        goto out;
    }
    stopnum = (int)stopint;

    char dummysentence[MAX_SENTENCE_LEN];
    for (long i = 0; i < stopint; i++) {
        GetInfoFromFile(info, MAX_SENTENCE_LEN, dummysentence);
        /* Once the stream is exhausted every further call is a no-op, so
         * stop instead of spinning through the remaining count. */
        if (feof(info) || ferror(info))
            break;
    }
out:
    fclose(stop);
}

/*
 * Entry point: open the corpus file and fast-forward it to where the
 * previous session stopped.
 * Returns EXIT_FAILURE when the corpus cannot be opened, EXIT_SUCCESS otherwise.
 */
int main(void) {
    FILE *f = fopen("wiki.train.tokens", "r");
    if (!f) {
        /* Passing NULL to fclose() is undefined behaviour, so bail out first. */
        perror("wiki.train.tokens");
        return EXIT_FAILURE;
    }
    RestartPreviousTrainingSession(f);
    fclose(f);
    return EXIT_SUCCESS;
}
'
;
} void RestartPreviousTrainingSession(FILE* info) { FILE *stop = fopen("PREVIOUSSTOP", "r"); if (!stop) { printf("OOPS\n"); return; } char *stopstr = malloc(TOKEN_LEN); if (!stopstr) { printf("malloc failed\n"); goto out; } fgets(stopstr, TOKEN_LEN, stop); char *endptr; long stopint = strtol(stopstr, &endptr, 10); if (stopint < 0 || stopint > INT_MAX || stopstr == endptr) { printf("PREVIOUSSTOP data was invalid at %s", stopstr); goto out; } stopnum = stopint; char dummysentence[MAX_SENTENCE_LEN]; for (int i = 0; i != stopint; i++) { GetInfoFromFile(info, MAX_SENTENCE_LEN, dummysentence); } out: fclose(stop); } int main(void) { FILE *f = fopen("wiki.train.tokens", "r"); RestartPreviousTrainingSession(f); fclose(f); }
英文:

You did not supply the input files your program relies on, and I didn't feel like searching for an example.

  1. I concur with @MarkAdler that you are most likely writing more than 400 bytes to dummysentence in GetInfoFromFile(). The way to fix that is add a length parameter.

  2. (Not fixed) In the current implementation RestartPreviousTrainingSession() would need to know whether only a partial token was read, so as not to increment i. That said, I suggest you write a function that skips stopint tokens instead and make that detail an internal affair. You can then optimize it by reading a block of data (say, 4k), looking for the stopint'th token, and using ftell() / fseek() to set the file position indicator.

  3. (Not fixed) Avoid global variables whenever possible. Shared state is hard to test.

  4. fgetc() returns an int; you need to check whether EOF was returned. I reworked the logic so the terminating '\0' ends up in the right place for all 3 ways the loop can terminate.

  5. malloc() may return NULL.

  6. strtol() may fail to parse the number in PREVIOUSSTOP. It may also return a value that is outside the range of int you expect.

  7. Use symbolic constants instead of magic values.

  8. Prefer * next to variable instead of type. FILE* a, b is an error when you expect it to mean FILE *a, *b.

#include &lt;limits.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#define TOKEN_LEN 32
int stopnum = 0;
#define MAX_SENTENCE_LEN 400

void GetInfoFromFile(FILE *info, size_t n, char Sentence1[n]) {
	int i = 0;
	for(; (!i || !strchr(&quot;.!?&quot;, Sentence1[i-1])) &amp;&amp; i &lt; n - 1; i++) {
		int c = fgetc(info);
		if(c == EOF)
			break;
		Sentence1[i] = c;
	}
	Sentence1[i] = &#39;
#include &lt;limits.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#define TOKEN_LEN 32
int stopnum = 0;
#define MAX_SENTENCE_LEN 400
void GetInfoFromFile(FILE *info, size_t n, char Sentence1[n]) {
int i = 0;
for(; (!i || !strchr(&quot;.!?&quot;, Sentence1[i-1])) &amp;&amp; i &lt; n - 1; i++) {
int c = fgetc(info);
if(c == EOF)
break;
Sentence1[i] = c;
}
Sentence1[i] = &#39;\0&#39;;
}
void RestartPreviousTrainingSession(FILE* info) {
FILE *stop = fopen(&quot;PREVIOUSSTOP&quot;, &quot;r&quot;);
if (!stop) {
printf(&quot;OOPS\n&quot;);
return;
}
char *stopstr = malloc(TOKEN_LEN);
if(!stopstr) {
printf(&quot;malloc failed\n&quot;);
goto out;
}
fgets(stopstr, TOKEN_LEN, stop);
char *endptr;
long stopint = strtol(stopstr, &amp;endptr, 10);
if(stopint &lt; 0 || stopint &gt; INT_MAX || stopstr == endptr) {
printf(&quot;PREVIOUSSTOP data was invalid at %s&quot;, stopstr);
goto out;
}
stopnum = stopint;
char dummysentence[MAX_SENTENCE_LEN];
for (int i = 0; i != stopint; i++) {
GetInfoFromFile(info, MAX_SENTENCE_LEN, dummysentence);
}
out:
fclose(stop);
}
int main(void) {
FILE *f = fopen(&quot;wiki.train.tokens&quot;, &quot;r&quot;);
RestartPreviousTrainingSession(f);
fclose(f);
}
&#39;; } void RestartPreviousTrainingSession(FILE* info) { FILE *stop = fopen(&quot;PREVIOUSSTOP&quot;, &quot;r&quot;); if (!stop) { printf(&quot;OOPS\n&quot;); return; } char *stopstr = malloc(TOKEN_LEN); if(!stopstr) { printf(&quot;malloc failed\n&quot;); goto out; } fgets(stopstr, TOKEN_LEN, stop); char *endptr; long stopint = strtol(stopstr, &amp;endptr, 10); if(stopint &lt; 0 || stopint &gt; INT_MAX || stopstr == endptr) { printf(&quot;PREVIOUSSTOP data was invalid at %s&quot;, stopstr); goto out; } stopnum = stopint; char dummysentence[MAX_SENTENCE_LEN]; for (int i = 0; i != stopint; i++) { GetInfoFromFile(info, MAX_SENTENCE_LEN, dummysentence); } out: fclose(stop); } int main(void) { FILE *f = fopen(&quot;wiki.train.tokens&quot;, &quot;r&quot;); RestartPreviousTrainingSession(f); fclose(f); }

huangapple
  • 本文由 发表于 2023年5月28日 11:11:05
  • 转载请务必保留本文链接:https://go.coder-hub.com/76349786.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定