将文件的最后 ‘n’ 行以相反的顺序复制到另一个文件。

huangapple go评论72阅读模式
英文:

Copy the last 'n' lines of a file to another file in reverse order

问题

我正在处理Ubuntu中的系统调用,并使用C语言实现它。我编写了这段代码来执行上述操作,但在目标文件中,比如说“b.txt”,我只得到了'n'个空行,而不是这些行中所需的文本。我无法确定这里是否存在逻辑错误,还是我需要更改与fd2对应的open()的权限条件/值(模式)?

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Interactive program from the question: it is meant to copy the last 'n'
 * lines of a source file to a destination file in reverse order, using the
 * access/open/lseek/read/write system calls directly.
 *
 * Reads from stdin: the source path, the line count n, the destination path
 * and, when the destination already exists, an overwrite/append choice.
 *
 * NOTE(review): none of the scanf/open/lseek/read/write return values are
 * checked, and "%s" places no bound on the 100-byte path buffers.
 */
int main()
{
	int n, fd1, fd2, choice=0;
	
	char sPathName[100], dPathName[100], c;
	
	printf("Enter name of the source file: \n");
	scanf("%s", sPathName);
	
	// Keep asking until access() reports that the path exists.
	while(access(sPathName, F_OK) != 0)	// the given file does not exist (in this directory)
	{
		printf("The file \"%s\" does not exist in the given directory!", sPathName);
		printf("\n\nEnter name of a valid source file: \n");
		scanf("%s", sPathName);
	}
	
	fd1 = open(sPathName, O_RDONLY);
	
	printf("\nEnter the integer value of number of lines, 'n': \n");
	scanf("%d", &n);
	
	printf("\nEnter name of the destination file: \n");
	scanf("%s", dPathName);
	
	if(access(dPathName, F_OK) != 0)	// the given file does not exist (in this directory)
	{
		// Destination is new: create it with mode 0666 (subject to the umask).
		fd2 = open(dPathName, O_CREAT|O_WRONLY, 0666);
	}
	
	else
	{
		printf("\nA file by the name \"%s\" already exists in this directory! \nDo you want to: \n1) Overwrite it \n2) Append to it? \nChoice: ", dPathName);
		scanf("%d", &choice);
		
		if(choice == 1)
		{
			// NOTE(review): opened without O_TRUNC, so writing starts at offset 0
			// but any old bytes past the newly written data stay in the file.
			fd2 = open(dPathName, O_WRONLY);
		}
		
		else if(choice == 2)
		{
			fd2 = open(dPathName, O_WRONLY|O_APPEND);
		}
		
		else
		{
			printf("\nInvalid choice! Exiting program...\n");
			return 0;
		}
	}
	
	// Outer loop: one pass per requested line.
	while(n != 0)
	{
		// Inner loop: copy bytes until a newline has been copied.
		while(1)
		{
			// NOTE(review): this seek is repeated before every read, so each
			// iteration re-reads the final byte of the file instead of moving
			// backwards through it.
			lseek(fd1, -1, SEEK_END);
			
			read(fd1, &c, 1);
			write(fd2, &c, 1);
			
			// The only exit from the inner loop: the byte just copied is '\n'.
			if(c == '\n')
			{
				break;
			}
		}
				
		n--;
	}
}
英文:

I am working on system calls in Ubuntu and implementing it using C language. I wrote this code to do the aforesaid but in the destination file say, "b.txt", I am only getting 'n' number of blank lines instead of the desired texts in those lines.
I can't figure out if there is some logical error here or do I need to change the permission conditions/values (mode) for open() corresponding to fd2?

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Interactive program from the question: it is meant to copy the last 'n'
 * lines of a source file to a destination file in reverse order, using the
 * access/open/lseek/read/write system calls directly.
 *
 * Reads from stdin: the source path, the line count n, the destination path
 * and, when the destination already exists, an overwrite/append choice.
 *
 * This is the code exactly as asked about (only the HTML escaping and the
 * lost indentation have been repaired); the copy loop at the bottom still
 * contains the defect the question is about.
 */
int main()
{
	int n, fd1, fd2, choice=0;
	char sPathName[100], dPathName[100], c;

	printf("Enter name of the source file: \n");
	scanf("%s", sPathName);

	// Keep asking until access() reports that the path exists.
	while(access(sPathName, F_OK) != 0)	//Given file doesn't exist (in this directory)
	{
		printf("The file \"%s\" does not exist in the given directory!", sPathName);
		printf("\n\nEnter name of a valid source file: \n");
		scanf("%s", sPathName);
	}

	fd1 = open(sPathName, O_RDONLY);

	printf("\nEnter the integer value of number of lines, 'n': \n");
	scanf("%d", &n);

	printf("\nEnter name of the destination file: \n");
	scanf("%s", dPathName);

	if(access(dPathName, F_OK) != 0)	//Given file doesn't exist (in this directory)
	{
		// Destination is new: create it with mode 0666 (subject to the umask).
		fd2 = open(dPathName, O_CREAT|O_WRONLY, 0666);
	}
	else
	{
		printf("\nA file by the name \"%s\" already exists in this directory! \nDo you want to: \n1) Overwrite it \n2) Append to it? \nChoice: ", dPathName);
		scanf("%d", &choice);

		if(choice == 1)
		{
			// NOTE(review): opened without O_TRUNC, so old bytes past the
			// newly written data stay in the file.
			fd2 = open(dPathName, O_WRONLY);
		}
		else if(choice == 2)
		{
			fd2 = open(dPathName, O_WRONLY|O_APPEND);
		}
		else
		{
			printf("\nInvalid choice! Exiting program...\n");
			return 0;
		}
	}

	// Outer loop: one pass per requested line.
	while(n != 0)
	{
		// Inner loop: copy bytes until a newline has been copied.
		while(1)
		{
			// NOTE(review): this seek is repeated before every read, so each
			// iteration re-reads the final byte of the file instead of moving
			// backwards through it.
			lseek(fd1, -1, SEEK_END);
			read(fd1, &c, 1);
			write(fd2, &c, 1);

			// The only exit from the inner loop: the byte just copied is '\n'.
			if(c == '\n')
			{
				break;
			}
		}

		n--;
	}
}

答案1

得分: 1

以下是代码的翻译部分:

问题是以下循环:

```c
// Loop quoted from the question, kept as posted to show the problem:
// the seek to the last byte is repeated before every read, so the same
// final byte of the file is read and written on each iteration.
while(n != 0)
{
    while(1)
    {
        lseek(fd1, -1, SEEK_END);
            
        read(fd1, &c, 1);
        write(fd2, &c, 1);
            
        // the inner loop only ends once the byte just copied is a newline
        if(c == '\n')
        {
            break;
        }
    }
                
    n--;
}

```

在读取每个字符之前,您都会先定位(seek)到文件的最后一个字符。这意味着您始终只是在读取并写入文件的最后一个字符。如果该字符是换行符,那么您就会写入大量空行。

为了使您的方法有效,您应该首先读取和写入最后一个字符,然后是倒数第二个字符,然后是倒数第三个字符等,直到找到所需数量的换行符。

为了找到文件中最后一行的不应写入的换行符,您应该首先使用以下方式寻找到文件的最后一个字符:

lseek(fd1, -1, SEEK_END);

这是您已经在做的,但在读取该字符后,您不应再次使用该函数调用,而应该使用以下方式:

lseek(fd1, -2, SEEK_CUR);

跳回一个字符,并跳过您刚刚读取的字符(总共2个字符)。之后,您应该始终使用以下方式:

lseek(fd1, -2, SEEK_CUR);

而不是

lseek(fd1, -1, SEEK_END);

在读取每个字符后,直到找到文件中最后一行的不应写入的换行符。

此外,通常建议检查系统调用的返回值。否则,如果例如整个文本文件不超过n行,您的程序可能会表现不正常。

因此,我建议您将代码更改为以下内容:

//尝试定位到文件的最后一个字符
if (lseek(fd1, -1, SEEK_END) == -1)
{
    perror("lseek");
    exit(EXIT_FAILURE);
}

while (1)
{
    ssize_t ret;

    //尝试读取一个字符
    ret = read(fd1, &amp;c, 1);
    if (ret != 1)
    {
        if (ret == -1)
        {
            perror("read");
        }
        else
        {
            fprintf(stderr, "从“read”中遇到意外的返回值!\n");
        }
        exit(EXIT_FAILURE);
    }

    //尝试写入一个字符
    ret = write(fd2, &amp;c, 1);
    if (ret != 1)
    {
        if (ret == -1)
        {
            perror("write");
        }
        else
        {
            fprintf(stderr, "从“write”中遇到意外的返回值!\n");
        }
        exit(EXIT_FAILURE);
    }

    //计算换行符的数量,并在达到限制时退出无限循环
    if (c == '\n')
    {
        if (n-- == 0)
        {
            break;
        }
    }

    //在文件中后退两个字符
    if (lseek(fd1, -2, SEEK_CUR) == -1)
    {
        perror("lseek");
        exit(EXIT_FAILURE);
    }
}

请注意,此代码还需要在文件顶部包含#include <stdlib.h>

对于输入:

This is line1.
This is line2.
This is line3.
This is line4.
This is line5.
This is line6.
This is line7.
This is line8.

并且n等于5,此代码的输出如下:


.8enil si sihT
.7enil si sihT
.6enil si sihT
.5enil si sihT
.4enil si sihT

请注意,所有字符都以相反的顺序写入,包括换行符。因此,第一行为空,因为第一个写入的字符是换行符。

值得注意的是,对每个字符执行三次系统调用非常低效。更高效的方法是一次读取几千字节。

英文:

The problem is the following loop:

// Loop quoted from the question, kept as posted to show the problem:
// the seek to the last byte is repeated before every read, so the same
// final byte of the file is read and written on each iteration.
while(n != 0)
{
    while(1)
    {
        lseek(fd1, -1, SEEK_END);
        read(fd1, &c, 1);
        write(fd2, &c, 1);

        // the inner loop only ends once the byte just copied is a newline
        if(c == '\n')
        {
            break;
        }
    }

    n--;
}

Before reading every character, you seek to the last character of the file. This means that you are always only reading and writing the last character of the file. If this character is a newline character, then you will write lots of blank lines.

In order for your approach to work, you should first read and write the last character, then the second-to-last character, then the third-to-last character, etc., until you have found the required number of newline characters.

In order to find the newline character that ends the last line which should not be written (i.e. the line just before the final n lines), you should first seek to the last character of the file using

lseek(fd1, -1, SEEK_END);

which you are already doing, but after reading that character, you should not use that function call again, but should instead use

lseek(fd1, -2, SEEK_CUR);

to jump back one character and to also jump over the character that you just read (so 2 characters in total). Afterwards, you should always use

lseek(fd1, -2, SEEK_CUR);

instead of

lseek(fd1, -1, SEEK_END);

after reading every character, until you find the newline character of the last line in the file that should not be written.

Also, it is generally advisable to check the return values of the system calls. Otherwise, your program will likely misbehave if, for example, the entire text file consists of fewer than n lines.

Therefore, I recommend that you change your code to the following:

//attempt to seek to the last character of the file
if ( lseek(fd1, -1, SEEK_END) == -1 )
{
perror( &quot;lseek&quot; );
exit( EXIT_FAILURE );
}
while ( 1 )
{
ssize_t ret;
//attempt to read a character
ret = read(fd1, &amp;c, 1);
if ( ret != 1 )
{
if ( ret == -1 )
{
perror( &quot;read&quot; );
}
else
{
fprintf( stderr, &quot;Encountered unexpected return value from \&quot;read\&quot;!\n&quot; );
}
exit( EXIT_FAILURE );
}
//attempt to write a character
ret = write( fd2, &amp;c, 1 );
if ( ret != 1 )
{
if ( ret == -1 )
{
perror( &quot;write&quot; );
}
else
{
fprintf( stderr, &quot;Encountered unexpected return value from \&quot;write\&quot;!\n&quot; );
}
exit( EXIT_FAILURE );
}
//count the number of newline characters and break out
//of infinite loop if we have reached the limit
if(c == &#39;\n&#39;)
{
if ( n-- == 0 )
{
break;
}
}
//jump back two characters in the file
if ( lseek(fd1, -2, SEEK_CUR) == -1 )
{
perror( &quot;lseek&quot; );
exit( EXIT_FAILURE );
}
}

Note that this code additionally requires #include <stdlib.h> at the top of the file.

For the input

This is line1.
This is line2.
This is line3.
This is line4.
This is line5.
This is line6.
This is line7.
This is line8.

and with n equal to 5, this code has the following output:


.8enil si sihT
.7enil si sihT
.6enil si sihT
.5enil si sihT
.4enil si sihT

Note that all characters are written in reverse order, including the newline characters. As a consequence, the first line is empty, because the first character written is a newline character.

It is also worth noting that performing three system calls for every character is highly inefficient. A more efficient approach would read several kilobytes at once.

huangapple
  • 本文由 发表于 2023年4月13日 21:04:58
  • 转载请务必保留本文链接:https://go.coder-hub.com/76005786.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定