英文:
Capturing words within spaces and quotation marks?
问题
The code you provided aims to split a string into words separated by spaces. To achieve the desired output, you need to handle quotation marks correctly. Here's the modified code with the necessary changes:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Reads one line from standard input and prints every word on its own line.
 *
 * Words are separated by spaces. A word that starts with a double quote runs
 * up to the matching closing quote (or to the end of the line when the quote
 * is never closed); it is printed without the quotes and keeps its inner
 * spaces.
 *
 * Returns 0 in all cases; when no line can be read nothing is printed.
 */
int main(void) {
    char command[BUFSIZ];
    char *cursor;

    /* Without this check an empty input would leave `command` uninitialised. */
    if (fgets(command, BUFSIZ, stdin) == NULL) {
        return 0;
    }

    /* fgets keeps the line terminator; cut it off so it is not printed as
     * part of the last word. */
    command[strcspn(command, "\r\n")] = '\0';

    cursor = command;
    for (;;) {
        const char *terminator = " ";
        char *word;

        /* Skip the spaces that separate two words. */
        cursor += strspn(cursor, " ");
        if (*cursor == '\0') {
            break;
        }

        /* A quoted word ends at the next quote instead of the next space. */
        if (*cursor == '"') {
            terminator = "\"";
            cursor++;
        }

        word = cursor;
        cursor += strcspn(cursor, terminator);

        /* Terminate the word in place and step over its delimiter, unless the
         * word already ends at the end of the line. */
        if (*cursor != '\0') {
            *cursor = '\0';
            cursor++;
        }

        printf("%s\n", word);
    }

    return 0;
}
With this code, you should achieve the desired output:
The
Brown
Fox Jumps Over
The Lazy
Dog
英文:
The idea, explicit in the title, is to capture words within spaces and quotation marks. Here's an example of the input we are dealing with:
Input:
The Brown "Fox Jumps Over" "The Lazy" Dog
Currently my code can capture words within spaces; as many of you know, a basic strtok() is enough for that. Here's my code so far:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Reads one line from standard input and prints each space-separated piece
 * on its own line, preceded by a single space.
 *
 * Quotation marks get no special treatment, and the newline stored by fgets
 * remains part of the last piece. The result of fgets is not checked.
 */
int main () {
    char command[BUFSIZ];
    char *piece;

    fgets(command, BUFSIZ, stdin);

    /* strtok hands back one piece per call until the line is used up. */
    for (piece = strtok(command, " "); piece != NULL; piece = strtok(NULL, " ")) {
        printf( " %s\n", piece );
    }

    return 0;
}
And as expected, my code prints the following:
Current Output:
The
Brown
"Fox
Jumps
Over"
"The
Lazy"
Dog
But the whole idea and problem is to get the following output:
The
Brown
Fox Jumps Over
The Lazy
Dog
All the help is welcome and I thank you in advance.
(PS: The included libraries are the only ones allowed.)
答案1
得分: 2
这个程序适用于您的输入，它使用一个小的状态机来防止在引号之间分割。在我看来，对于比单个分隔符更复杂的情况，strtok 的局限性相当大：
#include <stdio.h>
#include <stdlib.h>
/*
 * Writes str to standard output framed by "<< " and " >>" and followed by a
 * newline, which makes leading or trailing whitespace in str visible.
 */
void prn(char* str) {
    fprintf(stdout, "<< %s >>\n", str);
}
/*
 * Reads one line from standard input, splits it into tokens and hands each
 * token to prn().
 *
 * Outside quotes a space ends the current token; a double quote toggles
 * "inside quotes", where spaces are ordinary characters. The end of the line
 * (the newline stored by fgets, or the terminating NUL) ends the last token,
 * also when a quote was never closed. Tokens are terminated in place by
 * overwriting their delimiter with NUL.
 *
 * Empty tokens produced by neighbouring delimiters are skipped; an explicitly
 * quoted empty string ("") is still reported.
 *
 * Returns 0 on success and EXIT_FAILURE when no line could be read.
 */
int main(void) {
    char command[BUFSIZ];
    int in_quotes = 0;
    char *start;
    char *cur;

    /* Without this check an empty input would leave `command` uninitialised. */
    if (fgets(command, BUFSIZ, stdin) == NULL) {
        return EXIT_FAILURE;
    }

    start = cur = command;
    for (;;) {
        const char c = *cur;
        const int at_end = (c == '\0' || c == '\n');
        const int closes_quote = (in_quotes && c == '"');

        if (at_end || c == '"' || (!in_quotes && c == ' ')) {
            /* Delimiter reached: terminate the token that ends here. */
            *cur = '\0';

            /* Text standing directly before an opening quote is reported as
             * a token of its own rather than being dropped. */
            if (cur != start || closes_quote) {
                prn(start);
            }
            if (at_end) {
                break;
            }
            if (c == '"') {
                in_quotes = !in_quotes;
            }
            start = cur + 1;
        }
        cur++;
    }

    return 0;
}
输出:
➜ echo -n 'The Brown "Fox Jumps Over" "The Lazy" Dog' | ./a.out
<< The >>
<< Brown >>
<< Fox Jumps Over >>
<< The Lazy >>
<< Dog >>
[编辑：根据反馈进行了整理；输出时加上定界符，以便发现任何混入的空格]
英文:
This program works for your input; it employs a tiny state machine that prevents splitting between quotes. strtok is pretty limited for cases more complicated than a single split token, IMO:
#include <stdio.h>
#include <stdlib.h>
/*
 * Writes str to standard output framed by "<< " and " >>" and followed by a
 * newline, which makes leading or trailing whitespace in str visible.
 */
void prn(char* str) {
    fprintf(stdout, "<< %s >>\n", str);
}
/*
 * Reads one line from standard input, splits it into tokens and hands each
 * token to prn().
 *
 * Outside quotes a space ends the current token; a double quote toggles
 * "inside quotes", where spaces are ordinary characters. The end of the line
 * (the newline stored by fgets, or the terminating NUL) ends the last token,
 * also when a quote was never closed. Tokens are terminated in place by
 * overwriting their delimiter with NUL.
 *
 * Empty tokens produced by neighbouring delimiters are skipped; an explicitly
 * quoted empty string ("") is still reported.
 *
 * Returns 0 on success and EXIT_FAILURE when no line could be read.
 */
int main(void) {
    char command[BUFSIZ];
    int in_quotes = 0;
    char *start;
    char *cur;

    /* Without this check an empty input would leave `command` uninitialised. */
    if (fgets(command, BUFSIZ, stdin) == NULL) {
        return EXIT_FAILURE;
    }

    start = cur = command;
    for (;;) {
        const char c = *cur;
        const int at_end = (c == '\0' || c == '\n');
        const int closes_quote = (in_quotes && c == '"');

        if (at_end || c == '"' || (!in_quotes && c == ' ')) {
            /* Delimiter reached: terminate the token that ends here. */
            *cur = '\0';

            /* Text standing directly before an opening quote is reported as
             * a token of its own rather than being dropped. */
            if (cur != start || closes_quote) {
                prn(start);
            }
            if (at_end) {
                break;
            }
            if (c == '"') {
                in_quotes = !in_quotes;
            }
            start = cur + 1;
        }
        cur++;
    }

    return 0;
}
The output:
➜ echo -n 'The Brown "Fox Jumps Over" "The Lazy" Dog' |./a.out
<< The >>
<< Brown >>
<< Fox Jumps Over >>
<< The Lazy >>
<< Dog >>
[edit: tidied following feedback, printing delimited to catch any sneaky spaces creeping through]
答案2
得分: 0
这也使用了状态机。与其他回答不同，它通过使用 3 个状态（empty、word 和 quote）来忽略单词内部的引号。它还具有错误检测功能，能处理制表符和多个空格，但更为复杂。
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <assert.h>
/*!conditions:re2c*/
/*
 * Extracts the next token from *text. This is re2c input rather than plain C:
 * the two specially marked comment blocks below hold the lexer configuration
 * and the lexer rules.
 *
 * The lexer works in three conditions: `empty` (between tokens), `word`
 * (inside an unquoted word, where a quote character is ordinary text) and
 * `quote` (inside a quoted string). A word ends at a space, tab, vertical tab
 * or newline, a quoted string at the next quote character. Tokens are
 * terminated in place by overwriting that delimiter with '\0', so the buffer
 * behind *text is modified.
 *
 * text:  in/out position in a NUL-terminated buffer; moved to where the next
 *        call has to resume.
 * token: out; start of the token found, or 0 once the input is used up.
 *
 * Returns 1 when a token was stored in *token. Returns 0 at the end of the
 * input, and also when a quoted string is never closed; in that case errno is
 * set to EILSEQ and *token is left untouched.
 */
static int scan(char **text, const char **token) {
    /* yyt1 is presumably the tag register the generated code expects when
     * re2c runs with tags enabled -- TODO confirm against the re2c output. */
    char *YYCURSOR, *yyt1, *open = 0, *close = 0;
    enum YYCONDTYPE condition = yycempty;
    /* Check the arguments before *text is read. */
    assert(text && token);
    YYCURSOR = *text;
    /*!re2c /**/
    re2c:define:YYCTYPE = char;
    re2c:yyfill:enable = 0;
    re2c:define:YYGETCONDITION = "condition";
    re2c:define:YYSETCONDITION = "condition = @@;";
    re2c:define:YYGETCONDITION:naked = 1;
    re2c:define:YYSETCONDITION:naked = 1;
    space = [ \t\v\n];
    nul = "\x00";
    quote = "\"";
    */
    for( ; ; ) { /*!re2c /**/
    <empty> nul { return *token = 0, 0; }
    <empty> space+ { continue; } /* Leading space. */
    <empty> quote @open :=> quote
    <empty> @open * :=> word
    <word> nul { *text = YYCURSOR - 1; *token = open; return 1; } /* Resume at the NUL so the next call reports the end of input. */
    <word> @close space
    { *close = '\0'; *text = close + 1; *token = open; return 1; }
    <word> * { continue; }
    <quote> @close quote
    { *close = '\0'; *text = close + 1; *token = open; return 1; }
    <quote> nul { return errno = EILSEQ, 0; }
    <quote> * { continue; }
    */
    }
}
/*
 * Reads one line from standard input and prints every token found by scan()
 * on its own line.
 *
 * Returns EXIT_SUCCESS when the whole line was split. Returns EXIT_FAILURE
 * when no line could be read or when scan() stopped with errno set; if errno
 * is set, a message prefixed with "capture" is written via perror.
 */
int main(void) {
    int success = EXIT_FAILURE;
    char command[BUFSIZ], *input;
    const char *token;

    errno = 0;
    input = fgets(command, BUFSIZ, stdin);
    if (input) {
        for (;;) {
            /* printf is allowed to change errno even when it succeeds, so
             * clear it before every scan() call: whatever is left after the
             * loop then comes from the scan() call that ended it. */
            errno = 0;
            if (!scan(&input, &token)) {
                break;
            }
            printf( "%s\n", token );
        }
        if (!errno) {
            success = EXIT_SUCCESS;
        }
    }

    /* errno is zero on the success path, so this only reports failures. */
    if (errno) perror("capture");
    return success;
}
使用 re2c 作为工具，运行以下命令来生成代码：re2c -W -T -c -o main.re.c main.c。
英文:
This also uses a state machine. Unlike the other answer, it ignores quotes inside words by having 3 states: empty, word, and quote. It also has error detection, and handles tabs and multiple spaces, but is more complex.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <assert.h>
/*!conditions:re2c*/
/*
 * Extracts the next token from *text. This is re2c input rather than plain C:
 * the two specially marked comment blocks below hold the lexer configuration
 * and the lexer rules.
 *
 * The lexer works in three conditions: `empty` (between tokens), `word`
 * (inside an unquoted word, where a quote character is ordinary text) and
 * `quote` (inside a quoted string). A word ends at a space, tab, vertical tab
 * or newline, a quoted string at the next quote character. Tokens are
 * terminated in place by overwriting that delimiter with '\0', so the buffer
 * behind *text is modified.
 *
 * text:  in/out position in a NUL-terminated buffer; moved to where the next
 *        call has to resume.
 * token: out; start of the token found, or 0 once the input is used up.
 *
 * Returns 1 when a token was stored in *token. Returns 0 at the end of the
 * input, and also when a quoted string is never closed; in that case errno is
 * set to EILSEQ and *token is left untouched.
 */
static int scan(char **text, const char **token) {
    /* yyt1 is presumably the tag register the generated code expects when
     * re2c runs with tags enabled -- TODO confirm against the re2c output. */
    char *YYCURSOR, *yyt1, *open = 0, *close = 0;
    enum YYCONDTYPE condition = yycempty;
    /* Check the arguments before *text is read. */
    assert(text && token);
    YYCURSOR = *text;
    /*!re2c /**/
    re2c:define:YYCTYPE = char;
    re2c:yyfill:enable = 0;
    re2c:define:YYGETCONDITION = "condition";
    re2c:define:YYSETCONDITION = "condition = @@;";
    re2c:define:YYGETCONDITION:naked = 1;
    re2c:define:YYSETCONDITION:naked = 1;
    space = [ \t\v\n];
    nul = "\x00";
    quote = "\"";
    */
    for( ; ; ) { /*!re2c /**/
    <empty> nul { return *token = 0, 0; }
    <empty> space+ { continue; } /* Leading space. */
    <empty> quote @open :=> quote
    <empty> @open * :=> word
    <word> nul { *text = YYCURSOR - 1; *token = open; return 1; } /* Resume at the NUL so the next call reports the end of input. */
    <word> @close space
    { *close = '\0'; *text = close + 1; *token = open; return 1; }
    <word> * { continue; }
    <quote> @close quote
    { *close = '\0'; *text = close + 1; *token = open; return 1; }
    <quote> nul { return errno = EILSEQ, 0; }
    <quote> * { continue; }
    */
    }
}
/*
 * Reads one line from standard input and prints every token found by scan()
 * on its own line.
 *
 * Returns EXIT_SUCCESS when the whole line was split. Returns EXIT_FAILURE
 * when no line could be read or when scan() stopped with errno set; if errno
 * is set, a message prefixed with "capture" is written via perror.
 */
int main(void) {
    int success = EXIT_FAILURE;
    char command[BUFSIZ], *input;
    const char *token;

    errno = 0;
    input = fgets(command, BUFSIZ, stdin);
    if (input) {
        for (;;) {
            /* printf is allowed to change errno even when it succeeds, so
             * clear it before every scan() call: whatever is left after the
             * loop then comes from the scan() call that ended it. */
            errno = 0;
            if (!scan(&input, &token)) {
                break;
            }
            printf( "%s\n", token );
        }
        if (!errno) {
            success = EXIT_SUCCESS;
        }
    }

    /* errno is zero on the success path, so this only reports failures. */
    if (errno) perror("capture");
    return success;
}
Uses re2c, invoked as re2c -W -T -c -o main.re.c main.c, to generate the code.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论