捕获空格和引号内的单词?

huangapple go评论91阅读模式
英文:

Capturing words within spaces and quotation marks?

问题

The code you provided aims to split a string into words separated by spaces. To achieve the desired output, you need to handle quotation marks correctly. Here's the modified code with the necessary changes:

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <ctype.h>
  /*
   * Reads one line from stdin and prints each word on its own line.
   *
   * Words are separated by whitespace. A word that begins with a double
   * quote extends to the matching closing quote (or to the end of the line
   * when the quote is never closed), so the spaces inside it are kept; the
   * quotes themselves are not printed. A quote only opens a phrase at the
   * start of a word.
   *
   * Returns 0, including when there is no input to read.
   */
  int main(void) {
      char command[BUFSIZ];

      if (fgets(command, sizeof command, stdin) == NULL) {
          return 0; /* EOF or read error: the buffer holds nothing usable */
      }
      /* fgets keeps the line terminator; cut it off so it never ends up in a word. */
      command[strcspn(command, "\r\n")] = '\0';

      char *p = command;
      while (*p != '\0') {
          /* Skip the whitespace in front of the next word. */
          while (isspace((unsigned char)*p)) {
              p++;
          }
          if (*p == '\0') {
              break;
          }

          char *token;
          if (*p == '"') {
              /* Quoted phrase: starts after the quote, ends at the next quote. */
              token = ++p;
              while (*p != '\0' && *p != '"') {
                  p++;
              }
          } else {
              /* Plain word: ends at the next whitespace character. */
              token = p;
              while (*p != '\0' && !isspace((unsigned char)*p)) {
                  p++;
              }
          }

          /* Terminate the word in place and step over its delimiter, if any. */
          if (*p != '\0') {
              *p++ = '\0';
          }
          printf("%s\n", token);
      }
      return 0;
  }

With this code, you should achieve the desired output:

  1. The
  2. Brown
  3. Fox Jumps Over
  4. The Lazy
  5. Dog
英文:

The idea, as the title says, is to capture words delimited by spaces and by quotation marks. Here's an example of the input we are dealing with:

  1. Input:
  2. The Brown "Fox Jumps Over" "The Lazy" Dog

Currently my code can capture words separated by spaces; as many of you know, a basic strtok() is enough for that. Here's my code so far:

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <ctype.h>
  /*
   * Reads one line from stdin and prints every space-separated word on its
   * own line, each preceded by a single space. Quotation marks get no
   * special treatment, so a quoted phrase is still split at its spaces.
   *
   * Returns 0, including when there is no input to read.
   */
  int main(void) {
      char command[BUFSIZ];

      /* On EOF or a read error the buffer is not usable, so print nothing. */
      if (fgets(command, sizeof command, stdin) == NULL) {
          return 0;
      }

      for (char *token = strtok(command, " "); token != NULL;
           token = strtok(NULL, " ")) {
          printf(" %s\n", token);
      }
      return 0;
  }

And as expected, my code prints the following:

  1. Current Output:
  2. The
  3. Brown
  4. "Fox
  5. Jumps
  6. Over"
  7. "The
  8. Lazy"
  9. Dog

But the whole idea and problem is to get the following output:

  1. The
  2. Brown
  3. Fox Jumps Over
  4. The Lazy
  5. Dog

All the help is welcome and I thank you in advance.
(PS: The included libraries are the only ones allowed.)

答案1

得分: 2

这个程序适用于您的输入,它使用一个小的状态机来防止在引号之间分割。对于比单个分割标记更复杂的情况,strtok 显然有一些限制。

  1. #include <stdio.h>
  2. #include <stdlib.h>
  /*
   * Prints str on its own line, wrapped as "<< str >>" so that any leading
   * or trailing whitespace inside the token is easy to spot.
   */
  void prn(const char *str) {
      printf("<< %s >>\n", str);
  }
  /*
   * Reads one line from stdin, splits it in place and hands every token to
   * prn(). A space ends a token unless it lies between a pair of double
   * quotes; the quotes themselves are delimiters and are never printed.
   * Empty tokens produced by repeated spaces are skipped, but an explicitly
   * quoted empty string ("") is still reported.
   *
   * Returns 0, including when there is no input to read.
   */
  int main(void) {
      char command[BUFSIZ];
      int in_quotes = 0; /* 1 while between an opening and a closing quote */

      if (fgets(command, sizeof command, stdin) == NULL) {
          return 0;
      }

      char *start = command; /* first character of the token being collected */
      for (char *cur = command; ; cur++) {
          const char c = *cur;
          /* fgets keeps the newline, so treat it like the end of the line. */
          const int at_end = (c == '\0' || c == '\n');

          if (!at_end && c != '"' && (c != ' ' || in_quotes)) {
              continue; /* ordinary character: part of the current token */
          }

          *cur = '\0'; /* terminate the token in place */
          if (cur > start || (c == '"' && in_quotes)) {
              prn(start);
          }
          if (at_end) {
              break; /* leave before cur moves past the terminator */
          }
          if (c == '"') {
              in_quotes = !in_quotes;
          }
          start = cur + 1;
      }
      return 0;
  }

输出:

  1. echo -n 'The Brown "Fox Jumps Over" "The Lazy" Dog' | ./a.out
  2. << The >>
  3. << Brown >>
  4. << Fox Jumps Over >>
  5. << The Lazy >>
  6. << Dog >>

[编辑:根据反馈进行了整理;打印时加上定界符,以便发现任何混入的空格]

英文:

This program works for your input. It employs a tiny state machine that prevents splitting between quotes. IMO, strtok is pretty limited for anything more complicated than splitting on a single delimiter:

  1. #include <stdio.h>
  2. #include <stdlib.h>
  /*
   * Prints str on its own line, wrapped as "<< str >>" so that any leading
   * or trailing whitespace inside the token is easy to spot.
   */
  void prn(const char *str) {
      printf("<< %s >>\n", str);
  }
  /*
   * Reads one line from stdin, splits it in place and hands every token to
   * prn(). A space ends a token unless it lies between a pair of double
   * quotes; the quotes themselves are delimiters and are never printed.
   * Empty tokens produced by repeated spaces are skipped, but an explicitly
   * quoted empty string ("") is still reported.
   *
   * Returns 0, including when there is no input to read.
   */
  int main(void) {
      char command[BUFSIZ];
      int in_quotes = 0; /* 1 while between an opening and a closing quote */

      if (fgets(command, sizeof command, stdin) == NULL) {
          return 0;
      }

      char *start = command; /* first character of the token being collected */
      for (char *cur = command; ; cur++) {
          const char c = *cur;
          /* fgets keeps the newline, so treat it like the end of the line. */
          const int at_end = (c == '\0' || c == '\n');

          if (!at_end && c != '"' && (c != ' ' || in_quotes)) {
              continue; /* ordinary character: part of the current token */
          }

          *cur = '\0'; /* terminate the token in place */
          if (cur > start || (c == '"' && in_quotes)) {
              prn(start);
          }
          if (at_end) {
              break; /* leave before cur moves past the terminator */
          }
          if (c == '"') {
              in_quotes = !in_quotes;
          }
          start = cur + 1;
      }
      return 0;
  }

The output:

  1. echo -n 'The Brown "Fox Jumps Over" "The Lazy" Dog' | ./a.out
  2. << The >>
  3. << Brown >>
  4. << Fox Jumps Over >>
  5. << The Lazy >>
  6. << Dog >>

[edit: tidied following feedback, printing delimited to catch any sneaky spaces creeping through]

答案2

得分: 0

这也使用了状态机。与其他回答不同,它使用 3 个状态(empty、word 和 quote),从而忽略单词内部的引号。它还具有错误检测功能,并能处理制表符和多个空格,但更为复杂。

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <errno.h>
  6. #include <assert.h>
  7. /*!conditions:re2c*/
  8. static int scan(char **text, const char **token) {
  9. char *YYCURSOR = *text, *yyt1, *open = 0, *close = 0;
  10. enum YYCONDTYPE condition = yycempty;
  11. assert(text && token);
  12. /*!re2c /**/
  13. re2c:define:YYCTYPE = char;
  14. re2c:yyfill:enable = 0;
  15. re2c:define:YYGETCONDITION = "condition";
  16. re2c:define:YYSETCONDITION = "condition = @@;";
  17. re2c:define:YYGETCONDITION:naked = 1;
  18. re2c:define:YYSETCONDITION:naked = 1;
  19. space = [ \t\v\n];
  20. nul = "\x00";
  21. quote = "\"";
  22. */
  23. for( ; ; ) { /*!re2c /**/
  24. <empty> nul { return *token = 0, 0; }
  25. <empty> space+ { continue; } /* Leading space. */
  26. <empty> quote @open => quote
  27. <empty> @open * => word
  28. <word> nul { return *token = open, 1; }
  29. <word> @close space
  30. { *close = '\0'; *text = close + 1; *token = open; return 1; }
  31. <word> * { continue; }
  32. <quote> @close quote
  33. { *close = '\0'; *text = close + 1; *token = open; return 1; }
  34. <quote> nul { return errno = EILSEQ, 0; }
  35. <quote> * { continue; }
  36. */
  37. }
  38. }
  /*
   * Reads one line from stdin and prints each token found by scan() on its
   * own line.
   *
   * Returns EXIT_FAILURE when no line could be read or when scan() stops
   * with errno set (perror() then reports it under the name "capture"),
   * and EXIT_SUCCESS otherwise.
   */
  int main(void) {
      int success = EXIT_SUCCESS;
      char command[BUFSIZ], *input;
      const char *token;

      errno = 0;
      if(!(input = fgets(command, sizeof command, stdin))) goto catch;
      for( ; ; ) {
          /* Library calls such as printf may change errno even when they
           * succeed, so clear it right before the call whose error matters. */
          errno = 0;
          if(!scan(&input, &token)) break;
          printf("%s\n", token);
      }
      if(errno) goto catch;
      goto finally;
  catch:
      success = EXIT_FAILURE;
      if(errno) perror("capture");
  finally:
      return success;
  }

使用re2c作为工具,运行以下命令来生成代码:re2c -W -T -c -o main.re.c main.c

英文:

This also uses a state machine. Unlike the other answer, it ignores quotes inside words by having 3 states, empty, word, and quote. It also has error detection, and handles tabs and multiple spaces, but is more complex.

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <ctype.h>
  5. #include <errno.h>
  6. #include <assert.h>
  7. /*!conditions:re2c*/
  8. static int scan(char **text, const char **token) {
  9. char *YYCURSOR = *text, *yyt1, *open = 0, *close = 0;
  10. enum YYCONDTYPE condition = yycempty;
  11. assert(text &amp;&amp; token);
  12. /*!re2c /**/
  13. re2c:define:YYCTYPE = char;
  14. re2c:yyfill:enable = 0;
  15. re2c:define:YYGETCONDITION = &quot;condition&quot;;
  16. re2c:define:YYSETCONDITION = &quot;condition = @@;&quot;;
  17. re2c:define:YYGETCONDITION:naked = 1;
  18. re2c:define:YYSETCONDITION:naked = 1;
  19. space = [ \t\v\n];
  20. nul = &quot;\x00&quot;;
  21. quote = &quot;\&quot;&quot;;
  22. */
  23. for( ; ; ) { /*!re2c /**/
  24. &lt;empty&gt; nul { return *token = 0, 0; }
  25. &lt;empty&gt; space+ { continue; } /* Leading space. */
  26. &lt;empty&gt; quote @open :=&gt; quote
  27. &lt;empty&gt; @open * :=&gt; word
  28. &lt;word&gt; nul { return *token = open, 1; }
  29. &lt;word&gt; @close space
  30. { *close = &#39;\0&#39;; *text = close + 1; *token = open; return 1; }
  31. &lt;word&gt; * { continue; }
  32. &lt;quote&gt; @close quote
  33. { *close = &#39;\0&#39;; *text = close + 1; *token = open; return 1; }
  34. &lt;quote&gt; nul { return errno = EILSEQ, 0; }
  35. &lt;quote&gt; * { continue; }
  36. */
  37. }
  38. }
  /*
   * Reads one line from stdin and prints each token found by scan() on its
   * own line.
   *
   * Returns EXIT_FAILURE when no line could be read or when scan() stops
   * with errno set (perror() then reports it under the name "capture"),
   * and EXIT_SUCCESS otherwise.
   */
  int main(void) {
      int success = EXIT_SUCCESS;
      char command[BUFSIZ], *input;
      const char *token;

      errno = 0;
      if(!(input = fgets(command, sizeof command, stdin))) goto catch;
      for( ; ; ) {
          /* Library calls such as printf may change errno even when they
           * succeed, so clear it right before the call whose error matters. */
          errno = 0;
          if(!scan(&input, &token)) break;
          printf("%s\n", token);
      }
      if(errno) goto catch;
      goto finally;
  catch:
      success = EXIT_FAILURE;
      if(errno) perror("capture");
  finally:
      return success;
  }

The C code is generated with re2c, using the command: re2c -W -T -c -o main.re.c main.c

huangapple
  • 本文由 发表于 2023年3月21日 01:53:41
  • 转载请务必保留本文链接:https://go.coder-hub.com/75793687-2.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定