问题

我有一个数据框（tibble）。我正在寻找一种在数据中检测特定变量序列的方法。这个示例中有3个变量，但实际可能有数十个。我展示了70行数据，但实际可能有数十万行。待检测的序列以数据框的形式保存在一个命名列表中。示例中有标记为A和B的2个序列，但在实际情况中可能有大约100个，所以我选择了这种结构来存储它们。

数据：

library(tidyverse)
# Example data: 70 rows, an ID counter plus three character variables.
# Each variable is written as runs (value repeated `times` times) so the
# block structure that the patterns are matched against is easy to read.
# The object is built with structure() so it is a tibble ("tbl_df") exactly
# like the original dput() output.
data1 <- structure(
  list(
    ID = as.numeric(1:70),
    x1 = rep(
      c("z", "y", "c", "a", "z", "y", "c", "a", "z"),
      times = c(7, 3, 19, 1, 14, 3, 19, 1, 3)
    ),
    x2 = rep(
      c("z", "y", "c", "a", "z", "y", "c", "a", "z"),
      times = c(7, 3, 19, 1, 14, 3, 19, 1, 3)
    ),
    x3 = rep(
      c("c", "z", "f", "c", "z", "f", "c"),
      times = c(12, 10, 4, 23, 10, 4, 7)
    )
  ),
  row.names = c(NA, -70L),
  class = c("tbl_df", "tbl", "data.frame")
)

待检测的序列（代码及期望结果见下方英文原文）：

英文:

I have a data frame (tibble). I'm looking for a method to detect specific sequences of variable values in the data. There are 3 variables in the reprex, but there can be dozens of them. I'm showing 70 rows of data, but there could be several hundred thousand. The sequences to detect are stored as data frames in a named list. In the reprex there are 2 sequences, labelled A and B, but in practice there can be about 100 of them, which is why I chose this structure to store them.

Data:

library(tidyverse)
# Example data: 70 rows, an ID counter plus three character variables.
# Each variable is written as runs (value repeated `times` times) so the
# block structure that the patterns are matched against is easy to read.
# The object is built with structure() so it is a tibble ("tbl_df") exactly
# like the original dput() output.
data1 <- structure(
  list(
    ID = as.numeric(1:70),
    x1 = rep(
      c("z", "y", "c", "a", "z", "y", "c", "a", "z"),
      times = c(7, 3, 19, 1, 14, 3, 19, 1, 3)
    ),
    x2 = rep(
      c("z", "y", "c", "a", "z", "y", "c", "a", "z"),
      times = c(7, 3, 19, 1, 14, 3, 19, 1, 3)
    ),
    x3 = rep(
      c("c", "z", "f", "c", "z", "f", "c"),
      times = c(12, 10, 4, 23, 10, 4, 7)
    )
  ),
  row.names = c(NA, -70L),
  class = c("tbl_df", "tbl", "data.frame")
)

<sup>Created on 2023-07-17 with reprex v2.0.2</sup>

Sequences to detect:

# Patterns to look for, stored as a named list of tibbles. The list name is
# the label written to the result; each pattern has the same columns as
# `data1`, and its number of rows is the length of the sequence.
seqs <- list(
  A = structure(
    list(
      ID = c(1, 2, 3, 4, 5),
      x1 = c("y", "y", "y", "c", "c"),
      x2 = c("y", "y", "y", "c", "c"),
      x3 = c("c", "c", "c", "c", "c")
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -5L)
  ),
  B = structure(
    list(
      ID = c(1, 2, 3, 4, 5, 6, 7, 8),
      x1 = c("c", "c", "c", "c", "c", "c", "c", "a"),
      x2 = c("c", "c", "c", "c", "c", "c", "c", "a"),
      x3 = c("f", "f", "f", "f", "c", "c", "c", "c")
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -8L)
  )
)

<sup>Created on 2023-07-17 with reprex v2.0.2</sup>

I would like to get a result like the one below, where the `det_seq` column marks the row (second) in which each sequence starts. In the reprex, the sequences I am searching for are separated by other sequences that are not relevant to me. It is important that a match means a match on all variables at once (sequences may differ only very slightly, e.g. by one value of one variable). I only need to find the beginning of each sequence, because its duration is known (it is the number of rows of the data frame that holds the pattern).

      ID x1    x2    x3    det_seq
   <dbl> <chr> <chr> <chr> <chr>  
 1     1 z     z     c     NA     
 2     2 z     z     c     NA     
 3     3 z     z     c     NA     
 4     4 z     z     c     NA     
 5     5 z     z     c     NA     
 6     6 z     z     c     NA     
 7     7 z     z     c     NA     
 8     8 y     y     c     A      
 9     9 y     y     c     NA     
10    10 y     y     c     NA     
11    11 c     c     c     NA     
12    12 c     c     c     NA     
13    13 c     c     z     NA     
14    14 c     c     z     NA     
15    15 c     c     z     NA     
16    16 c     c     z     NA     
17    17 c     c     z     NA     
18    18 c     c     z     NA     
19    19 c     c     z     NA     
20    20 c     c     z     NA     
21    21 c     c     z     NA     
22    22 c     c     z     NA     
23    23 c     c     f     B      
24    24 c     c     f     NA     
25    25 c     c     f     NA     
26    26 c     c     f     NA     
27    27 c     c     c     NA     
28    28 c     c     c     NA     
29    29 c     c     c     NA     
30    30 a     a     c     NA     
31    31 z     z     c     NA     
32    32 z     z     c     NA     
33    33 z     z     c     NA     
34    34 z     z     c     NA     
35    35 z     z     c     NA     
36    36 z     z     c     NA     
37    37 z     z     c     NA     
38    38 z     z     c     NA     
39    39 z     z     c     NA     
40    40 z     z     c     NA     
41    41 z     z     c     NA     
42    42 z     z     c     NA     
43    43 z     z     c     NA     
44    44 z     z     c     NA     
45    45 y     y     c     A      
46    46 y     y     c     NA     
47    47 y     y     c     NA     
48    48 c     c     c     NA     
49    49 c     c     c     NA     
50    50 c     c     z     NA     
51    51 c     c     z     NA     
52    52 c     c     z     NA     
53    53 c     c     z     NA     
54    54 c     c     z     NA     
55    55 c     c     z     NA     
56    56 c     c     z     NA     
57    57 c     c     z     NA     
58    58 c     c     z     NA     
59    59 c     c     z     NA     
60    60 c     c     f     B      
61    61 c     c     f     NA     
62    62 c     c     f     NA     
63    63 c     c     f     NA     
64    64 c     c     c     NA     
65    65 c     c     c     NA     
66    66 c     c     c     NA     
67    67 a     a     c     NA     
68    68 z     z     c     NA     
69    69 z     z     c     NA     
70    70 z     z     c     NA

答案1

得分: 0

以下是翻译好的代码部分：

这是一种方法：

```R
# Add `det_seq`: "A" or "B" on the row where that pattern starts, NA elsewhere.
# A pattern starts at row `i` when the block of rows beginning there equals the
# pattern on every variable (columns 2:4; column 1 is the ID and is ignored).
# Pattern A is tested first, so it wins if both would match at the same row.
data1 %>%
  mutate(det_seq = map_chr(
    seq_len(nrow(data1)),
    \(i) {
      # TRUE when the rows starting at `i` reproduce pattern `s` exactly.
      starts_here <- \(s) {
        end_row <- i + nrow(s) - 1
        # Near the end of the data a full window no longer fits; treat that as
        # "no match" instead of indexing past the last row.
        end_row <= nrow(data1) &&
          identical(data1[i:end_row, 2:4], s[, 2:4])
      }
      if (starts_here(seqs$A)) {
        "A"
      } else if (starts_here(seqs$B)) {
        "B"
      } else {
        # A real missing value, not the string "NA".
        NA_character_
      }
    }
  ))

```

更新：为了使其能够匹配任何大小的seqs数据框列表，使用以下代码块代替：

# Add `det_seq`: the name of the pattern in `seqs` that starts on each row, or
# NA when none does. Works for any number of patterns of any length.
# The first column of `data1` and of every pattern is taken to be the ID and is
# excluded; all remaining columns must match, so extra variables are compared
# automatically as long as data and patterns share the same columns.
data1 %>%
  mutate(det_seq = map_chr(
    seq_len(nrow(data1)),
    \(x) {
      starts_here <- map_lgl(seqs, \(s) {
        end_row <- x + nrow(s) - 1
        # A window that would run past the last row cannot be a match.
        end_row <= nrow(data1) &&
          identical(data1[x:end_row, -1], s[, -1])
      })
      # If several patterns match, the one listed first in `seqs` is reported;
      # first() returns NA when nothing matched.
      first(names(seqs)[starts_here])
    }
  ))

英文:

Here's one approach:

# Add `det_seq`: "A" or "B" on the row where that pattern starts, NA elsewhere.
# A pattern starts at row `i` when the block of rows beginning there equals the
# pattern on every variable (columns 2:4; column 1 is the ID and is ignored).
# Pattern A is tested first, so it wins if both would match at the same row.
data1 %>%
  mutate(det_seq = map_chr(
    seq_len(nrow(data1)),
    \(i) {
      # TRUE when the rows starting at `i` reproduce pattern `s` exactly.
      starts_here <- \(s) {
        end_row <- i + nrow(s) - 1
        # Near the end of the data a full window no longer fits; treat that as
        # "no match" instead of indexing past the last row.
        end_row <= nrow(data1) &&
          identical(data1[i:end_row, 2:4], s[, 2:4])
      }
      if (starts_here(seqs$A)) {
        "A"
      } else if (starts_here(seqs$B)) {
        "B"
      } else {
        # A real missing value, not the string "NA".
        NA_character_
      }
    }
  ))

Update: To make it so that it can match a seqs list of dataframes of any size, use the following chunk of code instead:

# Add `det_seq`: the name of the pattern in `seqs` that starts on each row, or
# NA when none does. Works for any number of patterns of any length.
# The first column of `data1` and of every pattern is taken to be the ID and is
# excluded; all remaining columns must match, so extra variables are compared
# automatically as long as data and patterns share the same columns.
data1 %>%
  mutate(det_seq = map_chr(
    seq_len(nrow(data1)),
    \(x) {
      starts_here <- map_lgl(seqs, \(s) {
        end_row <- x + nrow(s) - 1
        # A window that would run past the last row cannot be a match.
        end_row <= nrow(data1) &&
          identical(data1[x:end_row, -1], s[, -1])
      })
      # If several patterns match, the one listed first in `seqs` is reported;
      # first() returns NA when nothing matched.
      first(names(seqs)[starts_here])
    }
  ))

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

数据帧中的序列检测

问题

答案1

将多列文本拆分成不同列的R代码示例：

正则表达式以忽略在值前面带有小数点的部分？

Loop function on timeseries works on small df, but not in large df – Error: C stack usage…too close to the limit

将”groupby”的结果展开/扩展，并保持与”groupby”之前相同的排序/索引。

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论