2023年7月14日 03:14:53go评论91阅读模式

英文:

create a column based on datetime column condition R

问题

我有以下的数据示例：

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct", 
"POSIXt"))), row.names = c(NA, -21L), class = c("data.table", 
"data.frame"))

我想创建一个名为 cycle 的列，用于比较一个日期时间行和上一行的日期时间，如果它们之间的时间差大于5分钟，则cycle列中的值将是 "cycle_2"，下一个大于5分钟的时间差将是 "cycle_3"，以此类推，就像下面的示例一样：

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), diff_min = c(0.0165833353996277, 0.0166499972343445, 
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014, 
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106, 
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875, 
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333, 
0), cycle = c("ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", 
"ciclo_1", "ciclo_1", "ciclo_1", "ciclo_2", "ciclo_2", "ciclo_2", 
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", 
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2")), row.names = c(NA, 
-21L), class = c("data.table", "data.frame"))

我目前的代码是：

df %>%
arrange(datetime) %>%
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min < 5, "ciclo_1", paste0("ciclo_", cumsum(diff_min >= 5) + 1)))

感谢任何帮助。

英文:

I have the following data example:

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, 
&quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;, 
&quot;data.frame&quot;))

I would like to create a column cycle, that compare a lrow of a datetime and the row above, if this were a time difference greater that 5 minutes the name in cycle will to be cycle_2, the next difference greater than 5 minutes will be cycle 3 and so on. As the example below.

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, 
&quot;POSIXt&quot;)), diff_min = c(0.0165833353996277, 0.0166499972343445, 
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014, 
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106, 
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875, 
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333, 
0), cycle = c(&quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, 
&quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, 
&quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, 
&quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;)), row.names = c(NA, 
-21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;))

My code at the moment is:

df %&gt;%
arrange(datetime) %&gt;% 
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min &lt; 5, &quot;ciclo_1&quot;, paste0(&quot;ciclo_&quot;, cumsum(diff_min &gt;= 5) + 1)))

Thanks any help

答案1

得分: 3

我们可以在这里使用cumsum：

library(dplyr)
quux %>%
  arrange(datetime) %>%
  mutate(
    diff_min = c(as.numeric(diff(datetime), units = "mins"), 0),
    cycle = 1 + cumsum(diff_min > 5)
  )
#               datetime     diff_min cycle
# 1  2022-09-03 06:10:34   0.01658334     1
# 2  2022-09-03 06:10:35   0.01665000     1
# 3  2022-09-03 06:10:36   0.01660000     1
# 4  2022-09-03 06:10:37   0.03328333     1
# 5  2022-09-03 06:10:39   0.00000000     1
# 6  2022-09-03 06:10:39   0.05003333     1
# 7  2022-09-03 06:10:42   0.03336667     1
# 8  2022-09-03 06:10:44   0.01676666     1
# 9  2022-09-03 06:10:45 360.79526667     2
# 10 2022-09-03 12:11:32   0.02008333     2
# 11 2022-09-03 12:11:33   0.05041667     2
# 12 2022-09-03 12:11:37   0.03336667     2
# 13 2022-09-03 12:11:39   0.01656667     2
# 14 2022-09-03 12:11:40   0.01665000     2
# 15 2022-09-03 12:11:41   0.00000000     2
# 16 2022-09-03 12:11:41   0.00000000     2
# 17 2022-09-03 12:11:41   0.01670000     2
# 18 2022-09-03 12:11:42   0.00000000     2
# 19 2022-09-03 12:11:42   0.08311667     2
# 20 2022-09-03 12:11:47   0.03333333     2
# 21 2022-09-03 12:11:49   0.00000000     2

如果你愿意的话，你可以使用paste0("ciclo_", cycle)。

由于你的数据最初是"data.table"类，这里是另一种方法：

library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = "mins") >= 5))]

如果你不需要中间的diff_min变量，可以使用这种方法。

英文:

We can use cumsum here:

library(dplyr)
quux %&gt;%
  arrange(datetime) %&gt;%
  mutate(
    diff_min = c(as.numeric(diff(datetime), units = &quot;mins&quot;), 0),
    cycle = 1+cumsum(diff_min &gt; 5)
  )
#               datetime     diff_min cycle
# 1  2022-09-03 06:10:34   0.01658334     1
# 2  2022-09-03 06:10:35   0.01665000     1
# 3  2022-09-03 06:10:36   0.01660000     1
# 4  2022-09-03 06:10:37   0.03328333     1
# 5  2022-09-03 06:10:39   0.00000000     1
# 6  2022-09-03 06:10:39   0.05003333     1
# 7  2022-09-03 06:10:42   0.03336667     1
# 8  2022-09-03 06:10:44   0.01676666     1
# 9  2022-09-03 06:10:45 360.79526667     2
# 10 2022-09-03 12:11:32   0.02008333     2
# 11 2022-09-03 12:11:33   0.05041667     2
# 12 2022-09-03 12:11:37   0.03336667     2
# 13 2022-09-03 12:11:39   0.01656667     2
# 14 2022-09-03 12:11:40   0.01665000     2
# 15 2022-09-03 12:11:41   0.00000000     2
# 16 2022-09-03 12:11:41   0.00000000     2
# 17 2022-09-03 12:11:41   0.01670000     2
# 18 2022-09-03 12:11:42   0.00000000     2
# 19 2022-09-03 12:11:42   0.08311667     2
# 20 2022-09-03 12:11:47   0.03333333     2
# 21 2022-09-03 12:11:49   0.00000000     2

to which you can paste0("ciclo_", cycle) if you'd like.

Since your data is originally class "data.table", here's an alternative:

library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = &quot;mins&quot;) &gt;= 5))]

without the intermediate diff_min, in case you don't need that variable for anything else.

Data

quux &lt;- data.table::as.data.table(structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, &quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;)))

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

基于日期时间列条件创建一个新列 R。

问题

答案1

如果特定字符串不存在，则重命名列。

如何根据数据帧中其他列的值组合来更改列的值？

这个可视化的名称是什么？

如何在R中将经纬度坐标转换为兰伯特等积圆锥投影

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。