英文:
create a column based on datetime column condition R
问题
我有以下的数据示例:
structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -21L), class = c("data.table",
"data.frame"))
我想创建一个名为 cycle
的列,用于比较一个日期时间行和上一行的日期时间,如果它们之间的时间差大于5分钟,则cycle
列中的值将是 "cycle_2",下一个大于5分钟的时间差将是 "cycle_3",以此类推,就像下面的示例一样:
structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
"POSIXt")), diff_min = c(0.0165833353996277, 0.0166499972343445,
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014,
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106,
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875,
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333,
0), cycle = c("ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1",
"ciclo_1", "ciclo_1", "ciclo_1", "ciclo_2", "ciclo_2", "ciclo_2",
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2",
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2")), row.names = c(NA,
-21L), class = c("data.table", "data.frame"))
我目前的代码是:
df %>%
arrange(datetime) %>%
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min < 5, "ciclo_1", paste0("ciclo_", cumsum(diff_min >= 5) + 1)))
感谢任何帮助。
英文:
I have the following data example:
structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -21L), class = c("data.table",
"data.frame"))
I would like to create a column cycle
, that compare a lrow of a datetime and the row above, if this were a time difference greater that 5 minutes the name in cycle will to be cycle_2, the next difference greater than 5 minutes will be cycle 3 and so on. As the example below.
structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
"POSIXt")), diff_min = c(0.0165833353996277, 0.0166499972343445,
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014,
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106,
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875,
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333,
0), cycle = c("ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1",
"ciclo_1", "ciclo_1", "ciclo_1", "ciclo_2", "ciclo_2", "ciclo_2",
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2",
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2")), row.names = c(NA,
-21L), class = c("data.table", "data.frame"))
My code at the moment is:
df %>%
arrange(datetime) %>%
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min < 5, "ciclo_1", paste0("ciclo_", cumsum(diff_min >= 5) + 1)))
Thanks any help
答案1
得分: 3
我们可以在这里使用cumsum
:
library(dplyr)
quux %>%
arrange(datetime) %>%
mutate(
diff_min = c(as.numeric(diff(datetime), units = "mins"), 0),
cycle = 1 + cumsum(diff_min > 5)
)
# datetime diff_min cycle
# 1 2022-09-03 06:10:34 0.01658334 1
# 2 2022-09-03 06:10:35 0.01665000 1
# 3 2022-09-03 06:10:36 0.01660000 1
# 4 2022-09-03 06:10:37 0.03328333 1
# 5 2022-09-03 06:10:39 0.00000000 1
# 6 2022-09-03 06:10:39 0.05003333 1
# 7 2022-09-03 06:10:42 0.03336667 1
# 8 2022-09-03 06:10:44 0.01676666 1
# 9 2022-09-03 06:10:45 360.79526667 2
# 10 2022-09-03 12:11:32 0.02008333 2
# 11 2022-09-03 12:11:33 0.05041667 2
# 12 2022-09-03 12:11:37 0.03336667 2
# 13 2022-09-03 12:11:39 0.01656667 2
# 14 2022-09-03 12:11:40 0.01665000 2
# 15 2022-09-03 12:11:41 0.00000000 2
# 16 2022-09-03 12:11:41 0.00000000 2
# 17 2022-09-03 12:11:41 0.01670000 2
# 18 2022-09-03 12:11:42 0.00000000 2
# 19 2022-09-03 12:11:42 0.08311667 2
# 20 2022-09-03 12:11:47 0.03333333 2
# 21 2022-09-03 12:11:49 0.00000000 2
如果你愿意的话,你可以使用paste0("ciclo_", cycle)
。
由于你的数据最初是"data.table"
类,这里是另一种方法:
library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = "mins") >= 5))]
如果你不需要中间的diff_min
变量,可以使用这种方法。
英文:
We can use cumsum
here:
library(dplyr)
quux %>%
arrange(datetime) %>%
mutate(
diff_min = c(as.numeric(diff(datetime), units = "mins"), 0),
cycle = 1+cumsum(diff_min > 5)
)
# datetime diff_min cycle
# 1 2022-09-03 06:10:34 0.01658334 1
# 2 2022-09-03 06:10:35 0.01665000 1
# 3 2022-09-03 06:10:36 0.01660000 1
# 4 2022-09-03 06:10:37 0.03328333 1
# 5 2022-09-03 06:10:39 0.00000000 1
# 6 2022-09-03 06:10:39 0.05003333 1
# 7 2022-09-03 06:10:42 0.03336667 1
# 8 2022-09-03 06:10:44 0.01676666 1
# 9 2022-09-03 06:10:45 360.79526667 2
# 10 2022-09-03 12:11:32 0.02008333 2
# 11 2022-09-03 12:11:33 0.05041667 2
# 12 2022-09-03 12:11:37 0.03336667 2
# 13 2022-09-03 12:11:39 0.01656667 2
# 14 2022-09-03 12:11:40 0.01665000 2
# 15 2022-09-03 12:11:41 0.00000000 2
# 16 2022-09-03 12:11:41 0.00000000 2
# 17 2022-09-03 12:11:41 0.01670000 2
# 18 2022-09-03 12:11:42 0.00000000 2
# 19 2022-09-03 12:11:42 0.08311667 2
# 20 2022-09-03 12:11:47 0.03333333 2
# 21 2022-09-03 12:11:49 0.00000000 2
to which you can paste0("ciclo_", cycle)
if you'd like.
Since your data is originally class "data.table"
, here's an alternative:
library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = "mins") >= 5))]
without the intermediate diff_min
, in case you don't need that variable for anything else.
Data
quux <- data.table::as.data.table(structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct", "POSIXt"))), row.names = c(NA, -21L), class = c("data.table", "data.frame")))
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论