基于日期时间列条件创建一个新列 R。

huangapple go评论91阅读模式
英文:

create a column based on datetime column condition R

问题

我有以下的数据示例:

  1. structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
  2. 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
  3. 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
  4. 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
  5. 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
  6. 1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
  7. "POSIXt"))), row.names = c(NA, -21L), class = c("data.table",
  8. "data.frame"))

我想创建一个名为 cycle 的列,用于比较一个日期时间行和上一行的日期时间,如果它们之间的时间差大于5分钟,则cycle列中的值将是 "cycle_2",下一个大于5分钟的时间差将是 "cycle_3",以此类推,就像下面的示例一样:

  1. structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
  2. 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
  3. 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
  4. 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
  5. 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
  6. 1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct",
  7. "POSIXt")), diff_min = c(0.0165833353996277, 0.0166499972343445,
  8. 0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014,
  9. 0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106,
  10. 0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875,
  11. 0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333,
  12. 0), cycle = c("ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1",
  13. "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_2", "ciclo_2", "ciclo_2",
  14. "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2",
  15. "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2")), row.names = c(NA,
  16. -21L), class = c("data.table", "data.frame"))

我目前的代码是:

  1. df %>%
  2. arrange(datetime) %>%
  3. mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min < 5, "ciclo_1", paste0("ciclo_", cumsum(diff_min >= 5) + 1)))

感谢任何帮助。

英文:

I have the following data example:

  1. structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
  2. 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
  3. 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
  4. 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
  5. 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
  6. 1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;,
  7. &quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;,
  8. &quot;data.frame&quot;))

I would like to create a column cycle, that compare a lrow of a datetime and the row above, if this were a time difference greater that 5 minutes the name in cycle will to be cycle_2, the next difference greater than 5 minutes will be cycle 3 and so on. As the example below.

  1. structure(list(datetime = structure(c(1662185434.08, 1662185435.075,
  2. 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067,
  3. 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793,
  4. 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019,
  5. 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02,
  6. 1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;,
  7. &quot;POSIXt&quot;)), diff_min = c(0.0165833353996277, 0.0166499972343445,
  8. 0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014,
  9. 0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106,
  10. 0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875,
  11. 0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333,
  12. 0), cycle = c(&quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;,
  13. &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;,
  14. &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;,
  15. &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;)), row.names = c(NA,
  16. -21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;))

My code at the moment is:

  1. df %&gt;%
  2. arrange(datetime) %&gt;%
  3. mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min &lt; 5, &quot;ciclo_1&quot;, paste0(&quot;ciclo_&quot;, cumsum(diff_min &gt;= 5) + 1)))

Thanks any help

答案1

得分: 3

我们可以在这里使用cumsum

  1. library(dplyr)
  2. quux %>%
  3. arrange(datetime) %>%
  4. mutate(
  5. diff_min = c(as.numeric(diff(datetime), units = "mins"), 0),
  6. cycle = 1 + cumsum(diff_min > 5)
  7. )
  8. # datetime diff_min cycle
  9. # 1 2022-09-03 06:10:34 0.01658334 1
  10. # 2 2022-09-03 06:10:35 0.01665000 1
  11. # 3 2022-09-03 06:10:36 0.01660000 1
  12. # 4 2022-09-03 06:10:37 0.03328333 1
  13. # 5 2022-09-03 06:10:39 0.00000000 1
  14. # 6 2022-09-03 06:10:39 0.05003333 1
  15. # 7 2022-09-03 06:10:42 0.03336667 1
  16. # 8 2022-09-03 06:10:44 0.01676666 1
  17. # 9 2022-09-03 06:10:45 360.79526667 2
  18. # 10 2022-09-03 12:11:32 0.02008333 2
  19. # 11 2022-09-03 12:11:33 0.05041667 2
  20. # 12 2022-09-03 12:11:37 0.03336667 2
  21. # 13 2022-09-03 12:11:39 0.01656667 2
  22. # 14 2022-09-03 12:11:40 0.01665000 2
  23. # 15 2022-09-03 12:11:41 0.00000000 2
  24. # 16 2022-09-03 12:11:41 0.00000000 2
  25. # 17 2022-09-03 12:11:41 0.01670000 2
  26. # 18 2022-09-03 12:11:42 0.00000000 2
  27. # 19 2022-09-03 12:11:42 0.08311667 2
  28. # 20 2022-09-03 12:11:47 0.03333333 2
  29. # 21 2022-09-03 12:11:49 0.00000000 2

如果你愿意的话,你可以使用paste0("ciclo_", cycle)

由于你的数据最初是"data.table"类,这里是另一种方法:

  1. library(data.table)
  2. quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = "mins") >= 5))]

如果你不需要中间的diff_min变量,可以使用这种方法。

英文:

We can use cumsum here:

  1. library(dplyr)
  2. quux %&gt;%
  3. arrange(datetime) %&gt;%
  4. mutate(
  5. diff_min = c(as.numeric(diff(datetime), units = &quot;mins&quot;), 0),
  6. cycle = 1+cumsum(diff_min &gt; 5)
  7. )
  8. # datetime diff_min cycle
  9. # 1 2022-09-03 06:10:34 0.01658334 1
  10. # 2 2022-09-03 06:10:35 0.01665000 1
  11. # 3 2022-09-03 06:10:36 0.01660000 1
  12. # 4 2022-09-03 06:10:37 0.03328333 1
  13. # 5 2022-09-03 06:10:39 0.00000000 1
  14. # 6 2022-09-03 06:10:39 0.05003333 1
  15. # 7 2022-09-03 06:10:42 0.03336667 1
  16. # 8 2022-09-03 06:10:44 0.01676666 1
  17. # 9 2022-09-03 06:10:45 360.79526667 2
  18. # 10 2022-09-03 12:11:32 0.02008333 2
  19. # 11 2022-09-03 12:11:33 0.05041667 2
  20. # 12 2022-09-03 12:11:37 0.03336667 2
  21. # 13 2022-09-03 12:11:39 0.01656667 2
  22. # 14 2022-09-03 12:11:40 0.01665000 2
  23. # 15 2022-09-03 12:11:41 0.00000000 2
  24. # 16 2022-09-03 12:11:41 0.00000000 2
  25. # 17 2022-09-03 12:11:41 0.01670000 2
  26. # 18 2022-09-03 12:11:42 0.00000000 2
  27. # 19 2022-09-03 12:11:42 0.08311667 2
  28. # 20 2022-09-03 12:11:47 0.03333333 2
  29. # 21 2022-09-03 12:11:49 0.00000000 2

to which you can paste0(&quot;ciclo_&quot;, cycle) if you'd like.

Since your data is originally class &quot;data.table&quot;, here's an alternative:

  1. library(data.table)
  2. quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = &quot;mins&quot;) &gt;= 5))]

without the intermediate diff_min, in case you don't need that variable for anything else.


Data

  1. quux &lt;- data.table::as.data.table(structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, &quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;)))

huangapple
  • 本文由 发表于 2023年7月14日 03:14:53
  • 转载请务必保留本文链接:https://go.coder-hub.com/76682575.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定