基于日期时间列条件创建一个新列 R。

huangapple go评论68阅读模式
英文:

create a column based on datetime column condition R

问题

我有以下的数据示例:

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct", 
"POSIXt"))), row.names = c(NA, -21L), class = c("data.table", 
"data.frame"))

我想创建一个名为 cycle 的列,用于比较一个日期时间行和上一行的日期时间,如果它们之间的时间差大于5分钟,则cycle列中的值将是 "cycle_2",下一个大于5分钟的时间差将是 "cycle_3",以此类推,就像下面的示例一样:

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), diff_min = c(0.0165833353996277, 0.0166499972343445, 
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014, 
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106, 
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875, 
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333, 
0), cycle = c("ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", "ciclo_1", 
"ciclo_1", "ciclo_1", "ciclo_1", "ciclo_2", "ciclo_2", "ciclo_2", 
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2", 
"ciclo_2", "ciclo_2", "ciclo_2", "ciclo_2")), row.names = c(NA, 
-21L), class = c("data.table", "data.frame"))

我目前的代码是:

df %>%
arrange(datetime) %>%
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min < 5, "ciclo_1", paste0("ciclo_", cumsum(diff_min >= 5) + 1)))

感谢任何帮助。

英文:

I have the following data example:

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, 
&quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;, 
&quot;data.frame&quot;))

I would like to create a column cycle, that compare a lrow of a datetime and the row above, if this were a time difference greater that 5 minutes the name in cycle will to be cycle_2, the next difference greater than 5 minutes will be cycle 3 and so on. As the example below.

structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 
1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 
1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 
1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 
1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 
1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, 
&quot;POSIXt&quot;)), diff_min = c(0.0165833353996277, 0.0166499972343445, 
0.0166000048319499, 0.0332833290100098, 0, 0.0500333348910014, 
0.0333666682243347, 0.0167666673660278, 360.795266664028, 0.0200833360354106, 
0.0504166642824809, 0.0333666682243347, 0.0165666659673055, 0.0166500012079875, 
0, 0, 0.0166999975840251, 0, 0.0831166664759318, 0.0333333333333333, 
0), cycle = c(&quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, 
&quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_1&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, 
&quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, 
&quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;, &quot;ciclo_2&quot;)), row.names = c(NA, 
-21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;))

My code at the moment is:

df %&gt;%
arrange(datetime) %&gt;% 
mutate(diff_min = as.numeric(lead(datetime) - datetime) / 60, cycle = ifelse(diff_min &lt; 5, &quot;ciclo_1&quot;, paste0(&quot;ciclo_&quot;, cumsum(diff_min &gt;= 5) + 1)))

Thanks any help

答案1

得分: 3

我们可以在这里使用cumsum

library(dplyr)
quux %>%
  arrange(datetime) %>%
  mutate(
    diff_min = c(as.numeric(diff(datetime), units = "mins"), 0),
    cycle = 1 + cumsum(diff_min > 5)
  )
#               datetime     diff_min cycle
# 1  2022-09-03 06:10:34   0.01658334     1
# 2  2022-09-03 06:10:35   0.01665000     1
# 3  2022-09-03 06:10:36   0.01660000     1
# 4  2022-09-03 06:10:37   0.03328333     1
# 5  2022-09-03 06:10:39   0.00000000     1
# 6  2022-09-03 06:10:39   0.05003333     1
# 7  2022-09-03 06:10:42   0.03336667     1
# 8  2022-09-03 06:10:44   0.01676666     1
# 9  2022-09-03 06:10:45 360.79526667     2
# 10 2022-09-03 12:11:32   0.02008333     2
# 11 2022-09-03 12:11:33   0.05041667     2
# 12 2022-09-03 12:11:37   0.03336667     2
# 13 2022-09-03 12:11:39   0.01656667     2
# 14 2022-09-03 12:11:40   0.01665000     2
# 15 2022-09-03 12:11:41   0.00000000     2
# 16 2022-09-03 12:11:41   0.00000000     2
# 17 2022-09-03 12:11:41   0.01670000     2
# 18 2022-09-03 12:11:42   0.00000000     2
# 19 2022-09-03 12:11:42   0.08311667     2
# 20 2022-09-03 12:11:47   0.03333333     2
# 21 2022-09-03 12:11:49   0.00000000     2

如果你愿意的话,你可以使用paste0("ciclo_", cycle)

由于你的数据最初是"data.table"类,这里是另一种方法:

library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = "mins") >= 5))]

如果你不需要中间的diff_min变量,可以使用这种方法。

英文:

We can use cumsum here:

library(dplyr)
quux %&gt;%
  arrange(datetime) %&gt;%
  mutate(
    diff_min = c(as.numeric(diff(datetime), units = &quot;mins&quot;), 0),
    cycle = 1+cumsum(diff_min &gt; 5)
  )
#               datetime     diff_min cycle
# 1  2022-09-03 06:10:34   0.01658334     1
# 2  2022-09-03 06:10:35   0.01665000     1
# 3  2022-09-03 06:10:36   0.01660000     1
# 4  2022-09-03 06:10:37   0.03328333     1
# 5  2022-09-03 06:10:39   0.00000000     1
# 6  2022-09-03 06:10:39   0.05003333     1
# 7  2022-09-03 06:10:42   0.03336667     1
# 8  2022-09-03 06:10:44   0.01676666     1
# 9  2022-09-03 06:10:45 360.79526667     2
# 10 2022-09-03 12:11:32   0.02008333     2
# 11 2022-09-03 12:11:33   0.05041667     2
# 12 2022-09-03 12:11:37   0.03336667     2
# 13 2022-09-03 12:11:39   0.01656667     2
# 14 2022-09-03 12:11:40   0.01665000     2
# 15 2022-09-03 12:11:41   0.00000000     2
# 16 2022-09-03 12:11:41   0.00000000     2
# 17 2022-09-03 12:11:41   0.01670000     2
# 18 2022-09-03 12:11:42   0.00000000     2
# 19 2022-09-03 12:11:42   0.08311667     2
# 20 2022-09-03 12:11:47   0.03333333     2
# 21 2022-09-03 12:11:49   0.00000000     2

to which you can paste0(&quot;ciclo_&quot;, cycle) if you'd like.

Since your data is originally class &quot;data.table&quot;, here's an alternative:

library(data.table)
quux[, cycle := cumsum(c(TRUE, as.numeric(diff(datetime), units = &quot;mins&quot;) &gt;= 5))]

without the intermediate diff_min, in case you don't need that variable for anything else.


Data

quux &lt;- data.table::as.data.table(structure(list(datetime = structure(c(1662185434.08, 1662185435.075, 1662185436.074, 1662185437.07, 1662185439.067, 1662185439.067, 1662185442.069, 1662185444.071, 1662185445.077, 1662207092.793, 1662207093.998, 1662207097.023, 1662207099.025, 1662207100.019, 1662207101.018, 1662207101.018, 1662207101.018, 1662207102.02, 1662207102.02, 1662207107.007, 1662207109.007), tzone = &quot;UTC&quot;, class = c(&quot;POSIXct&quot;, &quot;POSIXt&quot;))), row.names = c(NA, -21L), class = c(&quot;data.table&quot;, &quot;data.frame&quot;)))

huangapple
  • 本文由 发表于 2023年7月14日 03:14:53
  • 转载请务必保留本文链接:https://go.coder-hub.com/76682575.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定