根据R中的单独逻辑变量计算连续周数。

huangapple go评论70阅读模式
英文:

Count consecutive weeks based on separate Logical Variable in R

问题

在我的数据框 df 中,我需要构建列 count。它应该递增地计算在同一个 customer_id 的两个 Buy=TRUE 之间连续的 Buy=FALSE。然而,如果同一个 customer_id 没有先前的 Buy=TRUE,那么应该为 NA。一个 data.table 或 dplyr 的解决方案会很好。

df <- data.frame(
  customer_id = rep(1:2, each = 6),
  Buy = c(FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE),
  count = c(NA, NA, 0, 1, 2, 0, NA, 0, 1, 0, 0, 1)
)
英文:

In my data.frame df, I need to construct the column count. It should incrementally count the consecutive Buy=FALSE between two Buy=TRUE for the same customer_id. However, it should be NA if there is no previous Buy=TRUE for the same customer_id. A data.table or dplyr solution would be great.

df &lt;- data.frame(
  customer_id = rep(1:2, each = 6),
  Buy = c(FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE),
  count = c(NA, NA, 0, 1, 2, 0, NA, 0, 1, 0, 0, 1)
)

答案1

得分: 1

library(dplyr)
df |&gt;
  mutate(
    consec_id = consecutive_id(Buy),
    any_true = cumsum(Buy) &gt; 0,
    .by = customer_id
  ) |&gt;
  mutate(
    result = case_when(
      Buy ~ 0L,
      any_true ~ row_number(),
      TRUE ~ NA_integer_
    ),
    .by = c(customer_id, consec_id)
  )
#    customer_id   Buy count consec_id any_true result
# 1            1 FALSE    NA         1    FALSE     NA
# 2            1 FALSE    NA         1    FALSE     NA
# 3            1  TRUE     0         2     TRUE      0
# 4            1 FALSE     1         3     TRUE      1
# 5            1 FALSE     2         3     TRUE      2
# 6            1  TRUE     0         4     TRUE      0
# 7            2 FALSE    NA         1    FALSE     NA
# 8            2  TRUE     0         2     TRUE      0
# 9            2 FALSE     1         3     TRUE      1
# 10           2  TRUE     0         4     TRUE      0
# 11           2  TRUE     0         4     TRUE      0
# 12           2 FALSE     1         5     TRUE      1
英文:
library(dplyr)
df |&gt;
  mutate(
    consec_id = consecutive_id(Buy),
    any_true = cumsum(Buy) &gt; 0,
    .by = customer_id
  ) |&gt;
  mutate(
    result = case_when(
      Buy ~ 0L,
      any_true ~ row_number(),
      TRUE ~ NA_integer_
    ),
    .by = c(customer_id, consec_id)
  )
#    customer_id   Buy count consec_id any_true result
# 1            1 FALSE    NA         1    FALSE     NA
# 2            1 FALSE    NA         1    FALSE     NA
# 3            1  TRUE     0         2     TRUE      0
# 4            1 FALSE     1         3     TRUE      1
# 5            1 FALSE     2         3     TRUE      2
# 6            1  TRUE     0         4     TRUE      0
# 7            2 FALSE    NA         1    FALSE     NA
# 8            2  TRUE     0         2     TRUE      0
# 9            2 FALSE     1         3     TRUE      1
# 10           2  TRUE     0         4     TRUE      0
# 11           2  TRUE     0         4     TRUE      0
# 12           2 FALSE     1         5     TRUE      1

答案2

得分: 1

一种方法可能是

df$count <- NULL
library(dplyr)

df %>%
  group_by(grp = cumsum(Buy), customer_id) %>%
  mutate(count = row_number() - 1, grp = Buy[1] == F) %>%
  ungroup() %>%
  mutate(count = ifelse(grp, NA, count), grp = NULL)
# A tibble: 12 × 3
   customer_id Buy   count
         <int> <lgl> <dbl>
 1           1 FALSE    NA
 2           1 FALSE    NA
 3           1 TRUE      0
 4           1 FALSE     1
 5           1 FALSE     2
 6           1 TRUE      0
 7           2 FALSE    NA
 8           2 TRUE      0
 9           2 FALSE     1
10           2 TRUE      0
11           2 TRUE      0
12           2 FALSE     1
英文:

One way could be

df$count &lt;- NULL
library(dplyr)

df %&gt;% 
  group_by(grp = cumsum(Buy), customer_id) %&gt;% 
  mutate(count = row_number() - 1, grp = Buy[1] == F) %&gt;% 
  ungroup() %&gt;% 
  mutate(count = ifelse(grp, NA, count), grp = NULL)
# A tibble: 12 &#215; 3
   customer_id Buy   count
         &lt;int&gt; &lt;lgl&gt; &lt;dbl&gt;
 1           1 FALSE    NA
 2           1 FALSE    NA
 3           1 TRUE      0
 4           1 FALSE     1
 5           1 FALSE     2
 6           1 TRUE      0
 7           2 FALSE    NA
 8           2 TRUE      0
 9           2 FALSE     1
10           2 TRUE      0
11           2 TRUE      0
12           2 FALSE     1

huangapple
  • 本文由 发表于 2023年6月14日 23:30:55
  • 转载请务必保留本文链接:https://go.coder-hub.com/76475234.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定