如何使用R传递滞后值并与另一个属性求和?

huangapple go评论102阅读模式
英文:

How to carry lag values and sum with another attribute using R?

问题

Part B

  1. df <- df %>%
  2. mutate(ItemA = ifelse(Schedule == 0 & Day != "Sunday", lag(ItemA), ItemA)) %>%
  3. group_by(AreaID) %>%
  4. fill(ItemA, .direction = "down") %>%
  5. ungroup() %>%
  6. mutate(ItemB = ifelse(Schedule == 1, ItemA + ItemB, ItemB))

Required output

  1. Output <- data.frame(AreaID = c('1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2'),
  2. Period = c('24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023', '24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023'),
  3. Day = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'),
  4. ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
  5. ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157, 175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
  6. Schedule = c('1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0'))
英文:

Required library

  1. library(tidyverse)
  2. library(lubridate)

Create data

  1. df &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
  2. Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
  3. Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),
  4. ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
  5. ItemB = c(150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170,150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170 ),
  6. Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

In this above data (i.e., df), for variable "Schedule" 1 means ItemA quantity was delivered and 0 means ItemA quantity was not delivered. If Schedule is 0 for any day, then for part A, I need to deduct ItemA from ItemB; for part B, I need to carry ItemA value for the immediate following day Schedule where it is 1 and add with ItemB. Note there is no delivery on Sunday (i.e., for all Sunday, Schedule is 0).

I tried as below and solved part A but couldn't reach solution for part B.

Part A

  1. df %&gt;%
  2. mutate(ItemB = case_when(Day == &quot;Monday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  3. Day == &quot;Tuesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  4. Day == &quot;Wednesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  5. Day == &quot;Thursday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  6. Day == &quot;Friday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  7. Day == &quot;Saturday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  8. Day == &quot;Sunday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
  9. TRUE ~ ItemB
  10. ))

Require output

  1. Output &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
  2. Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
  3. Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),
  4. ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
  5. ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157,175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
  6. Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

Can anyone help me to figure out how can I solve this part B? Thanks in advance.

答案1

得分: 0

我认为这应该可以工作,但如果没有预期的输出数据框,很难确定。

  1. # 清理和整理数据框
  2. df <- df %>%
  3. as_tibble() %>%
  4. mutate(Period = as.Date(Period, format = "%d/%m/%Y"),
  5. Day = factor(Day, levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")),
  6. ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))
  7. # 这不是向量化的,但假设您不会处理 100,000+ 行,那就不应该成为问题
  8. processor <- function(deliv_df) {
  9. rollover_value = 0
  10. for (i in seq_along(deliv_df$ItemA)) {
  11. if (deliv_df$Schedule[i] == 0) {
  12. deliv_df$ItemB[i] <- deliv_df$ItemB - deliv_df$ItemA
  13. } else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
  14. deliv_df$ItemB[i] <- deliv_df$ItemB + rollover_value
  15. rollover_value <- 0
  16. }
  17. }
  18. return(deliv_df)
  19. }
  20. processor(df)
  21. AreaID Period Day ItemA ItemB Schedule
  22. <chr> <date> <fct> <dbl> <dbl> <dbl>
  23. 1 1 2023-07-24 Monday 10 150 1
  24. 2 1 2023-07-25 Tuesday 11 110 1
  25. 3 1 2023-07-26 Wednesday 12 140 1
  26. 4 1 2023-07-27 Thursday 13 140 0
  27. 5 1 2023-07-28 Friday 14 140 1
  28. 6 1 2023-07-29 Saturday 10 140 0
  29. 7 1 2023-07-30 Sunday 11 140 0
  30. 8 1 2023-07-31 Monday 9 190 1
  31. 9 1 2023-08-01 Tuesday 8 180 1
  32. 10 1 2023-08-02 Wednesday 12 120 1
英文:

I think this should work, however, it's hard to know for sure without an expected output dataframe.

  1. # clean and tidy the dataframe
  2. df &lt;- df %&gt;%
  3. as_tibble() %&gt;%
  4. mutate(Period = as.Date(Period, format = &quot;%d/%m/%Y&quot;),
  5. Day = factor(Day, levels = c(&quot;Monday&quot;, &quot;Tuesday&quot;, &quot;Wednesday&quot;, &quot;Thursday&quot;, &quot;Friday&quot;, &quot;Saturday&quot;, &quot;Sunday&quot;)),
  6. ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))
  7. # this isn&#39;t vectorised, but assuming you&#39;re not processing 100,000+ rows, it shouldn&#39;t be a problem
  8. processor &lt;- function(deliv_df) {
  9. rollover_value = 0
  10. for (i in seq_along(deliv_df$ItemA)) {
  11. if (deliv_df$Schedule[i] == 0) {
  12. deliv_df$ItemB[i] &lt;- deliv_df$ItemB - deliv_df$ItemA
  13. } else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
  14. deliv_df$ItemB[i] &lt;- deliv_df$ItemB + rollover_value
  15. rollover_value &lt;- 0
  16. }
  17. }
  18. return(deliv_df)
  19. }
  20. processor(df)
  21. AreaID Period Day ItemA ItemB Schedule
  22. &lt;chr&gt; &lt;date&gt; &lt;fct&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;dbl&gt;
  23. 1 1 2023-07-24 Monday 10 150 1
  24. 2 1 2023-07-25 Tuesday 11 110 1
  25. 3 1 2023-07-26 Wednesday 12 140 1
  26. 4 1 2023-07-27 Thursday 13 140 0
  27. 5 1 2023-07-28 Friday 14 140 1
  28. 6 1 2023-07-29 Saturday 10 140 0
  29. 7 1 2023-07-30 Sunday 11 140 0
  30. 8 1 2023-07-31 Monday 9 190 1
  31. 9 1 2023-08-01 Tuesday 8 180 1
  32. 10 1 2023-08-02 Wednesday 12 120 1
  33. </details>
  34. # 答案2
  35. **得分**: 0
  36. 以下是对我的问题的正确解决方案。我根据问题要求修改了Mark在这篇帖子中提出的代码,以实现正确的解决方案。我非常感谢Mark的贡献。
  37. ```R
  38. # 修改后的Mark的代码
  39. processor <- function(deliv_df) {
  40. rollover_value <- 0
  41. for (i in seq_along(deliv_df$ItemA)) {
  42. if (deliv_df$Schedule[i] == 0) {
  43. deliv_df$ItemB[i] <- deliv_df$ItemB[i] - deliv_df$ItemA[i]
  44. rollover_value = rollover_value + deliv_df$ItemA[i]
  45. }
  46. else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
  47. deliv_df$ItemB[i] <- deliv_df$ItemB[i] + rollover_value
  48. rollover_value <- 0
  49. }
  50. else if (deliv_df$Schedule[i] == 1 & rollover_value == 0) {
  51. deliv_df$ItemB[i] <- deliv_df$ItemB[i]
  52. rollover_value <- 0
  53. }
  54. }
  55. return(deliv_df)
  56. }
  57. processor(df)
英文:

Below is the correct solution for my question. I have modified the codes proposed by Mark in this post based on the question requirement to achieve the correct solution. I highly appreciate Mark's contribution as well.

  1. # Modified Mark&#39;s codes
  2. processor &lt;- function(deliv_df) {
  3. rollover_value &lt;- 0
  4. for (i in seq_along(deliv_df$ItemA)) {
  5. if (deliv_df$Schedule[i] == 0) {
  6. deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] - deliv_df$ItemA[i]
  7. rollover_value = rollover_value + deliv_df$ItemA[i]
  8. }
  9. else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
  10. deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] + rollover_value
  11. rollover_value &lt;- 0
  12. }
  13. else if (deliv_df$Schedule[i] == 1 &amp; rollover_value == 0) {
  14. deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i]
  15. rollover_value &lt;- 0
  16. }
  17. }
  18. return(deliv_df)
  19. }
  20. processor(df)

huangapple
  • 本文由 发表于 2023年7月3日 19:09:48
  • 转载请务必保留本文链接:https://go.coder-hub.com/76604176.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定