如何使用R传递滞后值并与另一个属性求和?

huangapple go评论68阅读模式
英文:

How to carry lag values and sum with another attribute using R?

问题

Part B

df <- df %>%
  mutate(ItemA = ifelse(Schedule == 0 & Day != "Sunday", lag(ItemA), ItemA)) %>%
  group_by(AreaID) %>%
  fill(ItemA, .direction = "down") %>%
  ungroup() %>%
  mutate(ItemB = ifelse(Schedule == 1, ItemA + ItemB, ItemB))

Required output

Output <- data.frame(AreaID = c('1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2'),
                     Period = c('24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023', '24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023'),
                     Day = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'),
                     ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
                     ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157, 175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
                     Schedule = c('1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0'))
英文:

Required library

library(tidyverse)
library(lubridate)

Create data

df &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),

ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),

ItemB = c(150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170,150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170 ),
Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

In this above data (i.e., df), for variable "Schedule" 1 means ItemA quantity was delivered and 0 means ItemA quantity was not delivered. If Schedule is 0 for any day, then for part A, I need to deduct ItemA from ItemB; for part B, I need to carry ItemA value for the immediate following day Schedule where it is 1 and add with ItemB. Note there is no delivery on Sunday (i.e., for all Sunday, Schedule is 0).

I tried as below and solved part A but couldn't reach solution for part B.

Part A

df %&gt;%
  mutate(ItemB = case_when(Day == &quot;Monday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Tuesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Wednesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Thursday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Friday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Saturday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Sunday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           TRUE ~ ItemB
                           ))

Require output

Output &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),

ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),

ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157,175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

Can anyone help me to figure out how can I solve this part B? Thanks in advance.

答案1

得分: 0

我认为这应该可以工作,但如果没有预期的输出数据框,很难确定。

# 清理和整理数据框
df <- df %>%
    as_tibble() %>%
    mutate(Period = as.Date(Period, format = "%d/%m/%Y"),
    Day = factor(Day, levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")),
    ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))

# 这不是向量化的,但假设您不会处理 100,000+ 行,那就不应该成为问题
processor <- function(deliv_df) {
    rollover_value = 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB - deliv_df$ItemA
        } else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB + rollover_value
            rollover_value <- 0
        }
    }
    return(deliv_df)
}

processor(df)

   AreaID Period     Day       ItemA ItemB Schedule
   <chr>  <date>     <fct>     <dbl> <dbl>    <dbl>
 1 1      2023-07-24 Monday       10   150        1
 2 1      2023-07-25 Tuesday      11   110        1
 3 1      2023-07-26 Wednesday    12   140        1
 4 1      2023-07-27 Thursday     13   140        0
 5 1      2023-07-28 Friday       14   140        1
 6 1      2023-07-29 Saturday     10   140        0
 7 1      2023-07-30 Sunday       11   140        0
 8 1      2023-07-31 Monday        9   190        1
 9 1      2023-08-01 Tuesday       8   180        1
10 1      2023-08-02 Wednesday    12   120        1
英文:

I think this should work, however, it's hard to know for sure without an expected output dataframe.

# clean and tidy the dataframe
df &lt;- df %&gt;% 
    as_tibble() %&gt;% 
    mutate(Period = as.Date(Period, format = &quot;%d/%m/%Y&quot;),
    Day = factor(Day, levels = c(&quot;Monday&quot;, &quot;Tuesday&quot;, &quot;Wednesday&quot;, &quot;Thursday&quot;, &quot;Friday&quot;, &quot;Saturday&quot;, &quot;Sunday&quot;)), 
    ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))

# this isn&#39;t vectorised, but assuming you&#39;re not processing 100,000+ rows, it shouldn&#39;t be a problem
processor &lt;- function(deliv_df) {
    rollover_value = 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB - deliv_df$ItemA
        } else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB + rollover_value
            rollover_value &lt;- 0
        }
    }
    return(deliv_df)
}

processor(df)

   AreaID Period     Day       ItemA ItemB Schedule
   &lt;chr&gt;  &lt;date&gt;     &lt;fct&gt;     &lt;dbl&gt; &lt;dbl&gt;    &lt;dbl&gt;
 1 1      2023-07-24 Monday       10   150        1
 2 1      2023-07-25 Tuesday      11   110        1
 3 1      2023-07-26 Wednesday    12   140        1
 4 1      2023-07-27 Thursday     13   140        0
 5 1      2023-07-28 Friday       14   140        1
 6 1      2023-07-29 Saturday     10   140        0
 7 1      2023-07-30 Sunday       11   140        0
 8 1      2023-07-31 Monday        9   190        1
 9 1      2023-08-01 Tuesday       8   180        1
10 1      2023-08-02 Wednesday    12   120        1



</details>



# 答案2
**得分**: 0

以下是对我的问题的正确解决方案。我根据问题要求修改了Mark在这篇帖子中提出的代码,以实现正确的解决方案。我非常感谢Mark的贡献。

```R
# 修改后的Mark的代码

processor <- function(deliv_df) {
    rollover_value <- 0

    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i] - deliv_df$ItemA[i]
             rollover_value =  rollover_value + deliv_df$ItemA[i]
        }
        else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i] + rollover_value
            rollover_value <- 0
        }
        else if (deliv_df$Schedule[i] == 1 & rollover_value == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i]
            rollover_value <- 0
        }
    }

    return(deliv_df)
}

processor(df)

英文:

Below is the correct solution for my question. I have modified the codes proposed by Mark in this post based on the question requirement to achieve the correct solution. I highly appreciate Mark's contribution as well.

# Modified Mark&#39;s codes

processor &lt;- function(deliv_df) {
    rollover_value &lt;- 0

    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] - deliv_df$ItemA[i]
             rollover_value =  rollover_value + deliv_df$ItemA[i]
        }
        else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] + rollover_value
            rollover_value &lt;- 0
        }
        else if (deliv_df$Schedule[i] == 1 &amp; rollover_value == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i]
            rollover_value &lt;- 0
        }
    }

    return(deliv_df)
}

processor(df)

huangapple
  • 本文由 发表于 2023年7月3日 19:09:48
  • 转载请务必保留本文链接:https://go.coder-hub.com/76604176.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定