2023年7月3日 19:09:48go评论102阅读模式

英文:

How to carry lag values and sum with another attribute using R?

问题

Part B

df <- df %>%
  mutate(ItemA = ifelse(Schedule == 0 & Day != "Sunday", lag(ItemA), ItemA)) %>%
  group_by(AreaID) %>%
  fill(ItemA, .direction = "down") %>%
  ungroup() %>%
  mutate(ItemB = ifelse(Schedule == 1, ItemA + ItemB, ItemB))

Required output

Output <- data.frame(AreaID = c('1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2'),
                     Period = c('24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023', '24/07/2023', '25/07/2023', '26/07/2023', '27/07/2023', '28/07/2023', '29/07/2023', '30/07/2023', '31/07/2023', '1/08/2023', '2/08/2023', '3/08/2023', '4/08/2023', '5/08/2023', '6/08/2023'),
                     Day = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'),
                     ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
                     ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157, 175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
                     Schedule = c('1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0', '1', '1', '1', '0', '1', '0', '0'))

英文:

Required library

library(tidyverse)
library(lubridate)

Create data

df &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),
ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
ItemB = c(150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170,150, 110, 140, 130, 140, 100, 110, 190, 180, 120, 100, 110, 120, 170 ),
Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

In this above data (i.e., df), for variable "Schedule" 1 means ItemA quantity was delivered and 0 means ItemA quantity was not delivered. If Schedule is 0 for any day, then for part A, I need to deduct ItemA from ItemB; for part B, I need to carry ItemA value for the immediate following day Schedule where it is 1 and add with ItemB. Note there is no delivery on Sunday (i.e., for all Sunday, Schedule is 0).

I tried as below and solved part A but couldn't reach solution for part B.

Part A

df %&gt;%
  mutate(ItemB = case_when(Day == &quot;Monday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Tuesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Wednesday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Thursday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Friday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Saturday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           Day == &quot;Sunday&quot; &amp; Schedule == 0 ~ ItemB - ItemA,
                           TRUE ~ ItemB
                           ))

Require output

Output &lt;- data.frame(AreaID = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;,&#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;, &#39;2&#39;),
Period = c(&#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;, &#39;24/07/2023&#39;, &#39;25/07/2023&#39;, &#39;26/07/2023&#39;,&#39;27/07/2023&#39;, &#39;28/07/2023&#39;, &#39;29/07/2023&#39;, &#39;30/07/2023&#39;, &#39;31/07/2023&#39;, &#39;1/08/2023&#39;, &#39;2/08/2023&#39;, &#39;3/08/2023&#39;, &#39;4/08/2023&#39;, &#39;5/08/2023&#39;, &#39;6/08/2023&#39;),
Day = c(&#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;, &#39;Monday&#39;, &#39;Tuesday&#39;, &#39;Wednesday&#39;, &#39;Thursday&#39;, &#39;Friday&#39;, &#39;Saturday&#39;, &#39;Sunday&#39;),
ItemA = c(10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13, 10, 11, 12, 13, 14, 10, 11, 9, 8, 12, 10, 11, 12, 13),
ItemB = c(150, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157,175, 110, 140, 117, 153, 90, 99, 211, 180, 120, 90, 120, 108, 157),
Schedule = c(&#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;, &#39;1&#39;, &#39;1&#39;, &#39;1&#39;, &#39;0&#39;, &#39;1&#39;, &#39;0&#39;, &#39;0&#39;))

Can anyone help me to figure out how can I solve this part B? Thanks in advance.

答案1

得分: 0

我认为这应该可以工作，但如果没有预期的输出数据框，很难确定。

# 清理和整理数据框
df <- df %>%
    as_tibble() %>%
    mutate(Period = as.Date(Period, format = "%d/%m/%Y"),
    Day = factor(Day, levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")),
    ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))
# 这不是向量化的，但假设您不会处理 100,000+ 行，那就不应该成为问题
processor <- function(deliv_df) {
    rollover_value = 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB - deliv_df$ItemA
        } else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB + rollover_value
            rollover_value <- 0
        }
    }
    return(deliv_df)
}
processor(df)
   AreaID Period     Day       ItemA ItemB Schedule
   <chr>  <date>     <fct>     <dbl> <dbl>    <dbl>
 1 1      2023-07-24 Monday       10   150        1
 2 1      2023-07-25 Tuesday      11   110        1
 3 1      2023-07-26 Wednesday    12   140        1
 4 1      2023-07-27 Thursday     13   140        0
 5 1      2023-07-28 Friday       14   140        1
 6 1      2023-07-29 Saturday     10   140        0
 7 1      2023-07-30 Sunday       11   140        0
 8 1      2023-07-31 Monday        9   190        1
 9 1      2023-08-01 Tuesday       8   180        1
10 1      2023-08-02 Wednesday    12   120        1

英文:

I think this should work, however, it's hard to know for sure without an expected output dataframe.

# clean and tidy the dataframe
df &lt;- df %&gt;% 
    as_tibble() %&gt;% 
    mutate(Period = as.Date(Period, format = &quot;%d/%m/%Y&quot;),
    Day = factor(Day, levels = c(&quot;Monday&quot;, &quot;Tuesday&quot;, &quot;Wednesday&quot;, &quot;Thursday&quot;, &quot;Friday&quot;, &quot;Saturday&quot;, &quot;Sunday&quot;)), 
    ItemA = as.numeric(ItemA), ItemB = as.numeric(ItemB), Schedule = as.numeric(Schedule))
# this isn&#39;t vectorised, but assuming you&#39;re not processing 100,000+ rows, it shouldn&#39;t be a problem
processor &lt;- function(deliv_df) {
    rollover_value = 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB - deliv_df$ItemA
        } else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB + rollover_value
            rollover_value &lt;- 0
        }
    }
    return(deliv_df)
}
processor(df)
   AreaID Period     Day       ItemA ItemB Schedule
   &lt;chr&gt;  &lt;date&gt;     &lt;fct&gt;     &lt;dbl&gt; &lt;dbl&gt;    &lt;dbl&gt;
 1 1      2023-07-24 Monday       10   150        1
 2 1      2023-07-25 Tuesday      11   110        1
 3 1      2023-07-26 Wednesday    12   140        1
 4 1      2023-07-27 Thursday     13   140        0
 5 1      2023-07-28 Friday       14   140        1
 6 1      2023-07-29 Saturday     10   140        0
 7 1      2023-07-30 Sunday       11   140        0
 8 1      2023-07-31 Monday        9   190        1
 9 1      2023-08-01 Tuesday       8   180        1
10 1      2023-08-02 Wednesday    12   120        1
</details>
# 答案2
**得分**: 0
以下是对我的问题的正确解决方案。我根据问题要求修改了Mark在这篇帖子中提出的代码，以实现正确的解决方案。我非常感谢Mark的贡献。
```R
# 修改后的Mark的代码
processor <- function(deliv_df) {
    rollover_value <- 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i] - deliv_df$ItemA[i]
             rollover_value =  rollover_value + deliv_df$ItemA[i]
        }
        else if (deliv_df$Schedule[i] == 1 & rollover_value > 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i] + rollover_value
            rollover_value <- 0
        }
        else if (deliv_df$Schedule[i] == 1 & rollover_value == 0) {
            deliv_df$ItemB[i] <- deliv_df$ItemB[i]
            rollover_value <- 0
        }
    }
    return(deliv_df)
}
processor(df)

英文:

Below is the correct solution for my question. I have modified the codes proposed by Mark in this post based on the question requirement to achieve the correct solution. I highly appreciate Mark's contribution as well.

# Modified Mark&#39;s codes
processor &lt;- function(deliv_df) {
    rollover_value &lt;- 0
    for (i in seq_along(deliv_df$ItemA)) {
        if (deliv_df$Schedule[i] == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] - deliv_df$ItemA[i]
             rollover_value =  rollover_value + deliv_df$ItemA[i]
        }
        else if (deliv_df$Schedule[i] == 1 &amp; rollover_value &gt; 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i] + rollover_value
            rollover_value &lt;- 0
        }
        else if (deliv_df$Schedule[i] == 1 &amp; rollover_value == 0) {
            deliv_df$ItemB[i] &lt;- deliv_df$ItemB[i]
            rollover_value &lt;- 0
        }
    }
    return(deliv_df)
}
processor(df)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

如何使用R传递滞后值并与另一个属性求和？

问题

Part B

Required output

Required library

Create data

Part A

Require output

答案1

HTML/XML: 理解”滚动条”的工作方式

Quarto在条件语句内部未正确呈现图表。

如何将正在运行的进程的数据导向到用于处理的Python脚本？

修改热图的距离和链接方式

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。