在R中在递归函数中使用Map()

huangapple go评论73阅读模式
英文:

Using Map() in a Recursive Function in R

问题

以下是您要翻译的代码部分:

这是我的带有数据结构方法的BOM:

# dput() dump of the BOM data frame df2: 18 rows with the columns product_id,
# item_id, sup_item_id, quantity, price and itemtype.
dput(df2)
structure(list(product_id = c("P1", "P1", "P1", "P1", "P1", "P1", "P1", "P1", "P1", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2"), item_id = c("i1", "i2", "i3", "i4", "i5", "i6", "i7", "i8", "i9", "i10", "i11", "i12", "i13", "i14", "i15", "i16", "i17", "i18"), sup_item_id = c("i6", "i6", "i6", "i6", "i8", "i8", "i9", "i9", NA, "i15", "i15", "i15", "i15", "i17", "i17", "i18", "i18", NA), quantity = c(2, 2, 5, 1, 1, 2, 4, 1, 1, 2, 2, 5, 1, 1, 2, 4, 1, 1), price = c(2, 5, 3, 7, 10, 0, 4, 0, 0, 2, 5, 3, 7, 20, 0, 2, 0, 0), itemtype = c("A", "A", "A", "A", "A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "B", "A", "B", "C")), class = "data.frame", row.names = c(NA, -18L))

要定义项目级别(从上到下),这是具有内部循环的递归函数:

# Push production levels down the item hierarchy: every row whose sup_item_id
# equals the item_id of a row in `idx` ends up at least one level below it.
#
# This is the loop form of the self-recursive "rescan after every update"
# approach. The scan over `idx` restarts after each update, exactly as the
# recursive call did, but the call stack no longer grows with the number of
# updates.
#
# df:      data frame with the columns item_id, sup_item_id and
#          production_level.
# changed: unused; kept only so that existing calls keep working.
# idx:     row indices treated as potential parents; defaults to the rows
#          whose production_level is 1 when the function is called.
#
# Returns `df` with production_level updated. Stops with an error when the
# item_id / sup_item_id links form a cycle (the levels would grow forever).
change_df <- function(df,
                      changed = TRUE,
                      idx = which(df$production_level == 1)) {
  if (length(idx) == 0) {
    return(df)
  }

  # Without a cycle a chain of rows can be at most nrow(df) long, so no level
  # can legitimately exceed the highest starting level plus nrow(df).
  level_cap <- max(df$production_level[idx], na.rm = TRUE) + nrow(df)

  repeat {
    updated <- FALSE

    for (i in idx) {
      descendants <- which(df$sup_item_id == df$item_id[i])
      if (length(descendants) == 0) {
        next
      }

      new_level <- df$production_level[i] + 1
      if (any(df$production_level[descendants] < new_level)) {
        if (new_level > level_cap) {
          stop(
            "item_id / sup_item_id links contain a cycle; ",
            "production levels cannot be resolved",
            call. = FALSE
          )
        }
        df$production_level[descendants] <- new_level
        updated <- TRUE
        # Restart the scan from the first parent with the updated levels.
        break
      }
    }

    # A full pass without any update means the levels are final.
    if (!updated) {
      return(df)
    }
  }
}

然后我这样调用它:

# Seed production_level (0 for rows without a sup_item_id, 1 otherwise) and
# let change_df() push the levels down the hierarchy.
df2 %>%
  arrange(product_id, item_id) %>%
  mutate(production_level = ifelse(is.na(sup_item_id), 0, 1)) %>%
  # The piped data frame is already change_df()'s first argument; an extra
  # `df2` here would only be bound to the unused `changed` parameter.
  change_df()

这是我的代码的输出:

product_id item_id sup_item_id quantity price itemtype production_level
1          P1      i1          i6        2     2        A          3
2          P1      i2          i6        2     5        A          3
3          P1      i3          i6        5     3        A          3
4          P1      i4          i6        1     7        A          3
5          P1      i5          i8        1    10        A          2
6          P1      i6          i8        2     0        B          2
7          P1      i7          i9        4     4        A          1
8          P1      i8          i9        1     0        B          1
9          P1      i9        <NA>        1     0        C          0
10         P2     i10         i15        2     2        A          3
11         P2     i11         i15        2     5        A          3
12         P2     i12         i15        5     3        A          3
13         P2     i13         i15        1     7        A          3
14         P2     i14         i17        1    20        A          2
15         P2     i15         i17        2     0        B          2
16         P2     i16         i18        4     2        A          1
17         P2     i17         i18        1     0        B          1
18         P2     i18        <NA>        1     0        C          0

我不想更改输出。现在,我想使用R中的map()函数代替循环,以使我的代码更紧凑。我还想知道idx方法是否有效,或者是否有更好的方法。如果您有任何建议,请分享!
英文:

Here is my BOM with a data structure approach:

    # dput() dump of the BOM data frame df2: 18 rows with the columns
    # product_id, item_id, sup_item_id, quantity, price and itemtype.
    dput(df2)
    structure(list(product_id = c("P1", "P1", "P1", "P1", "P1", "P1",
    "P1", "P1", "P1", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2",
    "P2"), item_id = c("i1", "i2", "i3", "i4", "i5", "i6", "i7",
    "i8", "i9", "i10", "i11", "i12", "i13", "i14", "i15", "i16",
    "i17", "i18"), sup_item_id = c("i6", "i6", "i6", "i6", "i8",
    "i8", "i9", "i9", NA, "i15", "i15", "i15", "i15", "i17", "i17",
    "i18", "i18", NA), quantity = c(2, 2, 5, 1, 1, 2, 4, 1, 1, 2,
    2, 5, 1, 1, 2, 4, 1, 1), price = c(2, 5, 3, 7, 10, 0, 4, 0, 0,
    2, 5, 3, 7, 20, 0, 2, 0, 0), itemtype = c("A", "A", "A", "A",
    "A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "B", "A", "B",
    "C")), class = "data.frame", row.names = c(NA, -18L))

To define the item levels (from top to bottom) Here is the recursive function with a loop inside:

# Push production levels down the item hierarchy: every row whose sup_item_id
# equals the item_id of a row in `idx` ends up at least one level below it.
#
# This is the loop form of the self-recursive "rescan after every update"
# approach. The scan over `idx` restarts after each update, exactly as the
# recursive call did, but the call stack no longer grows with the number of
# updates.
#
# df:      data frame with the columns item_id, sup_item_id and
#          production_level.
# changed: unused; kept only so that existing calls keep working.
# idx:     row indices treated as potential parents; defaults to the rows
#          whose production_level is 1 when the function is called.
#
# Returns `df` with production_level updated. Stops with an error when the
# item_id / sup_item_id links form a cycle (the levels would grow forever).
change_df <- function(df,
                      changed = TRUE,
                      idx = which(df$production_level == 1)) {
  if (length(idx) == 0) {
    return(df)
  }

  # Without a cycle a chain of rows can be at most nrow(df) long, so no level
  # can legitimately exceed the highest starting level plus nrow(df).
  level_cap <- max(df$production_level[idx], na.rm = TRUE) + nrow(df)

  repeat {
    updated <- FALSE

    for (i in idx) {
      descendants <- which(df$sup_item_id == df$item_id[i])
      if (length(descendants) == 0) {
        next
      }

      new_level <- df$production_level[i] + 1
      if (any(df$production_level[descendants] < new_level)) {
        if (new_level > level_cap) {
          stop(
            "item_id / sup_item_id links contain a cycle; ",
            "production levels cannot be resolved",
            call. = FALSE
          )
        }
        df$production_level[descendants] <- new_level
        updated <- TRUE
        # Restart the scan from the first parent with the updated levels.
        break
      }
    }

    # A full pass without any update means the levels are final.
    if (!updated) {
      return(df)
    }
  }
}

I then call it this way:

 # Seed production_level (0 for rows without a sup_item_id, 1 otherwise) and
 # let change_df() push the levels down the hierarchy.
 df2 %>%
   arrange(product_id, item_id) %>%
   mutate(production_level = ifelse(is.na(sup_item_id), 0, 1)) %>%
   # The piped data frame is already change_df()'s first argument; an extra
   # `df2` here would only be bound to the unused `changed` parameter.
   change_df()

This is the output of my code:

product_id item_id sup_item_id quantity price itemtype production_level
1          P1      i1          i6        2     2        A          3
2          P1      i2          i6        2     5        A          3
3          P1      i3          i6        5     3        A          3
4          P1      i4          i6        1     7        A          3
5          P1      i5          i8        1    10        A          2
6          P1      i6          i8        2     0        B          2
7          P1      i7          i9        4     4        A          1
8          P1      i8          i9        1     0        B          1
9          P1      i9        <NA>        1     0        C          0
10         P2     i10         i15        2     2        A          3
11         P2     i11         i15        2     5        A          3
12         P2     i12         i15        5     3        A          3
13         P2     i13         i15        1     7        A          3
14         P2     i14         i17        1    20        A          2
15         P2     i15         i17        2     0        B          2
16         P2     i16         i18        4     2        A          1
17         P2     i17         i18        1     0        B          1
18         P2     i18        <NA>        1     0        C          0

I don't want to change the output. Now I want to use the map() function in R instead of the loop to make my code more compact. I also wonder whether the idx approach is good or whether something else would work better. If you have any suggestions, please share!

答案1

得分: 2

如果您考虑使用tidygraph / igraph,请尝试看看是否在您的实际数据上运行并扩展。

item_id 和 sup_item_id 用作创建 tidygraph 对象的 from / to 边列。关于 "NA" 字符串会有一条警告,因为 tidygraph 添加了一个名为 "NA" 的节点,过滤步骤会处理这个问题。

igraph 提供离心率度量

顶点的离心率是其到图中最远的其他节点的最短路径距离。

这应该正是您所需要的,尽管请测试具有共享item_id等特殊情况。

library(dplyr)
library(tidygraph)

# Compute one eccentricity value per graph node from an edge list.
#
# from_to_df: 2-column data frame handed to as_tbl_graph() as the edge list.
# Returns the `eccentricity` column of the node table, taken after the node
# named "NA" has been filtered out.
node_level <- function(from_to_df) {
  item_graph <- as_tbl_graph(from_to_df)
  item_nodes <- filter(activate(item_graph, nodes), name != "NA")
  scored_nodes <- mutate(item_nodes, eccentricity = node_eccentricity())
  pull(scored_nodes, eccentricity)
}

# Add production_level, computed by node_level() from the item_id and
# sup_item_id columns.
mutate(df, production_level = node_level(pick(item_id, sup_item_id)))

#> 警告: 在 `mutate()` 中有1个警告。
#> ℹ 在参数: `production_level = node_level(pick(item_id, sup_item_id))`。
#> 由于在 `graph_from_data_frame()` 中的警告引起:
#> ! 'NA' 元素已被替换为字符串 "NA"
#>    product_id item_id sup_item_id quantity price itemtype production_level
#> 1          P1      i1          i6        2     2        A                3
#> 2          P1      i2          i6        2     5        A                3
#> 3          P1      i3          i6        5     3        A                3
#> 4          P1      i4          i6        1     7        A                3
#> 5          P1      i5          i8        1    10        A                2
#> 6          P1      i6          i8        2     0        B                2
#> 7          P1      i7          i9        4     4        A                1
#> 8          P1      i8          i9        1     0        B                1
#> 9          P1      i9        <NA>        1     0        C                0
#> 10         P2     i10         i15        2     2        A                3
#> 11         P2     i11         i15        2     5        A                3
#> 12         P2     i12         i15        5     3        A                3
#> 13         P2     i13         i15        1     7        A                3
#> 14         P2     i14         i17        1    20        A                2
#> 15         P2     i15         i17        2     0        B                2
#> 16         P2     i16         i18        4     2        A                1
#> 17         P2     i17         i18        1     0        B                1
#> 18         P2     i18        <NA>        1     0        C                0

示例数据:

# Example BOM: 18 rows, nine for product P1 followed by nine for P2;
# sup_item_id is NA for exactly one row per product.
df <- data.frame(
  product_id = rep(c("P1", "P2"), each = 9),
  item_id = paste0("i", 1:18),
  sup_item_id = c(
    "i6", "i6", "i6", "i6", "i8", "i8", "i9", "i9", NA,
    "i15", "i15", "i15", "i15", "i17", "i17", "i18", "i18", NA
  ),
  # Both products use the same quantities and item types.
  quantity = rep(c(2, 2, 5, 1, 1, 2, 4, 1, 1), times = 2),
  price = c(
    2, 5, 3, 7, 10, 0, 4, 0, 0,
    2, 5, 3, 7, 20, 0, 2, 0, 0
  ),
  itemtype = rep(c("A", "A", "A", "A", "A", "B", "A", "B", "C"), times = 2),
  stringsAsFactors = FALSE
)

创建于2023年03月01日,使用reprex v2.0.2

英文:

If you would consider using tidygraph / igraph instead, try if this works and scales on your actual data.

item_id and sup_item_id are used as from / to edge columns for creating tidygraph object. There's a warning about "NA" string, as tidygraph added a node named "NA", filtering deals with that.

igraph provides eccentricity measure,
> The eccentricity of a vertex is its shortest path distance from the farthest other node in the graph.

and this should be precisely what you are after, though please test with corner cases like shared item_ids and such.

library(dplyr)
library(tidygraph)

# Compute one eccentricity value per graph node from an edge list.
#
# from_to_df: 2-column data frame handed to as_tbl_graph() as the edge list.
# Returns the `eccentricity` column of the node table, taken after the node
# named "NA" has been filtered out.
node_level <- function(from_to_df) {
  as_tbl_graph(from_to_df) %>%
    activate(nodes) %>%
    filter(name != "NA") %>%
    mutate(eccentricity = node_eccentricity()) %>%
    pull(eccentricity)
}

# Add production_level, computed by node_level() from the item_id and
# sup_item_id columns.
df %>% mutate(production_level = node_level(pick(item_id, sup_item_id)))

#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `production_level = node_level(pick(item_id, sup_item_id))`.
#> Caused by warning in `graph_from_data_frame()`:
#> ! In `d' `NA' elements were replaced with string "NA"
#>    product_id item_id sup_item_id quantity price itemtype production_level
#> 1          P1      i1          i6        2     2        A                3
#> 2          P1      i2          i6        2     5        A                3
#> 3          P1      i3          i6        5     3        A                3
#> 4          P1      i4          i6        1     7        A                3
#> 5          P1      i5          i8        1    10        A                2
#> 6          P1      i6          i8        2     0        B                2
#> 7          P1      i7          i9        4     4        A                1
#> 8          P1      i8          i9        1     0        B                1
#> 9          P1      i9        <NA>        1     0        C                0
#> 10         P2     i10         i15        2     2        A                3
#> 11         P2     i11         i15        2     5        A                3
#> 12         P2     i12         i15        5     3        A                3
#> 13         P2     i13         i15        1     7        A                3
#> 14         P2     i14         i17        1    20        A                2
#> 15         P2     i15         i17        2     0        B                2
#> 16         P2     i16         i18        4     2        A                1
#> 17         P2     i17         i18        1     0        B                1
#> 18         P2     i18        <NA>        1     0        C                0

Example data:

# Example BOM: 18 rows, nine for product P1 followed by nine for P2;
# sup_item_id is NA for exactly one row per product.
df <- structure(list(product_id = c(
  "P1", "P1", "P1", "P1", "P1", "P1",
  "P1", "P1", "P1", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2",
  "P2"
), item_id = c(
  "i1", "i2", "i3", "i4", "i5", "i6", "i7",
  "i8", "i9", "i10", "i11", "i12", "i13", "i14", "i15", "i16",
  "i17", "i18"
), sup_item_id = c(
  "i6", "i6", "i6", "i6", "i8",
  "i8", "i9", "i9", NA, "i15", "i15", "i15", "i15", "i17", "i17",
  "i18", "i18", NA
), quantity = c(
  2, 2, 5, 1, 1, 2, 4, 1, 1, 2,
  2, 5, 1, 1, 2, 4, 1, 1
), price = c(
  2, 5, 3, 7, 10, 0, 4, 0, 0,
  2, 5, 3, 7, 20, 0, 2, 0, 0
), itemtype = c(
  "A", "A", "A", "A",
  "A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "B", "A", "B",
  "C"
)), class = "data.frame", row.names = c(NA, -18L))

<sup>Created on 2023-03-01 with reprex v2.0.2</sup>

huangapple
  • 本文由 发表于 2023年3月1日 15:26:01
  • 转载请务必保留本文链接:https://go.coder-hub.com/75600634.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定