英文:
Using Map() in a Recursive Function in R
问题
这是我采用数据结构方法的BOM:
dput(df2)
structure(list(product_id = c("P1", "P1", "P1", "P1", "P1", "P1", "P1", "P1", "P1", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2"), item_id = c("i1", "i2", "i3", "i4", "i5", "i6", "i7", "i8", "i9", "i10", "i11", "i12", "i13", "i14", "i15", "i16", "i17", "i18"), sup_item_id = c("i6", "i6", "i6", "i6", "i8", "i8", "i9", "i9", NA, "i15", "i15", "i15", "i15", "i17", "i17", "i18", "i18", NA), quantity = c(2, 2, 5, 1, 1, 2, 4, 1, 1, 2, 2, 5, 1, 1, 2, 4, 1, 1), price = c(2, 5, 3, 7, 10, 0, 4, 0, 0, 2, 5, 3, 7, 20, 0, 2, 0, 0), itemtype = c("A", "A", "A", "A", "A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "B", "A", "B", "C")), class = "data.frame", row.names = c(NA, -18L))
要定义项目级别(从上到下),这是具有内部循环的递归函数:
#' Assign production levels to BOM rows by walking down the hierarchy
#'
#' Rows whose `sup_item_id` equals the `item_id` of a row in `idx` are the
#' descendants of that row. Whenever any descendant has a level below its
#' parent's level plus one, all descendants of that parent are set to that
#' value and the scan over `idx` starts again from the first index. The
#' function returns once a complete scan changes nothing.
#'
#' The restart is done with a loop rather than a nested self-call, so the
#' call stack stays flat no matter how many updates are needed.
#'
#' @param df A data frame with columns `item_id`, `sup_item_id` and
#'   `production_level`.
#' @param changed Unused; kept so that existing calls passing a second
#'   positional argument keep working.
#' @param idx Row indices treated as potential parents. Defaults to the rows
#'   whose `production_level` is 1 on entry.
#' @return `df` with `production_level` updated.
change_df <- function(df, changed = TRUE, idx = which(df$production_level == 1)) {
  # Evaluate the default while `df` still holds the caller's levels.
  force(idx)
  # NOTE: a cyclic item_id / sup_item_id chain never settles, so this loop
  # would not terminate on such data.
  repeat {
    updated <- FALSE
    for (i in idx) {
      descendants <- which(df$sup_item_id == df$item_id[i])
      if (length(descendants) == 0) {
        next
      }
      new_level <- df$production_level[i] + 1
      if (any(df$production_level[descendants] < new_level)) {
        df$production_level[descendants] <- new_level
        # Restart the scan from the first index after every update.
        updated <- TRUE
        break
      }
    }
    if (!updated) {
      break
    }
  }
  df
}
然后我这样调用它:
# Sort the BOM, seed the levels (0 for rows without a sup_item_id, 1 for all
# others), then let change_df() push the levels down the hierarchy.
# The piped data frame is change_df()'s `df` argument; no further argument is
# needed (a second positional argument would only bind to the unused
# `changed` parameter).
df2 %>%
  arrange(product_id, item_id) %>%
  mutate(production_level = ifelse(is.na(sup_item_id), 0, 1)) %>%
  change_df()
这是我的代码的输出:
product_id item_id sup_item_id quantity price itemtype production_level
1 P1 i1 i6 2 2 A 3
2 P1 i2 i6 2 5 A 3
3 P1 i3 i6 5 3 A 3
4 P1 i4 i6 1 7 A 3
5 P1 i5 i8 1 10 A 2
6 P1 i6 i8 2 0 B 2
7 P1 i7 i9 4 4 A 1
8 P1 i8 i9 1 0 B 1
9 P1 i9 <NA> 1 0 C 0
10 P2 i10 i15 2 2 A 3
11 P2 i11 i15 2 5 A 3
12 P2 i12 i15 5 3 A 3
13 P2 i13 i15 1 7 A 3
14 P2 i14 i17 1 20 A 2
15 P2 i15 i17 2 0 B 2
16 P2 i16 i18 4 2 A 1
17 P2 i17 i18 1 0 B 1
18 P2 i18 <NA> 1 0 C 0
我不想更改输出。现在,我想使用R中的map()函数代替循环,以使我的代码更紧凑。我还想知道idx方法是否有效,或者是否有更好的方法。如果您有任何建议,请分享!
英文:
Here is my BOM with a data structure approach:
dput(df2)
structure(list(product_id = c("P1", "P1", "P1", "P1", "P1", "P1",
"P1", "P1", "P1", "P2", "P2", "P2", "P2", "P2", "P2", "P2", "P2",
"P2"), item_id = c("i1", "i2", "i3", "i4", "i5", "i6", "i7",
"i8", "i9", "i10", "i11", "i12", "i13", "i14", "i15", "i16",
"i17", "i18"), sup_item_id = c("i6", "i6", "i6", "i6", "i8",
"i8", "i9", "i9", NA, "i15", "i15", "i15", "i15", "i17", "i17",
"i18", "i18", NA), quantity = c(2, 2, 5, 1, 1, 2, 4, 1, 1, 2,
2, 5, 1, 1, 2, 4, 1, 1), price = c(2, 5, 3, 7, 10, 0, 4, 0, 0,
2, 5, 3, 7, 20, 0, 2, 0, 0), itemtype = c("A", "A", "A", "A",
"A", "B", "A", "B", "C", "A", "A", "A", "A", "A", "B", "A", "B",
"C")), class = "data.frame", row.names = c(NA, -18L))
To define the item levels (from top to bottom), here is the recursive function with a loop inside:
#' Assign production levels to BOM rows by walking down the hierarchy
#'
#' Rows whose `sup_item_id` equals the `item_id` of a row in `idx` are the
#' descendants of that row. Whenever any descendant has a level below its
#' parent's level plus one, all descendants of that parent are set to that
#' value and the scan over `idx` starts again from the first index. The
#' function returns once a complete scan changes nothing.
#'
#' The restart is done with a loop rather than a nested self-call, so the
#' call stack stays flat no matter how many updates are needed.
#'
#' @param df A data frame with columns `item_id`, `sup_item_id` and
#'   `production_level`.
#' @param changed Unused; kept so that existing calls passing a second
#'   positional argument keep working.
#' @param idx Row indices treated as potential parents. Defaults to the rows
#'   whose `production_level` is 1 on entry.
#' @return `df` with `production_level` updated.
change_df <- function(df, changed = TRUE, idx = which(df$production_level == 1)) {
  # Evaluate the default while `df` still holds the caller's levels.
  force(idx)
  # NOTE: a cyclic item_id / sup_item_id chain never settles, so this loop
  # would not terminate on such data.
  repeat {
    updated <- FALSE
    for (i in idx) {
      descendants <- which(df$sup_item_id == df$item_id[i])
      if (length(descendants) == 0) {
        next
      }
      new_level <- df$production_level[i] + 1
      if (any(df$production_level[descendants] < new_level)) {
        df$production_level[descendants] <- new_level
        # Restart the scan from the first index after every update.
        updated <- TRUE
        break
      }
    }
    if (!updated) {
      break
    }
  }
  df
}
I then call it this way:
# Sort the BOM, seed the levels (0 for rows without a sup_item_id, 1 for all
# others), then let change_df() push the levels down the hierarchy.
# The piped data frame is change_df()'s `df` argument; no further argument is
# needed (a second positional argument would only bind to the unused
# `changed` parameter).
df2 %>%
  arrange(product_id, item_id) %>%
  mutate(production_level = ifelse(is.na(sup_item_id), 0, 1)) %>%
  change_df()
This is the output of my code:
product_id item_id sup_item_id quantity price itemtype production_level
1 P1 i1 i6 2 2 A 3
2 P1 i2 i6 2 5 A 3
3 P1 i3 i6 5 3 A 3
4 P1 i4 i6 1 7 A 3
5 P1 i5 i8 1 10 A 2
6 P1 i6 i8 2 0 B 2
7 P1 i7 i9 4 4 A 1
8 P1 i8 i9 1 0 B 1
9 P1 i9 <NA> 1 0 C 0
10 P2 i10 i15 2 2 A 3
11 P2 i11 i15 2 5 A 3
12 P2 i12 i15 5 3 A 3
13 P2 i13 i15 1 7 A 3
14 P2 i14 i17 1 20 A 2
15 P2 i15 i17 2 0 B 2
16 P2 i16 i18 4 2 A 1
17 P2 i17 i18 1 0 B 1
18 P2 i18 <NA> 1 0 C 0
I don't want to change the output. Now I want to use the map() function in R instead of the loop to make my code more compact. I also wonder whether the idx approach is good or whether something else would work better. If you have any suggestions, please share!
答案1
得分: 2
如果您愿意改用 tidygraph / igraph,不妨试试下面的方法,看它能否在您的实际数据上正常运行并良好扩展。
`item_id` 和 `sup_item_id` 被用作创建 tidygraph 对象的 from / to 边列。这里会出现一个关于 "NA" 字符串的警告,因为 tidygraph 添加了一个名为 "NA" 的节点,代码中的过滤步骤会处理这个问题。
`igraph` 提供了离心率(eccentricity)度量:
顶点的离心率是其到图中最远的其他节点的最短路径距离。
这应该正是您所需要的,不过请用共享 `item_id` 等特殊情况进行测试。
library(dplyr)
library(tidygraph)
# node_level(): from_to_df is a 2-column data frame of igraph edges
# (from, to); the result is the vector of node eccentricity values.
node_level <- function(from_to_df) {
  graph <- as_tbl_graph(from_to_df)
  node_view <- activate(graph, nodes)
  # Drop the node named "NA" before computing eccentricity.
  real_nodes <- filter(node_view, name != "NA")
  scored <- mutate(real_nodes, eccentricity = node_eccentricity())
  pull(scored, eccentricity)
}
# Add production_level: pick() hands the item_id / sup_item_id columns to
# node_level() as a two-column data frame of edges.
df %>% mutate(production_level = node_level(pick(item_id, sup_item_id)))
#> 警告: 在 `mutate()` 中有1个警告。
#> ℹ 在参数: `production_level = node_level(pick(item_id, sup_item_id))`。
#> 由于在 `graph_from_data_frame()` 中的警告引起:
#> ! 'NA' 元素已被替换为字符串 "NA"
#> product_id item_id sup_item_id quantity price itemtype production_level
#> 1 P1 i1 i6 2 2 A 3
#> 2 P1 i2 i6 2 5 A 3
#> 3 P1 i3 i6 5 3 A 3
#> 4 P1 i4 i6 1 7 A 3
#> 5 P1 i5 i8 1 10 A 2
#> 6 P1 i6 i8 2 0 B 2
#> 7 P1 i7 i9 4 4 A 1
#> 8 P1 i8 i9 1 0 B 1
#> 9 P1 i9 <NA> 1 0 C 0
#> 10 P2 i10 i15 2 2 A 3
#> 11 P2 i11 i15 2 5 A 3
#> 12 P2 i12 i15 5 3 A 3
#> 13 P2 i13 i15 1 7 A 3
#> 14 P2 i14 i17 1 20 A 2
#> 15 P2 i15 i17 2 0 B 2
#> 16 P2 i16 i18 4 2 A 1
#> 17 P2 i17 i18 1 0 B 1
#> 18 P2 i18 <NA> 1 0 C 0
示例数据:
# Example BOM: two products (P1, P2) with nine rows each; the last row of
# each product has no parent (sup_item_id is NA).
df <- data.frame(
  product_id = rep(c("P1", "P2"), each = 9),
  item_id = paste0("i", 1:18),
  sup_item_id = c(
    "i6", "i6", "i6", "i6", "i8", "i8", "i9", "i9", NA,
    "i15", "i15", "i15", "i15", "i17", "i17", "i18", "i18", NA
  ),
  quantity = rep(c(2, 2, 5, 1, 1, 2, 4, 1, 1), times = 2),
  price = c(
    2, 5, 3, 7, 10, 0, 4, 0, 0,
    2, 5, 3, 7, 20, 0, 2, 0, 0
  ),
  itemtype = rep(c("A", "A", "A", "A", "A", "B", "A", "B", "C"), times = 2),
  stringsAsFactors = FALSE
)
创建于2023年03月01日,使用reprex v2.0.2。
英文:
If you would consider using tidygraph / igraph instead, check whether this works and scales on your actual data.
`item_id` and `sup_item_id` are used as the from / to edge columns for creating the tidygraph object. There's a warning about the "NA" string because tidygraph adds a node named "NA"; the filtering step deals with that.
`igraph` provides an eccentricity measure:
> The eccentricity of a vertex is its shortest path distance from the farthest other node in the graph.
This should be precisely what you are after, though please test with corner cases such as shared `item_id`s.
library(dplyr)
library(tidygraph)
# node_level(): from_to_df is a 2-column data frame of igraph edges
# (from, to); the result is the vector of node eccentricity values.
node_level <- function(from_to_df) {
  graph <- as_tbl_graph(from_to_df)
  node_view <- activate(graph, nodes)
  # Drop the node named "NA" before computing eccentricity.
  real_nodes <- filter(node_view, name != "NA")
  scored <- mutate(real_nodes, eccentricity = node_eccentricity())
  pull(scored, eccentricity)
}
# Add production_level: pick() hands the item_id / sup_item_id columns to
# node_level() as a two-column data frame of edges.
df %>% mutate(production_level = node_level(pick(item_id, sup_item_id)))
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `production_level = node_level(pick(item_id, sup_item_id))`.
#> Caused by warning in `graph_from_data_frame()`:
#> ! In `d' `NA' elements were replaced with string "NA"
#> product_id item_id sup_item_id quantity price itemtype production_level
#> 1 P1 i1 i6 2 2 A 3
#> 2 P1 i2 i6 2 5 A 3
#> 3 P1 i3 i6 5 3 A 3
#> 4 P1 i4 i6 1 7 A 3
#> 5 P1 i5 i8 1 10 A 2
#> 6 P1 i6 i8 2 0 B 2
#> 7 P1 i7 i9 4 4 A 1
#> 8 P1 i8 i9 1 0 B 1
#> 9 P1 i9 <NA> 1 0 C 0
#> 10 P2 i10 i15 2 2 A 3
#> 11 P2 i11 i15 2 5 A 3
#> 12 P2 i12 i15 5 3 A 3
#> 13 P2 i13 i15 1 7 A 3
#> 14 P2 i14 i17 1 20 A 2
#> 15 P2 i15 i17 2 0 B 2
#> 16 P2 i16 i18 4 2 A 1
#> 17 P2 i17 i18 1 0 B 1
#> 18 P2 i18 <NA> 1 0 C 0
Example data:
# Example BOM: two products (P1, P2) with nine rows each; the last row of
# each product has no parent (sup_item_id is NA).
df <- data.frame(
  product_id = rep(c("P1", "P2"), each = 9),
  item_id = paste0("i", 1:18),
  sup_item_id = c(
    "i6", "i6", "i6", "i6", "i8", "i8", "i9", "i9", NA,
    "i15", "i15", "i15", "i15", "i17", "i17", "i18", "i18", NA
  ),
  quantity = rep(c(2, 2, 5, 1, 1, 2, 4, 1, 1), times = 2),
  price = c(
    2, 5, 3, 7, 10, 0, 4, 0, 0,
    2, 5, 3, 7, 20, 0, 2, 0, 0
  ),
  itemtype = rep(c("A", "A", "A", "A", "A", "B", "A", "B", "C"), times = 2),
  stringsAsFactors = FALSE
)
<sup>Created on 2023-03-01 with reprex v2.0.2</sup>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论