英文:
set sankey flow.fill to last node
问题
在我的示例桑基图中,flow.fill 和 flow.color 是由前一个节点设置的(例如,所有在 time_0 时刻的“黄色”流都具有黄色的填充)。我想根据最后一个节点来给流着色。例如,在 time_1 时刻,所有进入“黄色”的流(黄色-黄色、红色-黄色)都具有黄色的填充,而不是下面所看到的(红色-黄色是红色)。
library(tidyverse)
library(ggsankey)
set.seed(2)
# 标准桑基图
df <- tibble(
id = seq(1:22168),
time_0 = c(rep("red", 13309), rep("yellow", 8699), rep("green", 160)),
time_1 = c(rep("red", 1110), rep("yellow", 3771), rep("green", 8428),
rep("red", 321), rep("yellow", 1940), rep("green", 6438),
rep("red", 4), rep("yellow", 26), rep("green", 130))
) %>%
{. ->> df2} %>%
mutate(across(starts_with("time"), factor,
levels = c("green", "yellow", "red")))
df_sankey <- df %>%
ggsankey::make_long(time_0, time_1)
df_sankey_t <- df_sankey %>%
dplyr::group_by(x, node) %>%
tally()
df_sankey <- df_sankey %>%
left_join(df_sankey_t, by = c("x", "node"))
ggplot(df_sankey,
aes(x = x, next_x = next_x,
node = node, next_node = next_node,
fill = factor(node),
label = paste0(node, " n=", n))) +
geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
geom_sankey_label(size = 3, color = "white", fill = "gray40") +
scale_fill_manual(values = c("green", "red", "yellow")) +
theme_sankey(base_size = 18) +
theme(legend.position = "none",
plot.title.position = "plot",
plot.title = element_text(face="bold", size=20),
plot.subtitle = element_text(size=15)) +
labs(title = "示例桑基图",
subtitle = "希望根据最后一个节点来着色 flow.fill 和 flow.color",
x = NULL)
[1]: https://i.stack.imgur.com/fwz2K.png
英文:
In my example sankey diagram, the flow.fill and flow.color are set by the previous node (e.g., all "yellow" flows at time_0 have yellow fill). I would like to color the flows by the final node. For instance, all flows going into "yellow" at time_1 (yellow-yellow, red-yellow) have yellow fill instead of what you see below (red-yellow is red).
library(tidyverse)
library(ggsankey)
set.seed(2)
# standard sankey
df <- tibble(
id = seq(1:22168),
time_0 = c(rep("red", 13309), rep("yellow", 8699), rep("green", 160)),
time_1 = c(rep("red", 1110), rep("yellow", 3771), rep("green", 8428),
rep("red", 321), rep("yellow", 1940), rep("green", 6438),
rep("red", 4), rep("yellow", 26), rep("green", 130))
) %>%
{. ->> df2} %>%
mutate(across(starts_with("time"), factor,
levels = c("green", "yellow", "red")))
df_sankey <- df %>%
ggsankey::make_long(time_0, time_1)
df_sankey_t <- df_sankey %>%
dplyr::group_by(x, node)%>%
tally()
df_sankey <- df_sankey %>%
left_join(df_sankey_t, by = c("x", "node"))
ggplot(df_sankey,
aes(x = x, next_x = next_x,
node = node, next_node = next_node,
fill = factor(node),
label = paste0(node," n=", n))) +
geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
geom_sankey_label(size = 3, color = "white", fill = "gray40") +
scale_fill_manual(values = c("green", "red", "yellow")) +
theme_sankey(base_size = 18) +
theme(legend.position = "none",
plot.title.position = "plot",
plot.title = element_text(face="bold", size=20),
plot.subtitle = element_text(size=15)) +
labs(title = "Example sankey diagram",
subtitle = "Would like to color flow.fill and flow.color to be based on last node",
x = NULL)
答案1
得分: 1
你可以使用PantaRhei
部分实现,也可以使用grid
完全实现,以下是代码的翻译部分:
library(PantaRhei)
library(dplyr)
library(tibble)
df1 <- tibble(
id = seq(1:22168),
time_0 = c(rep("red", 13309), rep("yellow", 8699), rep("green", 160)),
time_1 = c(rep("red", 1110), rep("yellow", 3771), rep("green", 8428),
rep("red", 321), rep("yellow", 1940), rep("green", 6438),
rep("red", 4), rep("yellow", 26), rep("green", 130))
) |>
mutate(across(starts_with("time"), factor,
levels = c("green", "yellow", "red")))
# 为流程总结数据
# 标题名称特定于 Panta Rhei 用于处理数据
# Panta Rhei 文档使用 'substance' 变量来命名 'substance' 或名称的流程,在这种情况下,我们将使用它来确定填充。
# 根据您的数据,可能有更有效的定义唯一的 'from' 和 'to' 变量的方法。
flows <-
df1 |>
group_by(time_0, time_1) |>
summarise(quantity = n(), .groups = "drop") |>
mutate(substance = time_1,
from = case_when(time_0 == "yellow" ~ "B",
time_0 == "red" ~ "A",
time_0 == "green" ~ "C"),
to = case_when(time_1 == "yellow" ~ "D",
time_1 == "red" ~ "E",
time_1 == "green" ~ "F"))
# 构建节点数据框
# 设置节点的标签和位置
nodes <-
data.frame(ID = c(unique(flows$from), unique(flows$to)),
label = c(unique(flows$from), unique(flows$to)),
x = c(rep(1, 3), rep(2, 3)),
y = c("1", "1.25", "1.5", "C", "B", "A"),
label_pos = rep(c("left", "right"), each = 3))
colors <- tribble(
~substance, ~color,
"yellow", "yellow",
"red", "red",
"green", "green"
)
sankey(nodes, flows, colors, legend = FALSE)
# PantaRhei 限制
# 虽然您可以更改节点的颜色,但似乎没有一种方法可以单独着色节点
# 虽然单独的节点被标记了,但我找不到一种方法来标记节点列(不确定正确的术语是什么)。
# 无法控制节点数量的格式。
# 不确定如何控制 'to' 节点的顺序。
# 由于 PantaRhei 是基于 grid 构建的,您可能可以将这些功能添加到您的模型中。
# 基于 'grid' 的编辑如下:
library(grid)
# 检查 grid 树的函数。
# grid.force()
# grid.ls()
# grid.ls(grobs = FALSE, viewports = TRUE)
# 根据检查和一些试验进行编辑...
grid.edit("GRID.polygon.38", gp = gpar(fill="red"))
grid.edit("GRID.polygon.33", gp = gpar(fill="yellow"))
grid.edit("GRID.polygon.29", gp = gpar(fill="green"))
grid.edit("GRID.polygon.17", gp = gpar(fill="red"))
grid.edit("GRID.polygon.21", gp = gpar(fill="yellow"))
grid.edit("GRID.polygon.25", gp = gpar(fill="green"))
# 返回到根视口
popViewport()
# 添加节点列的标签
grid.text(label = c("time_0", "time_1"), x = c(0.25, 0.75), y = rep(0.15, 2))
# 您还可以编辑节点标签
# 我只是编辑了一个作为示例
grid.edit("GRID.text.39", label = "Red")
# 可能有更有效的方法来实现您所期望的效果。
# 我仍在逐渐掌握 grid...!
创建于2023-06-03,使用 reprex v2.0.2
英文:
You can get part of the way using PantaRhei
and all the way with inputs from grid
library(PantaRhei)
library(dplyr)
library(tibble)
df1 <- tibble(
id = seq(1:22168),
time_0 = c(rep("red", 13309), rep("yellow", 8699), rep("green", 160)),
time_1 = c(rep("red", 1110), rep("yellow", 3771), rep("green", 8428),
rep("red", 321), rep("yellow", 1940), rep("green", 6438),
rep("red", 4), rep("yellow", 26), rep("green", 130))
) |>
mutate(across(starts_with("time"), factor,
levels = c("green", "yellow", "red")))
# summarise data for flows
# the heading names are specific to Panta Rhei for processing the data
# Panta Rhei documentation uses the 'substance' variable to name the 'substance' or name
# of the flow, in this case we'll use it to determine the fill.
# There may be more efficient ways to define unique 'from' and 'to' variables depending on your data.
flows <-
df1 |>
group_by(time_0, time_1) |>
summarise(quantity = n(), .groups = "drop") |>
mutate(substance = time_1,
from = case_when(time_0 == "yellow" ~ "B",
time_0 == "red" ~ "A",
time_0 == "green" ~ "C"),
to = case_when(time_1 == "yellow" ~ "D",
time_1 == "red" ~ "E",
time_1 == "green" ~ "F"))
# build up a nodes data frame
# to set labels and position of nodes
nodes <-
data.frame(ID = c(unique(flows$from), unique(flows$to)),
label = c(unique(flows$from), unique(flows$to)),
x = c(rep(1, 3), rep(2, 3)),
y = c("1", "1.25", "1.5", "C", "B", "A"),
label_pos = rep(c("left", "right"), each = 3))
colors <- tribble(
~substance, ~color,
"yellow", "yellow",
"red", "red",
"green", "green"
)
sankey(nodes, flows, colors, legend = FALSE)
# PantaRhei limitations
# Although you could change the colour of the nodes, there does not seem to be a way colour nodes individually
# While individual nodes are labelled I could not find a way to label the node columns (not sure of the correct term).
# Unable to control the formatting for node quantity.
# Not sure how to control the order of the 'to' nodes.
# As PantaRhei is build from grid you could probably add these features to your model.
# 'grid' based edits noted below:
library(grid)
# functions to inspect the grid tree.
# grid.force()
# grid.ls()
# grid.ls(grobs = FALSE, viewports = TRUE)
# make edits following inspection and a bit of trial and error...
grid.edit("GRID.polygon.38", gp = gpar(fill="red"))
grid.edit("GRID.polygon.33", gp = gpar(fill="yellow"))
grid.edit("GRID.polygon.29", gp = gpar(fill="green"))
grid.edit("GRID.polygon.17", gp = gpar(fill="red"))
grid.edit("GRID.polygon.21", gp = gpar(fill="yellow"))
grid.edit("GRID.polygon.25", gp = gpar(fill="green"))
# Get back to the root viewport
popViewport()
# add labels to node columns
grid.text(label = c("time_0", "time_1"), x = c(0.25, 0.75), y = rep(0.15, 2))
# you could also edit the node labels
# I've just edited one as an example
grid.edit("GRID.text.39", label = "Red")
# There may be more efficient ways to achieve the effect you desire.
# I'm still getting to grips with grid...!
<sup>Created on 2023-06-03 with reprex v2.0.2</sup>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论