2023年6月18日 18:54:01go评论88阅读模式

英文:

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

问题

我有以下两个具有不同形状和列名的数据框：

# 加载所需的库
import pandas as pd
import matplotlib.pyplot as plt
# Create dataset 1: one row per (id, cycle) sample; every column has 25 values
# (9 for id 1, 4 for id 2, 8 for id 3, 4 for id 4).
data_set_1 = {
    'id': [1, 1, 1, 1, 1, 1, 1, 1, 1,
           2, 2, 2, 2,
           3, 3, 3, 3, 3, 3, 3, 3,
           4, 4, 4, 4],
    'cycle_1': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
                0.0, 0.2, 0.4, 0.6,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
                0.0, 0.2, 0.4, 0.6],
    'Salary_1': [6, 7, 7, 7, 8, 9, 10, 11, 12,
                 3, 4, 4, 4,
                 2, 8, 9, 10, 11, 12, 13, 14,
                 1, 8, 9, 10],
    'Children_1': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
                   'Yes', 'Yes', 'Yes', 'No',
                   'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
                   'Yes', 'Yes', 'No', 'Yes'],
    'Days_1': [141, 123, 128, 66, 66, 120, 141, 52, 52,
               141, 96, 120, 120,
               141, 15, 123, 128, 66, 120, 141, 141,
               141, 141, 123, 128],
}
# Convert to DataFrame 1
df_1 = pd.DataFrame(data_set_1)
# Create dataset 2: every column has 33 values
# (12 for id 1, 7 for id 2, 8 for id 3, 6 for id 4).
data_set_2 = {
    'id': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           2, 2, 2, 2, 2, 2, 2,
           3, 3, 3, 3, 3, 3, 3, 3,
           4, 4, 4, 4, 4, 4],
    'cycle_2': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
    'Salary_2': [7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
                 1, 6, 3, 8, 4, 9, 8,
                 6, 4, 9, 10, 4, 12, 13, 6,
                 1, 4, 9, 10, 9, 4],
    # pd.DataFrame requires all columns to have the same length, so this list
    # must also hold 33 values (the id-4 row carries six entries).
    'Children_2': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
                   'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
                   'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
                   'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes'],
    'Days_2': [141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
               141, 96, 120, 120, 141, 52, 96,
               141, 15, 123, 128, 66, 120, 141, 141,
               141, 141, 123, 128, 66, 67],
}
# Convert to DataFrame 2
df_2 = pd.DataFrame(data_set_2)

现在，我希望绘制cycle_1 vs. Salary_1，并与cycle_2 vs. Salary_2重叠，对于不同子图中的每个id。

因此，我需要使用子图函数，如下所示：

# Draw one subplot per id, overlaying Salary_1 of df_1 (blue) and Salary_2 of df_2 (red).
plt_fig_verify = plt.figure(figsize=(10, 8))
# Group each frame once, outside the loop, rather than regrouping for every curve.
groups_1 = df_1.groupby(by="id")
groups_2 = df_2.groupby(by="id")
# Only ids present in both frames can be overlaid; sorting keeps the subplot order stable.
shared_ids = sorted(set(groups_1.groups) & set(groups_2.groups))
for row, i in enumerate(shared_ids, start=1):
    part_1 = groups_1.get_group(i)
    part_2 = groups_2.get_group(i)
    # The subplot grid has one row per id, so the row count follows the data.
    plt.subplot(len(shared_ids), 1, row)
    plt.plot(part_1['cycle_1'], part_1['Salary_1'], 'b', linewidth=1, label=f'id{i}: Salary_1 of df_1')
    plt.plot(part_2['cycle_2'], part_2['Salary_2'], 'r', linewidth=1, label=f'id{i}: Salary_2 of df_2')
    plt.xlabel('cycle')
    plt.ylabel('Salary')
    plt.legend()
plt.show()

这段代码会为每个id创建一个子图，并绘制相应的数据。您不需要多次重复相同的代码，而是使用循环进行迭代来创建子图。

英文:

I have the below two dataframes with different shapes and column names:

#Load the required libraries
import pandas as pd
import matplotlib.pyplot as plt
# Create dataset_1: one row per (id, cycle) sample; each line of a list holds one id's values.
data_set_1 = {
    'id': [1, 1, 1, 1, 1, 1, 1, 1, 1,
           2, 2, 2, 2,
           3, 3, 3, 3, 3, 3, 3, 3,
           4, 4, 4, 4],
    'cycle_1': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
                0.0, 0.2, 0.4, 0.6,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
                0.0, 0.2, 0.4, 0.6],
    'Salary_1': [6, 7, 7, 7, 8, 9, 10, 11, 12,
                 3, 4, 4, 4,
                 2, 8, 9, 10, 11, 12, 13, 14,
                 1, 8, 9, 10],
    'Children_1': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
                   'Yes', 'Yes', 'Yes', 'No',
                   'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
                   'Yes', 'Yes', 'No', 'Yes'],
    'Days_1': [141, 123, 128, 66, 66, 120, 141, 52, 52,
               141, 96, 120, 120,
               141, 15, 123, 128, 66, 120, 141, 141,
               141, 141, 123, 128],
}
# Convert to dataframe_1
df_1 = pd.DataFrame(data_set_1)
print("\n df_1 = \n",df_1)
# Create dataset_2: same layout as dataset_1, with "_2" column names and more rows per id.
data_set_2 = {
    'id': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           2, 2, 2, 2, 2, 2, 2,
           3, 3, 3, 3, 3, 3, 3, 3,
           4, 4, 4, 4, 4, 4],
    'cycle_2': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
                0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
    'Salary_2': [7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
                 1, 6, 3, 8, 4, 9, 8,
                 6, 4, 9, 10, 4, 12, 13, 6,
                 1, 4, 9, 10, 9, 4],
    'Children_2': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
                   'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
                   'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
                   'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes'],
    'Days_2': [141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
               141, 96, 120, 120, 141, 52, 96,
               141, 15, 123, 128, 66, 120, 141, 141,
               141, 141, 123, 128, 66, 67],
}
# Convert to dataframe_2
df_2 = pd.DataFrame(data_set_2)
print("\n df_2 = \n",df_2)

Now, here I wish to plot the cycle_1 vs Salary_1, and overlap it with cycle_2 vs Salary_2, for every id in different subplots.

Thus I need to use subplot function as such:

## Plot for all id's: four stacked subplots, each written out by hand for one id,
## overlaying Salary_1 of df_1 (blue) with Salary_2 of df_2 (red).
plt_fig_verify = plt.figure(figsize=(10,8))
## id1:
plt.subplot(4,1,1)
plt.plot(df_1.groupby(by="id").get_group(1)['cycle_1'], df_1.groupby(by="id").get_group(1)['Salary_1'], 'b',  linewidth = '1', label ='id1: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(1)['cycle_2'], df_2.groupby(by="id").get_group(1)['Salary_2'], 'r',  linewidth = '1', label ='id1: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id2:
plt.subplot(4,1,2)
plt.plot(df_1.groupby(by="id").get_group(2)['cycle_1'], df_1.groupby(by="id").get_group(2)['Salary_1'], 'b',  linewidth = '1', label ='id2: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(2)['cycle_2'], df_2.groupby(by="id").get_group(2)['Salary_2'], 'r',  linewidth = '1', label ='id2: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id3:
plt.subplot(4,1,3)
plt.plot(df_1.groupby(by="id").get_group(3)['cycle_1'], df_1.groupby(by="id").get_group(3)['Salary_1'], 'b',  linewidth = '1', label ='id3: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(3)['cycle_2'], df_2.groupby(by="id").get_group(3)['Salary_2'], 'r',  linewidth = '1', label ='id3: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id4:
plt.subplot(4,1,4)
plt.plot(df_1.groupby(by="id").get_group(4)['cycle_1'], df_1.groupby(by="id").get_group(4)['Salary_1'], 'b',  linewidth = '1', label ='id4: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(4)['cycle_2'], df_2.groupby(by="id").get_group(4)['Salary_2'], 'r',  linewidth = '1', label ='id4: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
plt.show()

The plot looks as such:

However, here I need to write the subplot code four times, once for each of the four ids in the dataframes, using the differently named columns of each dataframe, and then overlap the curves.

Is there a way to use some iterative function, so that the subplot code is written only once and still produces all four overlapped subplots?

Can somebody please let me know how to achieve this task in Python?

答案1

得分: 1

你可以稍微调整这个代码，以确保这两个数据框具有相同的标题。

# Map each source frame's label (used as the concat key below) to its line colour.
colors = {"df_1": "blue", "df_2": "red"}
# Stack both frames under one shared header: df_2 takes df_1's column names, then the
# part after the first "_" is dropped from every column name ("cycle_1" -> "cycle").
# The concat keys become the outer index level, so each row remembers its source frame.
df = (
    pd.concat(
        [df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
            .rename(lambda x: x.split("_")[0], axis=1)
)
# One subplot row per distinct id; squeeze=False keeps axs an array even for a single id,
# so axs.flatten() below always works.
fig, axs = plt.subplots(figsize=(10, 8), nrows=df["id"].nunique(), squeeze=False)
for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # s is the source label ("df_1", "df_2"); i numbers the curves from 1 for the legend.
    for i, s in enumerate(df.index.levels[0], start=1):
        # g.loc[s] selects this id's rows that came from frame s.
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}",
            color=colors[s],
            ax=ax
        )
plt.tight_layout()
plt.show()

输出：

英文:

You can slightly adjust this code to make sure that the two dataframes have the same header.

# Map each source frame's label (used as the concat key below) to its line colour.
colors = {"df_1": "blue", "df_2": "red"}
# Stack both frames under one shared header: df_2 takes df_1's column names, then the
# part after the first "_" is dropped from every column name ("cycle_1" -> "cycle").
# The concat keys become the outer index level, so each row remembers its source frame.
df = (
    pd.concat(
        [df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
            .rename(lambda x: x.split("_")[0], axis=1)
)
# One subplot row per distinct id; squeeze=False keeps axs an array even for a single id,
# so axs.flatten() below always works.
fig, axs = plt.subplots(figsize=(10, 8), nrows=df["id"].nunique(), squeeze=False)
for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # s is the source label ("df_1", "df_2"); i numbers the curves from 1 for the legend.
    for i, s in enumerate(df.index.levels[0], start=1):
        # g.loc[s] selects this id's rows that came from frame s.
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}",
            color=colors[s],
            ax=ax
        )
plt.tight_layout()
plt.show()

Output :

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

问题

答案1

我该如何将DataFrame按照PyTorch Geometric的节点索引重新排列？

Kafka 确保消费者组保持活跃

Can't go past Cloudflare's verify you are human check even after clicking the check box multiple times when using Selenium

受限零钱找零问题的Python和Java转换

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。