英文:
Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,
问题
我有以下两个具有不同形状和列名的数据框:
# 加载所需的库
import pandas as pd
import matplotlib.pyplot as plt
# Build dataset 1. Each column is laid out one line per id
# (id 1: 9 rows, id 2: 4 rows, id 3: 8 rows, id 4: 4 rows).
data_set_1 = dict(
    id=[1] * 9 + [2] * 4 + [3] * 8 + [4] * 4,
    cycle_1=[
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
        0.0, 0.2, 0.4, 0.6,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6,
    ],
    Salary_1=[
        6, 7, 7, 7, 8, 9, 10, 11, 12,
        3, 4, 4, 4,
        2, 8, 9, 10, 11, 12, 13, 14,
        1, 8, 9, 10,
    ],
    Children_1=[
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes',
    ],
    Days_1=[
        141, 123, 128, 66, 66, 120, 141, 52, 52,
        141, 96, 120, 120,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128,
    ],
)

# Convert to dataframe 1
df_1 = pd.DataFrame(data=data_set_1)
# 创建数据集2
# Build dataset 2. Each column is laid out one line per id
# (id 1: 12 rows, id 2: 7 rows, id 3: 8 rows, id 4: 6 rows).
# Every column must have the same length (33) or pd.DataFrame raises
# ValueError; 'Children_2' previously had only 31 entries because the
# last two values for id 4 were missing.
data_set_2 = {
    'id': [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4,
    ],
    'cycle_2': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0,
    ],
    'Salary_2': [
        7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
        1, 6, 3, 8, 4, 9, 8,
        6, 4, 9, 10, 4, 12, 13, 6,
        1, 4, 9, 10, 9, 4,
    ],
    'Children_2': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
    ],
    'Days_2': [
        141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
        141, 96, 120, 120, 141, 52, 96,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128, 66, 67,
    ],
}

# Convert to dataframe 2
df_2 = pd.DataFrame(data_set_2)
现在,我希望绘制 cycle_1 与 Salary_1 的关系曲线,并将其与 cycle_2 与 Salary_2 的关系曲线叠加,每个 id 各占一个子图。
因此,我需要使用子图函数,如下所示:
# Plot one subplot per id, overlaying Salary_1 (df_1, blue) with
# Salary_2 (df_2, red) against their respective cycle columns.
plt_fig_verify = plt.figure(figsize=(10, 8))

# Group each frame once; the loop only looks up the rows of the current id.
groups_1 = df_1.groupby(by="id")
groups_2 = df_2.groupby(by="id")

for i in range(1, 5):
    rows_1 = groups_1.get_group(i)
    rows_2 = groups_2.get_group(i)

    # Subplot positions are 1-based, so id i goes into row i of the 4x1 grid.
    plt.subplot(4, 1, i)
    plt.plot(rows_1['cycle_1'], rows_1['Salary_1'], 'b', linewidth=1,
             label=f'id{i}: Salary_1 of df_1')
    plt.plot(rows_2['cycle_2'], rows_2['Salary_2'], 'r', linewidth=1,
             label=f'id{i}: Salary_2 of df_2')
    plt.xlabel('cycle')
    plt.ylabel('Salary')
    plt.legend()

# Show the figure once, after all four subplots have been drawn.
plt.show()
这段代码会为每个 id 创建一个子图,并绘制相应的数据。这样就不需要多次重复相同的代码,而是通过循环迭代来创建各个子图。
英文:
I have the below two dataframes with different shapes and column names:
#Load the required libraries
import pandas as pd
import matplotlib.pyplot as plt
# Create dataset_1; each column lists its values one line per id
# (id 1: 9 rows, id 2: 4 rows, id 3: 8 rows, id 4: 4 rows).
data_set_1 = {
    'id': [1] * 9 + [2] * 4 + [3] * 8 + [4] * 4,
    'cycle_1': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
        0.0, 0.2, 0.4, 0.6,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6,
    ],
    'Salary_1': [
        6, 7, 7, 7, 8, 9, 10, 11, 12,
        3, 4, 4, 4,
        2, 8, 9, 10, 11, 12, 13, 14,
        1, 8, 9, 10,
    ],
    'Children_1': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes',
    ],
    'Days_1': [
        141, 123, 128, 66, 66, 120, 141, 52, 52,
        141, 96, 120, 120,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128,
    ],
}

# Convert to dataframe_1 and echo it for inspection
df_1 = pd.DataFrame(data=data_set_1)
print("\n df_1 = \n", df_1)
# Create dataset_2; each column lists its values one line per id
# (id 1: 12 rows, id 2: 7 rows, id 3: 8 rows, id 4: 6 rows).
data_set_2 = {
    'id': [1] * 12 + [2] * 7 + [3] * 8 + [4] * 6,
    'cycle_2': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0,
    ],
    'Salary_2': [
        7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
        1, 6, 3, 8, 4, 9, 8,
        6, 4, 9, 10, 4, 12, 13, 6,
        1, 4, 9, 10, 9, 4,
    ],
    'Children_2': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
    ],
    'Days_2': [
        141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
        141, 96, 120, 120, 141, 52, 96,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128, 66, 67,
    ],
}

# Convert to dataframe_2 and echo it for inspection
df_2 = pd.DataFrame(data=data_set_2)
print("\n df_2 = \n", df_2)
Now, here I wish to plot the cycle_1 vs Salary_1, and overlap it with cycle_2 vs Salary_2, for every id in different subplots.
Thus I need to use subplot function as such:
## Plot for all id's: one figure with a 4-row, 1-column grid of subplots.
## Each subplot overlays Salary_1 vs cycle_1 from df_1 ('b' format) with
## Salary_2 vs cycle_2 from df_2 ('r' format) for a single id.
plt_fig_verify = plt.figure(figsize=(10,8))
## id1: first row of the grid
plt.subplot(4,1,1)
plt.plot(df_1.groupby(by="id").get_group(1)['cycle_1'], df_1.groupby(by="id").get_group(1)['Salary_1'], 'b', linewidth = '1', label ='id1: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(1)['cycle_2'], df_2.groupby(by="id").get_group(1)['Salary_2'], 'r', linewidth = '1', label ='id1: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id2: second row of the grid (same code as id1, only the id changes)
plt.subplot(4,1,2)
plt.plot(df_1.groupby(by="id").get_group(2)['cycle_1'], df_1.groupby(by="id").get_group(2)['Salary_1'], 'b', linewidth = '1', label ='id2: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(2)['cycle_2'], df_2.groupby(by="id").get_group(2)['Salary_2'], 'r', linewidth = '1', label ='id2: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id3: third row of the grid
plt.subplot(4,1,3)
plt.plot(df_1.groupby(by="id").get_group(3)['cycle_1'], df_1.groupby(by="id").get_group(3)['Salary_1'], 'b', linewidth = '1', label ='id3: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(3)['cycle_2'], df_2.groupby(by="id").get_group(3)['Salary_2'], 'r', linewidth = '1', label ='id3: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id4: fourth row of the grid
plt.subplot(4,1,4)
plt.plot(df_1.groupby(by="id").get_group(4)['cycle_1'], df_1.groupby(by="id").get_group(4)['Salary_1'], 'b', linewidth = '1', label ='id4: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(4)['cycle_2'], df_2.groupby(by="id").get_group(4)['Salary_2'], 'r', linewidth = '1', label ='id4: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## Display the finished figure once all four subplots are drawn
plt.show()
The plot looks as such:
However, here I need to write the code for the subplot function four times, with different column names, i.e. once for each of the four ids in the dataframes, and then overlap the curves.
Is there a way to use some iterative function so that the subplot code is written only once and still produces all four overlapped subplots?
Can somebody please let me know how to achieve this in Python?
答案1
得分: 1
你可以稍微调整一下代码,先确保这两个数据框具有相同的列名(表头)。
# Line colour for each source frame; the keys also become the outer index
# level of the combined frame below.
colors = {"df_1": "blue", "df_2": "red"}

# Give df_2 the same column labels as df_1, stack both frames under an outer
# index level ("df_1" / "df_2"), then drop the "_1" suffix so both sources
# share the plain names "cycle", "Salary", ...
df = (
    pd.concat(
        [df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
    .rename(lambda x: x.split("_")[0], axis=1)
)

# One subplot row per id. squeeze=False keeps `axs` a 2-D array even when
# there is only a single id, so `axs.flatten()` always works.
fig, axs = plt.subplots(
    figsize=(10, 8), nrows=df["id"].nunique(), squeeze=False)

for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # `s` is the source-frame key ("df_1" / "df_2"); `i` is its 1-based number.
    for i, s in enumerate(df.index.levels[0], start=1):
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}",
            color=colors[s],
            ax=ax
        )

plt.tight_layout()
plt.show()
输出:
英文:
You can slightly adjust this code to make sure that the two dataframes have the same header.
# Line colour for each source frame; the keys also become the outer index
# level of the combined frame below.
colors = {"df_1": "blue", "df_2": "red"}

# Give df_2 the same column labels as df_1, stack both frames under an outer
# index level ("df_1" / "df_2"), then drop the "_1" suffix so both sources
# share the plain names "cycle", "Salary", ...
df = (
    pd.concat(
        [df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
    .rename(lambda x: x.split("_")[0], axis=1)
)

# One subplot row per id. squeeze=False keeps `axs` a 2-D array even when
# there is only a single id, so `axs.flatten()` always works.
fig, axs = plt.subplots(
    figsize=(10, 8), nrows=df["id"].nunique(), squeeze=False)

for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # `s` is the source-frame key ("df_1" / "df_2"); `i` is its 1-based number.
    for i, s in enumerate(df.index.levels[0], start=1):
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}",
            color=colors[s],
            ax=ax
        )

plt.tight_layout()
plt.show()
Output :
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论