Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

huangapple go评论63阅读模式
英文:

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

问题

我有以下两个具有不同形状和列名的数据框:

# 加载所需的库
import pandas as pd
import matplotlib.pyplot as plt

# Build dataset 1. Each list is laid out one line per id (1, 2, 3, 4) so the
# column lengths are easy to audit: 9 + 4 + 8 + 4 = 25 rows.
data_set_1 = {
    'id': [
        1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4,
    ],
    'cycle_1': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
        0.0, 0.2, 0.4, 0.6,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6,
    ],
    'Salary_1': [
        6, 7, 7, 7, 8, 9, 10, 11, 12,
        3, 4, 4, 4,
        2, 8, 9, 10, 11, 12, 13, 14,
        1, 8, 9, 10,
    ],
    'Children_1': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes',
    ],
    'Days_1': [
        141, 123, 128, 66, 66, 120, 141, 52, 52,
        141, 96, 120, 120,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128,
    ],
}

# Convert to dataframe 1
df_1 = pd.DataFrame(data_set_1)

# Build dataset 2, again one line per id: 12 + 7 + 8 + 6 = 33 rows.
data_set_2 = {
    'id': [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4,
    ],
    'cycle_2': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0,
    ],
    'Salary_2': [
        7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
        1, 6, 3, 8, 4, 9, 8,
        6, 4, 9, 10, 4, 12, 13, 6,
        1, 4, 9, 10, 9, 4,
    ],
    # The id-4 row previously held only four values, leaving this column at
    # 31 entries against 33 for the others; pd.DataFrame rejects a dict whose
    # columns differ in length, so the two missing trailing values are restored.
    'Children_2': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
    ],
    'Days_2': [
        141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
        141, 96, 120, 120, 141, 52, 96,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128, 66, 67,
    ],
}

# Convert to dataframe 2
df_2 = pd.DataFrame(data_set_2)

现在,我希望绘制cycle_1 vs. Salary_1,并与cycle_2 vs. Salary_2重叠,对于不同子图中的每个id

因此,我需要使用子图函数,如下所示:

# Draw one subplot per id, overlaying df_1 (blue) and df_2 (red)
plt_fig_verify = plt.figure(figsize=(10, 8))

# Group each frame once up front instead of regrouping for every plot call.
groups_1 = df_1.groupby(by="id")
groups_2 = df_2.groupby(by="id")

# Take the ids from the data so the grid size follows the number of groups
# rather than a hard-coded 1..4.
ids = sorted(groups_1.groups)

for row, i in enumerate(ids, start=1):
    part_1 = groups_1.get_group(i)
    part_2 = groups_2.get_group(i)  # KeyError if df_2 has no rows for this id
    plt.subplot(len(ids), 1, row)
    plt.plot(part_1['cycle_1'], part_1['Salary_1'], 'b', linewidth=1, label=f'id{i}: Salary_1 of df_1')
    plt.plot(part_2['cycle_2'], part_2['Salary_2'], 'r', linewidth=1, label=f'id{i}: Salary_2 of df_2')
    plt.xlabel('cycle')
    plt.ylabel('Salary')
    plt.legend()

plt.show()

这段代码会为每个id创建一个子图,并绘制相应的数据。您不需要多次重复相同的代码,而是使用循环进行迭代来创建子图。

英文:

I have the below two dataframes with different shapes and column names:

#Load the required libraries
import pandas as pd
import matplotlib.pyplot as plt
# Dataset 1: 25 observations, written one line per id (1, 2, 3, 4).
data_set_1 = {
    'id': [1] * 9 + [2] * 4 + [3] * 8 + [4] * 4,
    'cycle_1': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6,
        0.0, 0.2, 0.4, 0.6,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6,
    ],
    'Salary_1': [
        6, 7, 7, 7, 8, 9, 10, 11, 12,
        3, 4, 4, 4,
        2, 8, 9, 10, 11, 12, 13, 14,
        1, 8, 9, 10,
    ],
    'Children_1': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes',
    ],
    'Days_1': [
        141, 123, 128, 66, 66, 120, 141, 52, 52,
        141, 96, 120, 120,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128,
    ],
}

# Materialise dataset 1 as a dataframe and echo it.
df_1 = pd.DataFrame(data=data_set_1)
print("\n df_1 = \n", df_1)

# Dataset 2: 33 observations, same one-line-per-id layout.
data_set_2 = {
    'id': [1] * 12 + [2] * 7 + [3] * 8 + [4] * 6,
    'cycle_2': [
        0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
        0.0, 0.2, 0.4, 0.6, 0.8, 1.0,
    ],
    'Salary_2': [
        7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
        1, 6, 3, 8, 4, 9, 8,
        6, 4, 9, 10, 4, 12, 13, 6,
        1, 4, 9, 10, 9, 4,
    ],
    'Children_2': [
        'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
        'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
        'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
    ],
    'Days_2': [
        141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
        141, 96, 120, 120, 141, 52, 96,
        141, 15, 123, 128, 66, 120, 141, 141,
        141, 141, 123, 128, 66, 67,
    ],
}

# Materialise dataset 2 as a dataframe and echo it.
df_2 = pd.DataFrame(data=data_set_2)
print("\n df_2 = \n", df_2)

Now, here I wish to plot the cycle_1 vs Salary_1, and overlap it with cycle_2 vs Salary_2, for every id in different subplots.

Thus I need to use subplot function as such:

## Plot every id in its own subplot of one figure:
## blue = cycle_1 vs Salary_1 from df_1, red = cycle_2 vs Salary_2 from df_2.
plt_fig_verify = plt.figure(figsize=(10,8))
## id1: first row of the 4x1 subplot grid
plt.subplot(4,1,1)
plt.plot(df_1.groupby(by="id").get_group(1)['cycle_1'], df_1.groupby(by="id").get_group(1)['Salary_1'], 'b',  linewidth = '1', label ='id1: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(1)['cycle_2'], df_2.groupby(by="id").get_group(1)['Salary_2'], 'r',  linewidth = '1', label ='id1: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id2: second row of the grid (same calls as id1, only the group key and labels change)
plt.subplot(4,1,2)
plt.plot(df_1.groupby(by="id").get_group(2)['cycle_1'], df_1.groupby(by="id").get_group(2)['Salary_1'], 'b',  linewidth = '1', label ='id2: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(2)['cycle_2'], df_2.groupby(by="id").get_group(2)['Salary_2'], 'r',  linewidth = '1', label ='id2: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id3: third row of the grid
plt.subplot(4,1,3)
plt.plot(df_1.groupby(by="id").get_group(3)['cycle_1'], df_1.groupby(by="id").get_group(3)['Salary_1'], 'b',  linewidth = '1', label ='id3: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(3)['cycle_2'], df_2.groupby(by="id").get_group(3)['Salary_2'], 'r',  linewidth = '1', label ='id3: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id4: fourth (last) row of the grid
plt.subplot(4,1,4)
plt.plot(df_1.groupby(by="id").get_group(4)['cycle_1'], df_1.groupby(by="id").get_group(4)['Salary_1'], 'b',  linewidth = '1', label ='id4: Salary_1 of df_1')
plt.plot(df_2.groupby(by="id").get_group(4)['cycle_2'], df_2.groupby(by="id").get_group(4)['Salary_2'], 'r',  linewidth = '1', label ='id4: Salary_2 of df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## Render the finished figure
plt.show()

The plot looks as such:

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

However, this approach requires me to write the subplot code four times, once for each of the four ids in the dataframes, changing the group key and labels each time, and then overlay the two curves.

Is there a way to use some kind of loop so that the subplot code is written only once and still produces all four overlapped subplots?

Can somebody please let me know how to achieve this task in Python?

答案1

得分: 1

你可以稍微调整这个代码,以确保这两个数据框具有相同的标题。

# Line colour for each source frame; the keys are also used as the concat
# keys below, so they become the outer level of the combined index.
colors = {"df_1": "blue", "df_2": "red"}

# Stack both frames under one header: df_2 takes df_1's column labels, then
# everything after the first "_" is dropped from each column name
# (cycle_1 -> cycle, Salary_1 -> Salary, ...).
df = (
    pd.concat([df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
    .rename(lambda x: x.split("_")[0], axis=1)
)

# One subplot row per distinct id.
fig, axs = plt.subplots(figsize=(10, 8), nrows=len(df["id"].unique()))

for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # s is the source-frame key ("df_1"/"df_2"); i numbers the sources from 1.
    for i, s in enumerate(df.index.levels[0], start=1):
        # g.loc[s] selects this id's rows that came from source frame s.
        g.loc[s].plot(
            x="cycle", y="Salary", xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}", color=colors[s], ax=ax,
        )

plt.tight_layout()
plt.show()

输出:

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

英文:

You can slightly adjust this code to make sure that the two dataframes have the same header.

# Line colour for each source frame; the keys are also used as the concat
# keys below, so they become the outer level of the combined index.
colors = {"df_1": "blue", "df_2": "red"}

# Stack both frames under one header: df_2 takes df_1's column labels, then
# everything after the first "_" is dropped from each column name
# (cycle_1 -> cycle, Salary_1 -> Salary, ...).
df = (
    pd.concat([df_1, df_2.set_axis(df_1.columns, axis=1)], keys=colors)
    .rename(lambda x: x.split("_")[0], axis=1)
)

# One subplot row per distinct id.
fig, axs = plt.subplots(figsize=(10, 8), nrows=len(df["id"].unique()))

for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # s is the source-frame key ("df_1"/"df_2"); i numbers the sources from 1.
    for i, s in enumerate(df.index.levels[0], start=1):
        # g.loc[s] selects this id's rows that came from source frame s.
        g.loc[s].plot(
            x="cycle", y="Salary", xlabel="Cycle", ylabel="Salary",
            label=f"id{n}: Salary_{i} of {s}", color=colors[s], ax=ax,
        )

plt.tight_layout()
plt.show()

Output :

Create subplot, by overlapping two dataframes of different shapes and column names, for every group/id,

huangapple
  • 本文由 发表于 2023年6月18日 18:54:01
  • 转载请务必保留本文链接:https://go.coder-hub.com/76500170.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定