英文:
Create subplot, by overlapping two dataframes, for every group/id
问题
以下是翻译好的代码部分:
# 导入所需的库
import pandas as pd
import matplotlib.pyplot as plt
# 创建数据集1
data_set_1 = {'id': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4,],
'cycle': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0,],
'Salary': [6, 7, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3, 4, 4, 4, 4, 5, 6,
2, 8, 9, 10, 11, 12, 13, 14,
1, 8, 9, 10, 11, 12,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
141, 96, 120, 120, 141, 52, 96,
141, 15, 123, 128, 66, 120, 141, 141,
141, 141, 123, 128, 66, 67,],
}
# 转换为DataFrame_1
df_1 = pd.DataFrame(data_set_1)
print("\n df_1 = \n", df_1)
# 创建数据集2
data_set_2 = {'id': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4,],
'cycle': [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4,
0.0, 0.2, 0.4, 0.6, 0.8, 1.0,],
'Salary': [7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20,
1, 6, 3, 8, 4, 9, 8,
6, 4, 9, 10, 4, 12, 13, 6,
1, 4, 9, 10, 9, 4,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes',
'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52, 96, 120, 141, 52,
141, 96, 120, 120, 141, 52, 96,
141, 15, 123, 128, 66, 120, 141, 141,
141, 141, 123, 128, 66, 67,],
}
# 转换为DataFrame_2
df_2 = pd.DataFrame(data_set_2)
print("\n df_2 = \n", df_2)
# Draw one figure with a vertically stacked subplot per id, overlaying the
# df_1 curve (blue) and the df_2 curve (red) in each subplot.
plt_fig_verify = plt.figure(figsize=(10, 8))
# Group each frame once up front instead of re-grouping for every plot call.
by_id_1 = df_1.groupby(by="id")
by_id_2 = df_2.groupby(by="id")
# Iterate over the ids actually present in df_1 rather than a hard-coded
# range, so the number of subplot rows follows the data.
for row, (i, part_1) in enumerate(by_id_1, start=1):
    # Raises KeyError if df_2 has no rows for this id, as the original
    # get_group(i) call did.
    part_2 = by_id_2.get_group(i)
    plt.subplot(by_id_1.ngroups, 1, row)
    plt.plot(part_1['cycle'], part_1['Salary'], 'b', linewidth=1, label=f'id{i}: df_1')
    plt.plot(part_2['cycle'], part_2['Salary'], 'r', linewidth=1, label=f'id{i}: df_2')
    plt.xlabel('cycle')
    plt.ylabel('Salary')
    plt.legend()
plt.show()
这个代码会在一个图中创建四个子图,每个子图对应不同的id,从而避免了为每个id重复编写子图代码。
英文:
I have the two dataframes below:
#Load the required libraries
import pandas as pd
import matplotlib.pyplot as plt
#Create dataset_1
data_set_1 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [6, 7, 7, 7,8,9,10,11,12,13,14,15,
3, 4, 4, 4,4,5,6,
2, 8,9,10,11,12,13,14,
1, 8,9,10,11,12,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
#Convert to dataframe_1
df_1 = pd.DataFrame(data_set_1)
print("\n df_1 = \n",df_1)
#Create dataset_2
data_set_2 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [7, 8, 8, 8,8,9,14,21,12,19,14,20,
1, 6, 3, 8,4,9,8,
6, 4,9,10,4,12,13,6,
1, 4,9,10,9,4,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
#Convert to dataframe_2
df_2 = pd.DataFrame(data_set_2)
print("\n df_2 = \n",df_2)
Now, I wish to plot `cycle` vs `Salary`, overlapping the two dataframes for every `id`, in one single figure. Thus I need to use the subplot function as such:
## Plot for all id's: one figure holding four vertically stacked subplots
plt_fig_verify = plt.figure(figsize=(10,8))
## id1: subplot 1 of 4 -- df_1 drawn in blue ('b'), df_2 overlaid in red ('r')
plt.subplot(4,1,1)
plt.plot(df_1.groupby(by="id").get_group(1)['cycle'], df_1.groupby(by="id").get_group(1)['Salary'], 'b', linewidth = '1', label ='id1: df_1')
plt.plot(df_2.groupby(by="id").get_group(1)['cycle'], df_2.groupby(by="id").get_group(1)['Salary'], 'r', linewidth = '1', label ='id1: df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id2: subplot 2 of 4 -- same calls as id1, only the group key and labels change
plt.subplot(4,1,2)
plt.plot(df_1.groupby(by="id").get_group(2)['cycle'], df_1.groupby(by="id").get_group(2)['Salary'], 'b', linewidth = '1', label ='id2: df_1')
plt.plot(df_2.groupby(by="id").get_group(2)['cycle'], df_2.groupby(by="id").get_group(2)['Salary'], 'r', linewidth = '1', label ='id2: df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id3: subplot 3 of 4
plt.subplot(4,1,3)
plt.plot(df_1.groupby(by="id").get_group(3)['cycle'], df_1.groupby(by="id").get_group(3)['Salary'], 'b', linewidth = '1', label ='id3: df_1')
plt.plot(df_2.groupby(by="id").get_group(3)['cycle'], df_2.groupby(by="id").get_group(3)['Salary'], 'r', linewidth = '1', label ='id3: df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
## id4: subplot 4 of 4
plt.subplot(4,1,4)
plt.plot(df_1.groupby(by="id").get_group(4)['cycle'], df_1.groupby(by="id").get_group(4)['Salary'], 'b', linewidth = '1', label ='id4: df_1')
plt.plot(df_2.groupby(by="id").get_group(4)['cycle'], df_2.groupby(by="id").get_group(4)['Salary'], 'r', linewidth = '1', label ='id4: df_2')
plt.xlabel('cycle')
plt.ylabel('Salary')
plt.legend()
# Render the finished figure with all four subplots.
plt.show()
The result looks as such:
However, here I need to write the code for the subplot function four times, i.e. once for each of the four ids in the dataframe, and then overlap the curves.
Is there a way to use some iterative function, so that the subplot code is written only once and still produces all four overlapped subplots?
Can somebody please let me know how to achieve this task in Python?
答案1
得分: 2
我会将两个数据框用 concat 合并起来,然后使用单个 groupby 来制作子图:
# Map each source frame's label to the colour used for its line.
colors = {"df_1": "blue", "df_2": "red"}
# Stack both frames; the dict keys become the outer index level, so each
# source can be selected again with .loc inside a group.
df = pd.concat([df_1, df_2], keys=colors)
fig, axs = plt.subplots(figsize=(10, 8), nrows=2, ncols=2)
# Pair every id group with its own axes (zip stops at the shorter sequence).
for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # Draw one line per source frame on the axes belonging to this id.
    for s in df.index.levels[0]:
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id {n}: {s}",
            color=colors[s],
            ax=ax,
        )
plt.tight_layout()
plt.show()
输出:
如果您需要单列布局,可以按以下方式更新 subplots 的配置:
# One subplot row per distinct id; with no ncols given the grid is a single column.
fig, axs = plt.subplots(figsize=(10, 8), nrows=len(df["id"].unique()))
英文:
I would `concat` the two dataframes, then use a single `groupby` to make the subplots:
# Map each source frame's label to the colour used for its line.
colors = {"df_1": "blue", "df_2": "red"}
# Stack both frames; the dict keys become the outer index level, so each
# source can be selected again with .loc inside a group.
df = pd.concat([df_1, df_2], keys=colors)
fig, axs = plt.subplots(figsize=(10, 8), nrows=2, ncols=2)
# Pair every id group with its own axes (zip stops at the shorter sequence).
for (n, g), ax in zip(df.groupby("id"), axs.flatten()):
    # Draw one line per source frame on the axes belonging to this id.
    for s in df.index.levels[0]:
        g.loc[s].plot(
            x="cycle", y="Salary",
            xlabel="Cycle", ylabel="Salary",
            label=f"id {n}: {s}",
            color=colors[s],
            ax=ax,
        )
plt.tight_layout()
plt.show()
Output :
If you need a single column, you can update the `subplots` configuration this way:
# One subplot row per distinct id; with no ncols given the grid is a single column.
fig, axs = plt.subplots(figsize=(10, 8), nrows=len(df["id"].unique()))
答案2
得分: 1
如果你想要将这些图以子图的形式展示,具有简洁而交互性强的代码,我建议你使用Plotly!
你只需要执行 `pip install plotly`,而且很好的一点是,pandas 允许你将 Plotly 用作绘图的后端引擎!
在用 `import pandas as pd` 导入 pandas 之后,只需再写一行 `pd.options.plotting.backend = "plotly"`,就可以使用 Plotly 了!
我合并了你创建的两个DataFrame;我创建了一个名为 source
的列,用于区分第一个DataFrame的输入和第二个DataFrame的输入。
下面是修改后的代码示例:
# 导入所需的库
import pandas as pd
pd.options.plotting.backend = "plotly"
# 创建数据集_1
data_set_1 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [6, 7, 7, 7,8,9,10,11,12,13,14,15,
3, 4, 4, 4,4,5,6,
2, 8,9,10,11,12,13,14,
1, 8,9,10,11,12,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
# 创建数据集_2
data_set_2 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [7, 8, 8, 8,8,9,14,21,12,19,14,20,
1, 6, 3, 8,4,9,8,
6, 4,9,10,4,12,13,6,
1, 4,9,10,9,4,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
# Build both frames and tag every row with the frame it came from, so the
# plot can colour lines by origin.
df_1 = pd.DataFrame(data_set_1).assign(source='df_1')
df_2 = pd.DataFrame(data_set_2).assign(source='df_2')
# Stack the frames and draw one facet row per id, one coloured line per source.
fig = pd.concat([df_1, df_2]).plot.line(
    x='cycle',
    y='Salary',
    color='source',
    title='Salary by cycle',
    facet_row='id',
    markers=True,
)
fig.show()
希望这对你有帮助!
英文:
If you want these plots as subplots with short, clean, and interactive code, I advise you to use Plotly!
You just need to run `pip install plotly`, and the nice thing about it is that pandas allows you to use Plotly as a backend engine for plotting!
After importing pandas with `import pandas as pd`, you write this small piece of code: `pd.options.plotting.backend = "plotly"`, and that's all! You can now use Plotly!
I merged the two DataFrames you created; I created a column called source
to differentiate between the input of the first DataFrame and the input of the second one.
Here is the modified code sample:
#Load the required libraries
import pandas as pd
pd.options.plotting.backend = "plotly"
#Create dataset_1
data_set_1 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [6, 7, 7, 7,8,9,10,11,12,13,14,15,
3, 4, 4, 4,4,5,6,
2, 8,9,10,11,12,13,14,
1, 8,9,10,11,12,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
#Create dataset_2
data_set_2 = {'id': [1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3,3,
4, 4, 4, 4, 4,4,],
'cycle': [0.0, 0.2,0.4, 0.6, 0.8, 1,1.2,1.4,1.6,1.8,2.0,2.2,
0.0, 0.2,0.4, 0.6,0.8,1.0,1.2,
0.0, 0.2,0.4, 0.6, 0.8,1.0,1.2,1.4,
0.0, 0.2,0.4, 0.6, 0.8,1.0,],
'Salary': [7, 8, 8, 8,8,9,14,21,12,19,14,20,
1, 6, 3, 8,4,9,8,
6, 4,9,10,4,12,13,6,
1, 4,9,10,9,4,],
'Children': ['Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'No','No', 'Yes', 'Yes', 'Yes', 'No',
'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes',
'Yes', 'No','Yes', 'Yes', 'No','No', 'Yes','Yes',
'Yes', 'Yes', 'No','Yes', 'Yes','Yes',],
'Days': [141, 123, 128, 66, 66, 120, 141, 52,96, 120, 141, 52,
141, 96, 120,120, 141, 52,96,
141, 15,123, 128, 66, 120, 141, 141,
141, 141,123, 128, 66,67,],
}
# Build both frames and tag every row with the frame it came from, so the
# plot can colour lines by origin.
df_1 = pd.DataFrame(data_set_1).assign(source='df_1')
df_2 = pd.DataFrame(data_set_2).assign(source='df_2')
# Stack the frames and draw one facet row per id, one coloured line per source.
fig = pd.concat([df_1, df_2]).plot.line(
    x='cycle',
    y='Salary',
    color='source',
    title='Salary by cycle',
    facet_row='id',
    markers=True,
)
fig.show()
You should get interactive subplots like in the image below:
I hope this helps!
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论