英文:
Creating a "Football Field Chart"
问题
我有一个由以下列组成的DataFrame:
VP-ID,
MotivA_MotivatorA_InnerDriverA_PR,
MotivA_MotivatorA_InnerDriverB_PR,
MotivA_MotivatorB_InnerDriverA_PR,
MotivA_MotivatorB_InnerDriverB_PR,
MotivA_MotivatorC_InnerDriverA_PR,
MotivA_MotivatorC_InnerDriverB_PR,
MotivA_MotivatorD_InnerDriverA_PR,
MotivA_MotivatorD_InnerDriverB_PR,
...
MotivC_MotivatorA_InnerDriverA_PR,
MotivC_MotivatorA_InnerDriverB_PR,
MotivC_MotivatorB_InnerDriverA_PR,
MotivC_MotivatorB_InnerDriverB_PR,
MotivC_MotivatorC_InnerDriverA_PR,
MotivC_MotivatorC_InnerDriverB_PR,
MotivC_MotivatorD_InnerDriverA_PR,
MotivC_MotivatorD_InnerDriverB_PR。
MotivatorA 等只是占位名称,实际的列名使用的是真实的术语。
这里,“PR”代表百分位排名(0-100)。
每张图表代表一个动机(Motiv),它由四个激励因素(Motivator)组成;每个激励因素有两种变体,其取值分别来自InnerDriverA_PR和InnerDriverB_PR。
最终结果应该如下所示:
这是一个“足球场图”吗?
我该如何用Matplotlib实现这个图表?
最小可复制示例:
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
# 创建示例数据
# Column names follow the pattern <Motive>_<Motivator>_<InnerDriver>_PR.
columns = [
    'MotivA_MotivatorA_InnerDriverA_PR',
    'MotivA_MotivatorA_InnerDriverB_PR',
    'MotivA_MotivatorB_InnerDriverA_PR',
    'MotivA_MotivatorB_InnerDriverB_PR',
    'MotivA_MotivatorC_InnerDriverA_PR',
    'MotivA_MotivatorC_InnerDriverB_PR',
    'MotivA_MotivatorD_InnerDriverA_PR',
    'MotivA_MotivatorD_InnerDriverB_PR',
    'MotivB_MotivatorA_InnerDriverA_PR',
    'MotivB_MotivatorA_InnerDriverB_PR',
    'MotivB_MotivatorB_InnerDriverA_PR',
    'MotivB_MotivatorB_InnerDriverB_PR',
    'MotivB_MotivatorC_InnerDriverA_PR',
    'MotivB_MotivatorC_InnerDriverB_PR',
    'MotivB_MotivatorD_InnerDriverA_PR',
    'MotivB_MotivatorD_InnerDriverB_PR',
]

# Five subjects, one row each, filled with random percentile ranks (0-100).
# The frame is built in one step so the columns get an integer dtype;
# growing an empty frame row by row via .loc can leave them as object dtype.
subjects = [f'Subject_{i}' for i in range(1, 6)]
df = pd.DataFrame(
    [[random.randint(0, 100) for _ in columns] for _ in subjects],
    index=subjects,
    columns=columns,
)
#────────────────────────────────────────────────
def create_horizontal_bar_chart(df, proband):
    """Draw one grouped horizontal bar chart per motive for a single subject.

    Column names are expected to look like
    ``<Motive>_<Motivator>_<InnerDriver>_PR``. Columns with fewer than three
    ``_``-separated parts (for example an ID column) are ignored.

    Args:
        df: DataFrame indexed by subject label.
        proband: Index label of the subject (row) to plot.

    Returns:
        None. One figure per motive is shown via ``plt.show()``.
    """
    # Parse every column name once and skip names that do not follow the
    # scheme, instead of failing later when indexing the split parts.
    parsed = {}
    for col in df.columns:
        parts = str(col).split('_')
        if len(parts) >= 3:
            parsed[col] = parts
    motives = sorted({parts[0] for parts in parsed.values()})

    for motive in motives:
        # Compare the parsed prefix exactly: startswith() would also pick up
        # a longer motive name that merely begins with the same characters.
        columns = [col for col, parts in parsed.items() if parts[0] == motive]
        data = pd.DataFrame({
            'Motivator': [parsed[col][1] for col in columns],
            'InnerDriver': [parsed[col][2] for col in columns],
            # Coerce to numbers so the bar lengths are numeric even when the
            # source columns are stored with object dtype.
            'Value': pd.to_numeric(df.loc[proband, columns]).to_numpy(),
        })

        plt.figure(figsize=(10, 6))
        sns.barplot(x='Value', y='Motivator', hue='InnerDriver', data=data)
        plt.title(f'{proband} - {motive}')
        plt.show()
create_horizontal_bar_chart(df, 'Subject_1')
英文:
I have a DataFrame consisting of the following columns:
VP-ID,
MotivA_MotivatorA_InnerDriverA_PR,
MotivA_MotivatorA_InnerDriverB_PR,
MotivA_MotivatorB_InnerDriverA_PR,
MotivA_MotivatorB_InnerDriverB_PR,
MotivA_MotivatorC_InnerDriverA_PR,
MotivA_MotivatorC_InnerDriverB_PR,
MotivA_MotivatorD_InnerDriverA_PR,
MotivA_MotivatorD_InnerDriverB_PR,
...
MotivC_MotivatorA_InnerDriverA_PR,
MotivC_MotivatorA_InnerDriverB_PR,
MotivC_MotivatorB_InnerDriverA_PR,
MotivC_MotivatorB_InnerDriverB_PR,
MotivC_MotivatorC_InnerDriverA_PR,
MotivC_MotivatorC_InnerDriverB_PR,
MotivC_MotivatorD_InnerDriverA_PR,
MotivC_MotivatorD_InnerDriverB_PR.
The designations MotivatorA etc. are placeholders; the real column names use the actual terms.
Here, "PR" stands for Percentile Rank (0-100).
Each chart represents one motive, which consists of four motivators; each motivator has two variants, whose values come from InnerDriverA_PR and InnerDriverB_PR.
The final result should look like this:
Is this a "Football Field Chart"?
How can I implement this graph with Matplotlib?
Minimal reproducible example:
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
# Create example data
# Column names follow the pattern <Motive>_<Motivator>_<InnerDriver>_PR.
columns = [
    'MotivA_MotivatorA_InnerDriverA_PR',
    'MotivA_MotivatorA_InnerDriverB_PR',
    'MotivA_MotivatorB_InnerDriverA_PR',
    'MotivA_MotivatorB_InnerDriverB_PR',
    'MotivA_MotivatorC_InnerDriverA_PR',
    'MotivA_MotivatorC_InnerDriverB_PR',
    'MotivA_MotivatorD_InnerDriverA_PR',
    'MotivA_MotivatorD_InnerDriverB_PR',
    'MotivB_MotivatorA_InnerDriverA_PR',
    'MotivB_MotivatorA_InnerDriverB_PR',
    'MotivB_MotivatorB_InnerDriverA_PR',
    'MotivB_MotivatorB_InnerDriverB_PR',
    'MotivB_MotivatorC_InnerDriverA_PR',
    'MotivB_MotivatorC_InnerDriverB_PR',
    'MotivB_MotivatorD_InnerDriverA_PR',
    'MotivB_MotivatorD_InnerDriverB_PR',
]

# Five subjects, one row each, filled with random percentile ranks (0-100).
# The frame is built in one step so the columns get an integer dtype;
# growing an empty frame row by row via .loc can leave them as object dtype.
subjects = [f'Subject_{i}' for i in range(1, 6)]
df = pd.DataFrame(
    [[random.randint(0, 100) for _ in columns] for _ in subjects],
    index=subjects,
    columns=columns,
)
#────────────────────────────────────────────────
def create_horizontal_bar_chart(df, proband):
    """Draw one grouped horizontal bar chart per motive for a single subject.

    Column names are expected to look like
    ``<Motive>_<Motivator>_<InnerDriver>_PR``. Columns with fewer than three
    ``_``-separated parts (for example an ID column) are ignored.

    Args:
        df: DataFrame indexed by subject label.
        proband: Index label of the subject (row) to plot.

    Returns:
        None. One figure per motive is shown via ``plt.show()``.
    """
    # Parse every column name once and skip names that do not follow the
    # scheme, instead of failing later when indexing the split parts.
    parsed = {}
    for col in df.columns:
        parts = str(col).split('_')
        if len(parts) >= 3:
            parsed[col] = parts
    motives = sorted({parts[0] for parts in parsed.values()})

    for motive in motives:
        # Compare the parsed prefix exactly: startswith() would also pick up
        # a longer motive name that merely begins with the same characters.
        columns = [col for col, parts in parsed.items() if parts[0] == motive]
        data = pd.DataFrame({
            'Motivator': [parsed[col][1] for col in columns],
            'InnerDriver': [parsed[col][2] for col in columns],
            # Coerce to numbers so the bar lengths are numeric even when the
            # source columns are stored with object dtype.
            'Value': pd.to_numeric(df.loc[proband, columns]).to_numpy(),
        })

        plt.figure(figsize=(10, 6))
        sns.barplot(x='Value', y='Motivator', hue='InnerDriver', data=data)
        plt.title(f'{proband} - {motive}')
        plt.show()
create_horizontal_bar_chart(df, 'Subject_1')
However, this draws the motivators as separate bars and is still far from what I want (see the example above).
答案1
得分: 1
这是一个脊柱图表。问题在于它不会对齐您的条形图。因此,要做到这一点,您需要发挥创造力:
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Column names follow the pattern <Motive>_<Motivator>_<InnerDriver>_PR.
columns = [
    'MotivA_MotivatorA_InnerDriverA_PR',
    'MotivA_MotivatorA_InnerDriverB_PR',
    'MotivA_MotivatorB_InnerDriverA_PR',
    'MotivA_MotivatorB_InnerDriverB_PR',
    'MotivA_MotivatorC_InnerDriverA_PR',
    'MotivA_MotivatorC_InnerDriverB_PR',
    'MotivA_MotivatorD_InnerDriverA_PR',
    'MotivA_MotivatorD_InnerDriverB_PR',
    'MotivB_MotivatorA_InnerDriverA_PR',
    'MotivB_MotivatorA_InnerDriverB_PR',
    'MotivB_MotivatorB_InnerDriverA_PR',
    'MotivB_MotivatorB_InnerDriverB_PR',
    'MotivB_MotivatorC_InnerDriverA_PR',
    'MotivB_MotivatorC_InnerDriverB_PR',
    'MotivB_MotivatorD_InnerDriverA_PR',
    'MotivB_MotivatorD_InnerDriverB_PR',
]

# Five subjects, one row each, filled with random percentile ranks (0-100).
# The frame is built in one step so the columns get an integer dtype;
# growing an empty frame row by row via .loc can leave them as object dtype.
subjects = [f'Subject_{i}' for i in range(1, 6)]
df = pd.DataFrame(
    [[random.randint(0, 100) for _ in columns] for _ in subjects],
    index=subjects,
    columns=columns,
)
def create_horizontal_bar_chart(df, proband):
    """Draw one mirrored ("spine") bar chart per motive for a single subject.

    For every motive, each motivator gets one row: its ``InnerDriverA``
    value is drawn to the left of the centre line and its ``InnerDriverB``
    value to the right, on an x-axis fixed to -100..100 and labelled with
    absolute values.

    Column names are expected to look like
    ``<Motive>_<Motivator>_<InnerDriver>_PR``, with the inner drivers named
    exactly ``InnerDriverA`` and ``InnerDriverB``. Columns with fewer than
    three ``_``-separated parts (for example an ID column) are ignored.

    Args:
        df: DataFrame indexed by subject label.
        proband: Index label of the subject (row) to plot.

    Returns:
        None. One figure per motive is shown via ``plt.show()``.
    """
    # Parse every column name once and skip names that do not follow the
    # scheme, instead of failing when the name is split into its parts.
    parsed = {}
    for col in df.columns:
        parts = str(col).split('_')
        if len(parts) >= 3:
            parsed[col] = parts
    motives = sorted({parts[0] for parts in parsed.values()})

    for motive in motives:
        # Only this motive's columns are reshaped; the prefix is compared
        # exactly so that similarly named motives are not mixed up.
        columns = [col for col, parts in parsed.items() if parts[0] == motive]
        long_data = pd.DataFrame({
            'Motivator': [parsed[col][1] for col in columns],
            'InnerDriver': [parsed[col][2] for col in columns],
            'PR': pd.to_numeric(df.loc[proband, columns]).to_numpy(),
        })

        # One row per motivator, one column per inner driver.
        data_pivot = pd.pivot_table(long_data, values='PR', index='Motivator',
                                    columns='InnerDriver', aggfunc='first').reset_index()
        # Negate driver A so its bars grow leftwards from the centre line.
        data_pivot['InnerDriverA'] = -data_pivot['InnerDriverA']
        # barh draws the first row at the bottom, so sort descending to read
        # the motivators top-down in ascending order.
        data_pivot = data_pivot.sort_values('Motivator', ascending=False).reset_index(drop=True)

        fig, ax = plt.subplots(figsize=(10, 8))

        # Stacked bars: negative values extend left, positive values right.
        data_pivot.plot(kind='barh', x='Motivator', y=['InnerDriverA', 'InnerDriverB'],
                        ax=ax, stacked=True, color='#5fba7d', alpha=0.5, legend=False)
        ax.set_xlabel('PR')
        ax.axvline(0, color='grey', linewidth=4)  # centre line at x=0
        ax.set_xlim(-100, 100)

        # Vertical grid lines every 25 units, labelled with absolute values
        # so that both sides read 0-100.
        xticks = list(range(-100, 101, 25))
        ax.set_xticks(xticks)
        ax.grid(True, axis='x', linestyle='dotted')
        ax.set_xticklabels([abs(x) for x in xticks], fontsize=16, color='white')

        # Left-hand labels: motivator name plus the left-side driver.
        yticks = np.arange(len(data_pivot))
        yticklabels_left = [f'{motivator} InnerDriverA' for motivator in data_pivot['Motivator']]
        yticklabels_right = ['InnerDriverB'] * len(data_pivot)
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticklabels_left, va='center', ha='right', fontsize=14, color='black')

        # Right-hand labels live on a twin axis that mirrors the y-limits of
        # the main axis, so the same tick positions line up with the bars.
        ax2 = ax.twinx()
        ax2.set_ylim(ax.get_ylim())
        ax2.set_yticks(yticks)
        ax2.set_yticklabels(yticklabels_right, va='center', ha='left', fontsize=14, color='black')

        # Remove tick marks and the frame around both axes.
        ax.tick_params(axis='x', which='both', bottom=False, top=False)
        ax.tick_params(axis='y', which='both', left=False, right=False)
        ax2.tick_params(axis='y', which='both', left=False, right=False)
        for axes in (ax, ax2):
            for side in ('top', 'right', 'bottom', 'left'):
                axes.spines[side].set_visible(False)

        # Write each value inside its bar, near the outer end.
        for i, row in data_pivot.iterrows():
            value_a = row['InnerDriverA']
            value_b = row['InnerDriverB']
            ax.text(value_a + 2, i, str(-value_a), va='center', ha='left',
                    color='white', fontsize=18, fontweight='bold')
            ax.text(value_b - 2, i, str(value_b), va='center', ha='right',
                    color='white', fontsize=18, fontweight='bold')

        # Grey strip below the axes as background for the white x tick labels.
        rect = plt.Rectangle((-.05, -0.08), 1.10, 0.08, transform=ax.transAxes,
                             color='grey', clip_on=False)
        ax.add_patch(rect)

        plt.title(f'{proband} - {motive}')
        plt.show()
<details>
<summary>英文:</summary>
It's a spine chart. The catch is that it won't line up your bars automatically, so you need to get creative:
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Column names follow the pattern <Motive>_<Motivator>_<InnerDriver>_PR.
columns = [
    'MotivA_MotivatorA_InnerDriverA_PR',
    'MotivA_MotivatorA_InnerDriverB_PR',
    'MotivA_MotivatorB_InnerDriverA_PR',
    'MotivA_MotivatorB_InnerDriverB_PR',
    'MotivA_MotivatorC_InnerDriverA_PR',
    'MotivA_MotivatorC_InnerDriverB_PR',
    'MotivA_MotivatorD_InnerDriverA_PR',
    'MotivA_MotivatorD_InnerDriverB_PR',
    'MotivB_MotivatorA_InnerDriverA_PR',
    'MotivB_MotivatorA_InnerDriverB_PR',
    'MotivB_MotivatorB_InnerDriverA_PR',
    'MotivB_MotivatorB_InnerDriverB_PR',
    'MotivB_MotivatorC_InnerDriverA_PR',
    'MotivB_MotivatorC_InnerDriverB_PR',
    'MotivB_MotivatorD_InnerDriverA_PR',
    'MotivB_MotivatorD_InnerDriverB_PR',
]

# Five subjects, one row each, filled with random percentile ranks (0-100).
# The frame is built in one step so the columns get an integer dtype;
# growing an empty frame row by row via .loc can leave them as object dtype.
subjects = [f'Subject_{i}' for i in range(1, 6)]
df = pd.DataFrame(
    [[random.randint(0, 100) for _ in columns] for _ in subjects],
    index=subjects,
    columns=columns,
)
def create_horizontal_bar_chart(df, proband):
    """Draw one mirrored ("spine") bar chart per motive for a single subject.

    For every motive, each motivator gets one row: its ``InnerDriverA``
    value is drawn to the left of the centre line and its ``InnerDriverB``
    value to the right, on an x-axis fixed to -100..100 and labelled with
    absolute values.

    Column names are expected to look like
    ``<Motive>_<Motivator>_<InnerDriver>_PR``, with the inner drivers named
    exactly ``InnerDriverA`` and ``InnerDriverB``. Columns with fewer than
    three ``_``-separated parts (for example an ID column) are ignored.

    Args:
        df: DataFrame indexed by subject label.
        proband: Index label of the subject (row) to plot.

    Returns:
        None. One figure per motive is shown via ``plt.show()``.
    """
    # Parse every column name once and skip names that do not follow the
    # scheme, instead of failing when the name is split into its parts.
    parsed = {}
    for col in df.columns:
        parts = str(col).split('_')
        if len(parts) >= 3:
            parsed[col] = parts
    motives = sorted({parts[0] for parts in parsed.values()})

    for motive in motives:
        # Only this motive's columns are reshaped; the prefix is compared
        # exactly so that similarly named motives are not mixed up.
        columns = [col for col, parts in parsed.items() if parts[0] == motive]
        long_data = pd.DataFrame({
            'Motivator': [parsed[col][1] for col in columns],
            'InnerDriver': [parsed[col][2] for col in columns],
            'PR': pd.to_numeric(df.loc[proband, columns]).to_numpy(),
        })

        # One row per motivator, one column per inner driver.
        data_pivot = pd.pivot_table(long_data, values='PR', index='Motivator',
                                    columns='InnerDriver', aggfunc='first').reset_index()
        # Negate driver A so its bars grow leftwards from the centre line.
        data_pivot['InnerDriverA'] = -data_pivot['InnerDriverA']
        # barh draws the first row at the bottom, so sort descending to read
        # the motivators top-down in ascending order.
        data_pivot = data_pivot.sort_values('Motivator', ascending=False).reset_index(drop=True)

        fig, ax = plt.subplots(figsize=(10, 8))

        # Stacked bars: negative values extend left, positive values right.
        data_pivot.plot(kind='barh', x='Motivator', y=['InnerDriverA', 'InnerDriverB'],
                        ax=ax, stacked=True, color='#5fba7d', alpha=0.5, legend=False)
        ax.set_xlabel('PR')
        ax.axvline(0, color='grey', linewidth=4)  # centre line at x=0
        ax.set_xlim(-100, 100)

        # Vertical grid lines every 25 units, labelled with absolute values
        # so that both sides read 0-100.
        xticks = list(range(-100, 101, 25))
        ax.set_xticks(xticks)
        ax.grid(True, axis='x', linestyle='dotted')
        ax.set_xticklabels([abs(x) for x in xticks], fontsize=16, color='white')

        # Left-hand labels: motivator name plus the left-side driver.
        yticks = np.arange(len(data_pivot))
        yticklabels_left = [f'{motivator} InnerDriverA' for motivator in data_pivot['Motivator']]
        yticklabels_right = ['InnerDriverB'] * len(data_pivot)
        ax.set_yticks(yticks)
        ax.set_yticklabels(yticklabels_left, va='center', ha='right', fontsize=14, color='black')

        # Right-hand labels live on a twin axis that mirrors the y-limits of
        # the main axis, so the same tick positions line up with the bars.
        ax2 = ax.twinx()
        ax2.set_ylim(ax.get_ylim())
        ax2.set_yticks(yticks)
        ax2.set_yticklabels(yticklabels_right, va='center', ha='left', fontsize=14, color='black')

        # Remove tick marks and the frame around both axes.
        ax.tick_params(axis='x', which='both', bottom=False, top=False)
        ax.tick_params(axis='y', which='both', left=False, right=False)
        ax2.tick_params(axis='y', which='both', left=False, right=False)
        for axes in (ax, ax2):
            for side in ('top', 'right', 'bottom', 'left'):
                axes.spines[side].set_visible(False)

        # Write each value inside its bar, near the outer end.
        for i, row in data_pivot.iterrows():
            value_a = row['InnerDriverA']
            value_b = row['InnerDriverB']
            ax.text(value_a + 2, i, str(-value_a), va='center', ha='left',
                    color='white', fontsize=18, fontweight='bold')
            ax.text(value_b - 2, i, str(value_b), va='center', ha='right',
                    color='white', fontsize=18, fontweight='bold')

        # Grey strip below the axes as background for the white x tick labels.
        rect = plt.Rectangle((-.05, -0.08), 1.10, 0.08, transform=ax.transAxes,
                             color='grey', clip_on=False)
        ax.add_patch(rect)

        plt.title(f'{proband} - {motive}')
        plt.show()
create_horizontal_bar_chart(df, 'Subject_1')
Output:
[![enter image description here][1]][1]
and...
[![enter image description here][2]][2]
[1]: https://i.stack.imgur.com/kOVSL.png
[2]: https://i.stack.imgur.com/39jcb.png
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论