将事件绘制为单个条形图

huangapple go评论63阅读模式
英文:

Plotting events as a single bar barplot

问题

# Fragment of a plotting script: relies on `plt` (matplotlib.pyplot) and a
# DataFrame `df` with 'start' and 'duration' columns being defined earlier.

# Open a 10x6 inch figure
plt.figure(figsize=(10, 6))

# Draw one horizontal bar per event: y position = start, bar length = duration
plt.barh(df['start'], df['duration'], height=0.5)

# Label the y-axis (the bars are positioned by df['start'])
plt.ylabel('Start Date')

# Label the x-axis; the label states the unit as minutes
plt.xlabel('Duration (minutes)')

# Place x-axis ticks every 6 units from 0 up to the largest (truncated) duration
plt.xticks(range(0, int(df['duration'].max()) + 1, 6))

# Add dashed vertical guide lines every 6 x-axis units.
# NOTE(review): the axis label gives the unit as minutes, so these are 6-minute,
# not 6-hour, steps; if the largest duration is below 6 this loop draws nothing.
for i in range(6, int(df['duration'].max()) + 1, 6):
    plt.axvline(x=i, color='gray', linestyle='--', linewidth=0.8)

# Show the plot
plt.show()
英文:

I have done some network testing and have ended up with a CSV file representing the dropouts in connection, in this format:

seqstart,seqend,start date,start time,end date,end time,latency(ms)
23,58,20/02/2023,17:38:12.524622,20/02/2023,17:38:30.024620,17.499998
83,144,20/02/2023,17:38:42.524619,20/02/2023,17:39:13.024569,30.49995
177,187,20/02/2023,17:39:29.524621,20/02/2023,17:39:34.524625,5.000004
188,217,20/02/2023,17:39:35.024591,20/02/2023,17:39:49.524621,14.50003
4011,4044,20/02/2023,18:11:26.524624,20/02/2023,18:11:43.024625,16.500001
4131,4163,20/02/2023,18:12:26.524627,20/02/2023,18:12:42.524625,15.999998
4191,4223,20/02/2023,18:12:56.524627,20/02/2023,18:13:12.524627,16.0
4461,4523,20/02/2023,18:15:11.524626,20/02/2023,18:15:42.524626,31.0
16671,16733,20/02/2023,19:56:56.524634,20/02/2023,19:57:27.524628,30.999994

I want to illustrate this in a format similar to this:
将事件绘制为单个条形图

Together with ChatGPT I managed to get this, with the code:

将事件绘制为单个条形图

import pandas as pd
import matplotlib.pyplot as plt

# Layout of the combined "<date> <time>" strings found in the CSV
TIMESTAMP_FORMAT = '%d/%m/%Y %H:%M:%S.%f'


def _combine_datetime(frame, date_column, time_column):
    """Join a date column and a time column of *frame* into one datetime Series."""
    stamped = frame[date_column] + ' ' + frame[time_column]
    return pd.to_datetime(stamped, format=TIMESTAMP_FORMAT)


# Read the dropout log into a DataFrame
df = pd.read_csv('filename.csv')

# Build full start/end timestamps from the separate date and time columns
df['start'] = _combine_datetime(df, 'start date', 'start time')
df['end'] = _combine_datetime(df, 'end date', 'end time')

# Length of every event, expressed in minutes
elapsed = df['end'] - df['start']
df['duration'] = elapsed.dt.total_seconds() / 60

# Order the events chronologically
df = df.sort_values(by='start')

# One horizontal bar per event: positioned at its start, as long as its duration
fig, ax = plt.subplots()
ax.barh(df['start'], df['duration'], height=0.5)
ax.set_ylabel('Start Date')
ax.set_xlabel('Duration (minutes)')

# Display the figure
plt.show()

How can I alter this code to get the illustration shown at the top? I have tried asking ChatGPT further, but it did not give me any usable results.

Optionally: since the data is collected over a period of several days, it might be more readable to display 6 or 12 hours on the X-axis and then start a new line/pipe, as shown below:

将事件绘制为单个条形图

答案1

得分: 1

这假设您能够重新构建数据框，使每一天交替包含无丢失的持续时间和有丢失的持续时间。丢失次数少于丢失最多那一天的日期用 np.nan 值填充。注意，每行的所有持续时间之和应为 24 小时。

import pandas as pd
import numpy as np
from itertools import cycle
import matplotlib.pyplot as plt

# Toy data: one row per day, one column per consecutive span.
# Shorter days are padded with NaN.
durations = [
    [10, 2, 1, 1, 10],
    [10, 3, 11, np.nan, np.nan],
]
df = pd.DataFrame(durations, index=["day1", "day2"])

# Alternate green/red, one color for each column of the frame
color_cycles = cycle(["green", "red"])
n_segments = df.shape[1]
colors = [color for color, _ in zip(color_cycles, range(n_segments))]

# Stack the spans of each day into a single horizontal bar
df.plot.barh(stacked=True, color=colors)
# Replace the automatic legend with an empty one
plt.legend([])

将事件绘制为单个条形图

英文:

This assumes you manage to rebuild your dataframe so that, for each day, it alternates between the duration without dropouts and the duration with dropouts. The days with fewer dropouts than the day with the most dropouts are filled with np.nan values. Note: all durations per row should add up to 24h.

import pandas as pd
import numpy as np
from itertools import cycle
import matplotlib.pyplot as plt

# Toy data: one row per day, one column per consecutive span.
# Days with fewer spans are padded with NaN.
df = pd.DataFrame([
    [10, 2, 1, 1, 10],
    [10, 3, 11, np.nan, np.nan],
], index=["day1", "day2"])

# One color per column, alternating green/red.
# The colors must be drawn from the cycle iterator (`color_cycles`), and the
# list is sized by the number of columns: `df[0]` would select the first
# column, whose length is the number of rows.
color_cycles = cycle(["green", "red"])
colors = [next(color_cycles) for _ in range(len(df.columns))]

# Stack the spans of each day into a single horizontal bar
df.plot.barh(stacked=True, color=colors)
# Replace the automatic legend with an empty one
plt.legend([])

将事件绘制为单个条形图

答案2

得分: 1

这是一个基于图像的实现版本。请注意：这里设置了最小错误宽度，因为您的错误持续时间很短，否则几乎无法看见，请根据需要调整。另外：如果某个错误从第一天开始、到第二天结束，则只会显示在第一天。

import pandas as pd
from datetime import timedelta
import numpy as np
import matplotlib.pyplot as plt

def _minute_of_day(clock):
    """Convert an ``HH:MM:SS[.ffffff]`` string to minutes since midnight."""
    parts = clock.split(":")
    return timedelta(
        hours=int(parts[0]),
        minutes=int(parts[1]),
        seconds=float(parts[2]),
    ).total_seconds() / 60


# The dates in the file are written day-first (dd/mm/yyyy); without
# dayfirst=True an ambiguous date such as 01/02/2023 is read as January 2nd.
df = pd.read_csv(
    "data.csv",
    parse_dates=["start date", "end date"],
    dayfirst=True,
)

# Start and end of every event as (fractional) minutes since midnight
df["start minute of the day"] = df["start time"].apply(_minute_of_day)
df["end minute of the day"] = df["end time"].apply(_minute_of_day)

# Ordered list of the distinct days
days = sorted(df["start date"].unique())

# Image geometry and colors
bar_height = 20  # px per day row
minute_width = 1  # px per minute
color_green = np.array([0, 191, 84])
color_red = np.array([255, 20, 64])
minimum_error_width = 50  # px; reduce as needed

img_width = minute_width * 24 * 60  # one full day
img_height = bar_height * len(days)
img = np.ones((img_height, img_width, 3), np.uint8)

# Start with everything green
img = img * color_green

for img_row_idx, day in enumerate(days):
    df_filtered = df[df["start date"] == day]
    row_top = img_row_idx * bar_height
    for _, row in df_filtered.iterrows():
        # Scale minutes to pixels so that a minute_width other than 1 works
        start_index = int(row["start minute of the day"] * minute_width)
        end_index = int(row["end minute of the day"] * minute_width)
        # Widen very short events to the minimum width, clipped to the image
        end_index = min(
            max(end_index, start_index + minimum_error_width), img_width
        )
        # Paint the event red
        img[row_top:row_top + bar_height, start_index:end_index] = color_red

plt.imshow(img)

# One tick per day row, centred vertically, labelled with the date (YYYY-MM-DD)
plt.yticks(
    ticks=np.linspace(
        bar_height / 2, bar_height * len(days) - bar_height / 2, num=len(days)
    ),
    labels=[str(d)[:10] for d in days],
)
plt.xlabel("一天中的分钟")
plt.ylabel("日期")
plt.savefig("output.jpg")

请注意,这是代码的翻译部分,不包括注释。

英文:

This is a version that works with images.

Please note: there is a minimum error width, because your errors are so short in duration that they would otherwise hardly ever be visible. Adjust it to your needs. Also: if an error starts on day one and ends on day two, the error is only shown for day one.

将事件绘制为单个条形图

import pandas as pd
from datetime import timedelta
import numpy as np
import matplotlib.pyplot as plt
def _minute_of_day(clock):
    """Convert an ``HH:MM:SS[.ffffff]`` string to minutes since midnight."""
    parts = clock.split(":")
    return timedelta(
        hours=int(parts[0]),
        minutes=int(parts[1]),
        seconds=float(parts[2]),
    ).total_seconds() / 60


# The dates in the file are written day-first (dd/mm/yyyy); without
# dayfirst=True an ambiguous date such as 01/02/2023 is read as January 2nd.
df = pd.read_csv(
    "data.csv",
    parse_dates=["start date", "end date"],
    dayfirst=True,
)

# Start and end of every event as (fractional) minutes since midnight
df["start minute of the day"] = df["start time"].apply(_minute_of_day)
df["end minute of the day"] = df["end time"].apply(_minute_of_day)

# Ordered list of the distinct days
days = sorted(df["start date"].unique())

# Image geometry and colors
bar_height = 20  # px per day row
minute_width = 1  # px per minute
color_green = np.array([0, 191, 84])
color_red = np.array([255, 20, 64])
minimum_error_width = 50  # px; reduce to your need

img_width = minute_width * 24 * 60  # one full day
img_height = bar_height * len(days)
img = np.ones((img_height, img_width, 3), np.uint8)

# Start with everything green
img = img * color_green

for img_row_idx, day in enumerate(days):
    df_filtered = df[df["start date"] == day]
    row_top = img_row_idx * bar_height
    for _, row in df_filtered.iterrows():
        # Scale minutes to pixels so that a minute_width other than 1 works
        start_index = int(row["start minute of the day"] * minute_width)
        end_index = int(row["end minute of the day"] * minute_width)
        # Widen very short events to the minimum width, clipped to the image
        end_index = min(
            max(end_index, start_index + minimum_error_width), img_width
        )
        # Paint the event red
        img[row_top:row_top + bar_height, start_index:end_index] = color_red

plt.imshow(img)

# One tick per day row, centred vertically, labelled with the date (YYYY-MM-DD)
plt.yticks(
    ticks=np.linspace(
        bar_height / 2, bar_height * len(days) - bar_height / 2, num=len(days)
    ),
    labels=[str(d)[:10] for d in days],
)
plt.xlabel("minute of day")
plt.ylabel("day")
plt.savefig("output.jpg")

huangapple
  • 本文由 发表于 2023年3月9日 21:47:28
  • 转载请务必保留本文链接:https://go.coder-hub.com/75685479.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定