英文:
How to plot datetime.time on x-axis
问题
import pandas as pd
import matplotlib.pyplot as plt
# sample dataframe
data = {'Id': [1503960366, 1503960366, 1503960366, 1503960366, 1503960366],
        'ActivityHour': ['2016-04-12 00:00:00', '2016-04-12 01:00:00', '2016-04-12 02:00:00', '2016-04-12 03:00:00', '2016-04-12 04:00:00'],
        'Calories': [81, 61, 59, 47, 48],
        'TotalIntensity': [20, 8, 7, 0, 0],
        'AverageIntensity': [0.333333, 0.133333, 0.116667, 0.0, 0.0],
        'StepTotal': [373, 160, 151, 0, 0]}
hourly_activity = pd.DataFrame(data)
# ActivityHour is created from plain strings, which have no .time()/.date()
# methods; parse the column first so every element is a pandas Timestamp.
hourly_activity['ActivityHour'] = pd.to_datetime(hourly_activity['ActivityHour'])
# split each timestamp into a time-of-day and a calendar-date column
hourly_activity['time'] = [d.time() for d in hourly_activity['ActivityHour']]
hourly_activity['date'] = [d.date() for d in hourly_activity['ActivityHour']]
# keep only the columns needed for the per-hour summary
hours_vs_steps = hourly_activity[['time', 'StepTotal', 'Calories']].copy()
# total steps and calories for each time of day
hours_vs_steps = hours_vs_steps.groupby(['time']).sum()
# turn the 'time' group key back into a regular column
hours_vs_steps.reset_index(inplace=True)
# 初始数据框
           Id         ActivityHour  Calories  TotalIntensity  AverageIntensity  StepTotal
0  1503960366  2016-04-12 00:00:00        81              20          0.333333        373
1  1503960366  2016-04-12 01:00:00        61               8          0.133333        160
2  1503960366  2016-04-12 02:00:00        59               7          0.116667        151
3  1503960366  2016-04-12 03:00:00        47               0          0.000000          0
4  1503960366  2016-04-12 04:00:00        48               0          0.000000          0
# 使用.groupby后的数据框
     time  StepTotal  Calories
0  00:00:00      39404     67066
1  01:00:00      21555     65464
2  02:00:00      15964     64551
3  03:00:00       5996     63013
4  04:00:00      11836     63620
# plotting code: plot StepTotal against the 'time' column; this is the call
# that fails with the TypeError in the traceback below
# ("float() argument must be a string or a real number, not 'datetime.time'")
plt.plot(hours_vs_steps.time, hours_vs_steps.StepTotal)
结果会导致以下错误:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[28], line 1
----> 1 plt.plot(hours_vs_steps.time, hours_vs_steps.StepTotal)
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\pyplot.py:2812, in plot(scalex, scaley, data, *args, **kwargs)
   2810 @_copy_docstring_and_deprecators(Axes.plot)
   2811 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2812     return gca().plot(
   2813         *args, scalex=scalex, scaley=scaley,
   2814         **({"data": data} if data is not None else {}), **kwargs)
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:1690, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
   1688 lines = [*self._get_lines(*args, data=data, **kwargs)]
   1689 for line in lines:
-> 1690     self.add_line(line)
   1691 if scalex:
   1692     self._request_autoscale_view("x")
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_base.py:2304, in _AxesBase.add_line(self, line)
   2301 if line.get_clip_path() is None:
   2302     line.set_clip_path(self.patch)
-> 2304 self._update_line_limits(line)
   2305 if not line.get_label():
   2306     line.set_label(f'_child{len(self._children)}')
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_base.py:2327, in _AxesBase._update_line_limits(self, line)
   2323 def _update_line_limits(self, line):
   2324     """
   2325     Figures out the data limit of the given line, updating self.dataLim.
   2326     """
-> 2327     path = line.get_path()
   2328     if path.vertices.size == 0:
   2329         return
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\lines.py:1029, in Line2D.get_path(self)
   1027 """Return the `~matplotlib.path.Path` associated with this line."""
   1028 if self._invalidy or self._invalidx:
-> 1029     self.recache()
   1030 return self._path
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\lines.py:657, in Line2D.recache(self, always)
    655 if always or self._invalidx:
    656     xconv = self.convert_xunits(self._xorig)
--> 657     x = _to_unmasked_float_array(xconv).ravel()
    658 else:
    659     x = self._x
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\cbook\__init__.py:1335, in _to_unmasked_float_array(x)
   1333     return np.ma.asarray(x, float).filled(np.nan)
   1334 else:
-> 1335     return np.asarray(x, float)
TypeError: float() argument must be a string or a real number, not 'datetime.time'
英文:
I'm trying to compare the usage of a device during certain hours. My dataset provides a date-time column (format YYYY-MM-DD HH:MM:SS) that I split into two columns, date and time, and then grouped by the new time column to get the total number of steps per hour.
import pandas as pd
import matplotlib.pyplot as plt
# sample dataframe
data = {'Id': [1503960366, 1503960366, 1503960366, 1503960366, 1503960366],
        'ActivityHour': ['2016-04-12 00:00:00', '2016-04-12 01:00:00', '2016-04-12 02:00:00', '2016-04-12 03:00:00', '2016-04-12 04:00:00'],
        'Calories': [81, 61, 59, 47, 48],
        'TotalIntensity': [20, 8, 7, 0, 0],
        'AverageIntensity': [0.333333, 0.133333, 0.116667, 0.0, 0.0],
        'StepTotal': [373, 160, 151, 0, 0]}
hourly_activity = pd.DataFrame(data)
# ActivityHour is created from plain strings, which have no .time()/.date()
# methods; parse the column first so every element is a pandas Timestamp.
hourly_activity['ActivityHour'] = pd.to_datetime(hourly_activity['ActivityHour'])
# split each timestamp into a time-of-day and a calendar-date column
hourly_activity['time'] = [d.time() for d in hourly_activity['ActivityHour']]
hourly_activity['date'] = [d.date() for d in hourly_activity['ActivityHour']]
# keep only the columns needed for the per-hour summary
hours_vs_steps = hourly_activity[['time', 'StepTotal', 'Calories']].copy()
# total steps and calories for each time of day
hours_vs_steps = hours_vs_steps.groupby(['time']).sum()
# turn the 'time' group key back into a regular column
hours_vs_steps.reset_index(inplace=True)
Initial DataFrame:
           Id         ActivityHour  Calories  TotalIntensity  AverageIntensity  StepTotal
0  1503960366  2016-04-12 00:00:00        81              20          0.333333        373
1  1503960366  2016-04-12 01:00:00        61               8          0.133333        160
2  1503960366  2016-04-12 02:00:00        59               7          0.116667        151
3  1503960366  2016-04-12 03:00:00        47               0          0.000000          0
4  1503960366  2016-04-12 04:00:00        48               0          0.000000          0
After .groupby:
	time	StepTotal	Calories
0	00:00:00	39404	67066
1	01:00:00	21555	65464
2	02:00:00	15964	64551
3	03:00:00	5996	63013
4	04:00:00	11836	63620
I tried many times to plot the "StepTotal" column on the y-axis with the "time" column on the x-axis, but I wasn't able to do it with either matplotlib or seaborn.
I also tried plotting against the time while it was still the index, using hours_vs_steps.index, but that didn't work either.
# plot StepTotal against the 'time' column; this is the call that fails with
# the TypeError in the traceback below
# ("float() argument must be a string or a real number, not 'datetime.time'")
plt.plot(hours_vs_steps.time, hours_vs_steps.StepTotal)
This results in the following error:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[28], line 1
----> 1 plt.plot(hours_vs_steps.time, hours_vs_steps.StepTotal)
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\pyplot.py:2812, in plot(scalex, scaley, data, *args, **kwargs)
   2810 @_copy_docstring_and_deprecators(Axes.plot)
   2811 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2812     return gca().plot(
   2813         *args, scalex=scalex, scaley=scaley,
   2814         **({"data": data} if data is not None else {}), **kwargs)
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:1690, in Axes.plot(self, scalex, scaley, data, *args, **kwargs)
   1688 lines = [*self._get_lines(*args, data=data, **kwargs)]
   1689 for line in lines:
-> 1690     self.add_line(line)
   1691 if scalex:
   1692     self._request_autoscale_view("x")
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_base.py:2304, in _AxesBase.add_line(self, line)
   2301 if line.get_clip_path() is None:
   2302     line.set_clip_path(self.patch)
-> 2304 self._update_line_limits(line)
   2305 if not line.get_label():
   2306     line.set_label(f'_child{len(self._children)}')
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_base.py:2327, in _AxesBase._update_line_limits(self, line)
   2323 def _update_line_limits(self, line):
   2324     """
   2325     Figures out the data limit of the given line, updating self.dataLim.
   2326     """
-> 2327     path = line.get_path()
   2328     if path.vertices.size == 0:
   2329         return
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\lines.py:1029, in Line2D.get_path(self)
   1027 """Return the `~matplotlib.path.Path` associated with this line."""
   1028 if self._invalidy or self._invalidx:
-> 1029     self.recache()
   1030 return self._path
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\lines.py:657, in Line2D.recache(self, always)
    655 if always or self._invalidx:
    656     xconv = self.convert_xunits(self._xorig)
--> 657     x = _to_unmasked_float_array(xconv).ravel()
    658 else:
    659     x = self._x
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\cbook\__init__.py:1335, in _to_unmasked_float_array(x)
   1333     return np.ma.asarray(x, float).filled(np.nan)
   1334 else:
-> 1335     return np.asarray(x, float)
TypeError: float() argument must be a string or a real number, not 'datetime.time'
答案1
得分: 2
- 使用 pandas.DataFrame.plot 直接绘制数据框,该方法使用 matplotlib 作为后端。
- 使用 .dt 访问器提取时间和日期组件,而不是使用列表推导。
- 在 python 3.11.3、pandas 2.0.1、matplotlib 3.7.1 中测试通过
import pandas as pd
# build the sample frame as df from the `data` dict defined earlier
df = pd.DataFrame(data)
# parse the ActivityHour strings into a datetime64[ns] column
df['ActivityHour'] = pd.to_datetime(df['ActivityHour'])
# derive the time-of-day and calendar-date columns through the .dt accessor
df['time'] = df['ActivityHour'].dt.time
df['date'] = df['ActivityHour'].dt.date
# sum steps and calories for each time of day
dfg = (
    df.groupby(['time'])[['StepTotal', 'Calories']]
    .sum()
)
# draw both columns with pandas.DataFrame.plot, Calories on a secondary y-axis
ax = dfg.plot(figsize=(9, 5), secondary_y='Calories')
英文:
- Plot the dataframe directly with pandas.DataFrame.plot, which uses matplotlib as the backend.
- Use the .dt accessor to extract the time and date components, not a list comprehension.
- Tested in python 3.11.3, pandas 2.0.1, matplotlib 3.7.1
import pandas as pd
# build the sample frame as df from the `data` dict defined earlier
df = pd.DataFrame(data)
# parse the ActivityHour strings into a datetime64[ns] column
df['ActivityHour'] = pd.to_datetime(df['ActivityHour'])
# derive the time-of-day and calendar-date columns through the .dt accessor
df['time'] = df['ActivityHour'].dt.time
df['date'] = df['ActivityHour'].dt.date
# sum steps and calories for each time of day
dfg = (
    df.groupby(['time'])[['StepTotal', 'Calories']]
    .sum()
)
# draw both columns with pandas.DataFrame.plot, Calories on a secondary y-axis
ax = dfg.plot(figsize=(9, 5), secondary_y='Calories')
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。



评论