How to convert employee punch data to a 15-minute interval matrix?


I am attempting to take a table similar to the below raw data example with punch-in and punch-out times and convert it to a table (desired format below) which will allow the data to be easily used in Power BI area charts (this data will be plotted with transaction data). Essentially, I'd like to take raw punch time data and place it on a matrix to then count the number of employees "on the clock" per 15-minute interval.

我正在尝试将类似下面的原始数据示例中的表格与打卡时间转换为表格(下面是期望的格式),以便轻松在Power BI区域图中使用这些数据(这些数据将与交易数据一起绘制)。基本上,我想将原始的打卡时间数据放入矩阵中,然后计算每15分钟间隔内“在岗”的员工数量。

Raw Data:

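| Employee ID | Punch In | Punch Out | Break In | Break Out |
|-------------|----------|-----------|----------|-----------|
| 1234        | 9 am     | 4 pm      | 12:30 pm | 1 pm      |
| 1235        | 9:30 am  | 5 pm      | null     | null      |
| 1236        | 8 am     | 4 pm      | 11 am    | 11:45 am  |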

Desired Format:

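| Employee ID | 8 am | 8:15 am | 8:30 am | 8:45 am | 9 am | 9:15 am | 9:30 am | 9:45 am |
|-------------|------|---------|---------|---------|------|---------|---------|---------|
| 1234        |      |         |         |         | 1    | 1       | 1       | 1       |
| 1235        |      |         |         |         |      |         | 1       | 1       |
| 1236        | 1    | 1       | 1       | 1       | 1    | 1       | 1       | 1       |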

Python Solution

import pandas as pd
from dateutil import parser


def _parse_clock_time(value):
    """Parse a clock string such as '9:30 am' into a ``datetime.time``.

    Returns None when *value* is missing (None or NaN). Raises ValueError
    (or TypeError for non-string input) when the value cannot be parsed.
    """
    if pd.isna(value):
        return None
    return parser.parse(value).time()


# Create a sample DataFrame with the raw punch data
data = {
    'Employee ID': [1234, 1235, 1236],
    'Punch In': ['9 am', '9:30 am', '8 am'],
    'Punch Out': ['4 pm', '5 pm', '4 pm'],
    'Break In': ['12:30 pm', None, '11 am'],
    'Break Out': ['1 pm', None, '11:45 am'],
}
df = pd.DataFrame(data)

# Define the time range for the matrix
start_time = pd.to_datetime('8:00 am').time()
end_time = pd.to_datetime('5:00 pm').time()

# Convert start and end times to Timestamp objects. The date is read once so
# both bounds are guaranteed to fall on the same calendar day.
anchor_date = pd.to_datetime('today').date()
start_timestamp = pd.Timestamp.combine(anchor_date, start_time)
end_timestamp = pd.Timestamp.combine(anchor_date, end_time)

# Create a time range with 15-minute intervals (array of datetime.time values)
time_range = pd.date_range(start=start_timestamp, end=end_timestamp, freq='15min').time

# Collect one list of cell values per employee and build the DataFrame once at
# the end; DataFrame.append was removed in pandas 2.0.
employee_ids = []
matrix_rows = []

# Iterate over each row in the raw data DataFrame
for _, row in df.iterrows():
    employee_id = row['Employee ID']

    # Parse the punch and break times; skip the employee when any of them is
    # unparseable or when a punch time is missing altogether.
    try:
        punch_in = _parse_clock_time(row['Punch In'])
        punch_out = _parse_clock_time(row['Punch Out'])
        break_in = _parse_clock_time(row['Break In'])
        break_out = _parse_clock_time(row['Break Out'])
    except (ValueError, TypeError):
        print(f"Invalid time format for Employee ID {employee_id}")
        continue
    if punch_in is None or punch_out is None:
        print(f"Invalid time format for Employee ID {employee_id}")
        continue

    # A break only counts when both of its ends are recorded.
    has_break = break_in is not None and break_out is not None

    cells = []
    for time_interval in time_range:
        on_shift = punch_in <= time_interval < punch_out
        on_break = has_break and break_in <= time_interval < break_out
        # On the clock = inside the shift and NOT inside the break window.
        # Off-clock slots stay blank (empty string), matching the desired format.
        cells.append(1 if on_shift and not on_break else '')

    employee_ids.append(employee_id)
    matrix_rows.append(cells)

# Assemble the matrix: one row per employee, one column per 15-minute slot
matrix = pd.DataFrame(
    matrix_rows,
    index=pd.Index(employee_ids, name='Employee ID'),
    columns=time_range,
)

# Show all columns
pd.set_option('display.max_columns', None)

# Show table of 1's
print(matrix)

SQL Solution

-- Create the PunchData table.
-- Times are stored as text (e.g. '9 am', '12:30 pm'), exactly as they arrive
-- in the raw punch export; BreakIn/BreakOut are NULL when no break was taken.
CREATE TABLE PunchData (
     EmployeeID INT
    ,PunchIn    VARCHAR(10)
    ,PunchOut   VARCHAR(10)
    ,BreakIn    VARCHAR(10)
    ,BreakOut   VARCHAR(10)
);

-- Insert sample data into the PunchData table
INSERT INTO PunchData (EmployeeID, PunchIn, PunchOut, BreakIn, BreakOut)
VALUES
     (1234, '9 am', '4 pm', '12:30 pm', '1 pm')
    ,(1235, '9:30 am', '5 pm', NULL, NULL)
    ,(1236, '8 am', '4 pm', '11 am', '11:45 am');
-- Define a CTE called TimeSlots that generates time values in 15-minute increments.
-- The anchor row is 00:00 and the recursive member adds 15 minutes until 23:45,
-- giving 96 rows (95 recursions, below SQL Server's default MAXRECURSION of 100).
WITH TimeSlots AS (
    SELECT CAST('00:00' AS TIME) AS [_Time]
    UNION ALL
    SELECT DATEADD(minute, 15, [_Time])
    FROM TimeSlots
    WHERE [_Time] < CAST('23:45' AS TIME)
),
-- Define a CTE called Matrix that combines PunchData and TimeSlots:
-- one row per employee per time slot, flagged '1' when on the clock.
Matrix AS (
    SELECT
         pd.EmployeeID
        ,ts.[_Time]
        ,CASE
            WHEN
                -- The slot falls inside the shift (PunchIn inclusive, PunchOut exclusive).
                -- NOTE(review): relies on implicit VARCHAR -> TIME conversion of
                -- values such as '9 am'; confirm for the real data.
                (pd.PunchIn <= ts.[_Time]
                 AND pd.PunchOut > ts.[_Time])
                -- ...and the slot does NOT fall inside the break (if one was taken)
                AND (
                    pd.BreakIn IS NULL
                    OR NOT (
                        pd.BreakIn <= ts.[_Time]
                        AND pd.BreakOut > ts.[_Time]
                    )
                )
            THEN '1'
            ELSE '0'
         END AS SlotValue
    FROM
        PunchData AS pd
        CROSS JOIN TimeSlots AS ts
)
-- Pivot the Matrix data to generate desired columns for each time slot
SELECT
     EmployeeID
    ,MAX(CASE WHEN [_Time] = '08:00' THEN SlotValue ELSE '' END) AS [08:00]
    ,MAX(CASE WHEN [_Time] = '08:15' THEN SlotValue ELSE '' END) AS [08:15]
    ,MAX(CASE WHEN [_Time] = '08:30' THEN SlotValue ELSE '' END) AS [08:30]
    ,MAX(CASE WHEN [_Time] = '08:45' THEN SlotValue ELSE '' END) AS [08:45]
    ,MAX(CASE WHEN [_Time] = '09:00' THEN SlotValue ELSE '' END) AS [09:00]
    ,MAX(CASE WHEN [_Time] = '09:15' THEN SlotValue ELSE '' END) AS [09:15]
    ,MAX(CASE WHEN [_Time] = '09:30' THEN SlotValue ELSE '' END) AS [09:30]
    ,MAX(CASE WHEN [_Time] = '09:45' THEN SlotValue ELSE '' END) AS [09:45]
    ,MAX(CASE WHEN [_Time] = '10:00' THEN SlotValue ELSE '' END) AS [10:00]
    ,MAX(CASE WHEN [_Time] = '10:15' THEN SlotValue ELSE '' END) AS [10:15]
    ,MAX(CASE WHEN [_Time] = '10:30' THEN SlotValue ELSE '' END) AS [10:30]
    ,MAX(CASE WHEN [_Time] = '10:45' THEN SlotValue ELSE '' END) AS [10:45]
    ,MAX(CASE WHEN [_Time] = '11:00' THEN SlotValue ELSE '' END) AS [11:00]
    ,MAX(CASE WHEN [_Time] = '11:15' THEN SlotValue ELSE '' END) AS [11:15]
    ,MAX(CASE WHEN [_Time] = '11:30' THEN SlotValue ELSE '' END) AS [11:30]
    ,MAX(CASE WHEN [_Time] = '11:45' THEN SlotValue ELSE '' END) AS [11:45]
    ,MAX(CASE WHEN [_Time] = '12:00' THEN SlotValue ELSE '' END) AS [12:00]
    ,MAX(CASE WHEN [_Time] = '12:15' THEN SlotValue ELSE '' END) AS [12:15]
    ,MAX(CASE WHEN [_Time] = '12:30' THEN SlotValue ELSE '' END) AS [12:30]
    ,MAX(CASE WHEN [_Time] = '12:45' THEN SlotValue ELSE '' END) AS [12:45]
    ,MAX(CASE WHEN [_Time] = '13:00' THEN SlotValue ELSE '' END) AS [13:00]
    ,MAX(CASE WHEN [_Time] = '13:15' THEN SlotValue ELSE '' END) AS [13:15]
    ,MAX(CASE WHEN [_Time] = '13:30' THEN SlotValue ELSE '' END) AS [13:30]
    ,MAX(CASE WHEN [_Time] = '13:45' THEN SlotValue ELSE '' END) AS [13:45]
    ,MAX(CASE WHEN [_Time] = '14:00' THEN SlotValue ELSE '' END) AS [14:00]
    ,MAX(CASE WHEN [_Time] = '14:15' THEN SlotValue ELSE '' END) AS [14:15]
    ,MAX(CASE WHEN [_Time] = '14:30' THEN SlotValue ELSE '' END) AS [14:30]
    ,MAX(CASE WHEN [_Time] = '14:45' THEN SlotValue ELSE '' END) AS [14:45]
    ,MAX(CASE WHEN [_Time] = '15:00' THEN SlotValue ELSE '' END) AS [15:00]
    ,MAX(CASE WHEN [_Time] = '15:15' THEN SlotValue ELSE '' END) AS [15:15]
    ,MAX(CASE WHEN [_Time] = '15:30' THEN SlotValue ELSE '' END) AS [15:30]
    ,MAX(CASE WHEN [_Time] = '15:45' THEN SlotValue ELSE '' END) AS [15:45]
    ,MAX(CASE WHEN [_Time] = '16:00' THEN SlotValue ELSE '' END) AS [16:00]
    ,MAX(CASE WHEN [_Time] = '16:15' THEN SlotValue ELSE '' END) AS [16:15]
    ,MAX(CASE WHEN [_Time] = '16:30' THEN SlotValue ELSE '' END) AS [16:30]
    ,MAX(CASE WHEN [_Time] = '16:45' THEN SlotValue ELSE '' END) AS [16:45]
    ,MAX(CASE WHEN [_Time] = '17:00' THEN SlotValue ELSE '' END) AS [17:00]
    ,MAX(CASE WHEN [_Time] = '17:15' THEN SlotValue ELSE '' END) AS [17:15]
    ,MAX(CASE WHEN [_Time] = '17:30' THEN SlotValue ELSE '' END) AS [17:30]
    ,MAX(CASE WHEN [_Time] = '17:45' THEN SlotValue ELSE '' END) AS [17:45]
    -- Add more case statements for each 15-minute increment before/after if needed...
FROM Matrix
GROUP BY EmployeeID
ORDER BY EmployeeID;

