英文:
How to convert employee punch data to a 15-minute interval matrix?
问题
I am attempting to take a table similar to the below raw data example with punch-in and punch-out times and convert it to a table (desired format below) which will allow the data to be easily used in Power BI area charts (this data will be plotted with transaction data). Essentially, I'd like to take raw punch time data and place it on a matrix to then count the number of employees "on the clock" per 15-minute interval.
我正在尝试将类似下面原始数据示例的、包含上下班打卡时间的表格转换为另一种表格(下面是期望的格式),以便轻松在Power BI面积图中使用这些数据(这些数据将与交易数据一起绘制)。基本上,我想将原始的打卡时间数据放入矩阵中,然后计算每15分钟间隔内“在岗”的员工数量。
Raw Data:
Employee ID | Punch In | Punch Out | Break In | Break Out |
---|---|---|---|---|
1234 | 9 am | 4 pm | 12:30 pm | 1 pm |
1235 | 9:30 am | 5 pm | null | null |
1236 | 8 am | 4 pm | 11 am | 11:45 am |
Desired Format:
Employee ID | 8 am | 8:15 am | 8:30 am | 8:45 am | 9 am | 9:15 am | 9:30 am | 9:45 am |
---|---|---|---|---|---|---|---|---|
1234 | | | | | 1 | 1 | 1 | 1 |
1235 | | | | | | | 1 | 1 |
1236 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
英文:
I am attempting to take a table similar to the below raw data example with punch-in and punch-out times and convert it to a table (desired format below) which will allow the data to be easily used in Power BI area charts (this data will be plotted with transaction data). Essentially, I'd like to take raw punch time data and place it on a matrix to then count the number of employees "on the clock" per 15-minute interval.
I am open to the toolset to complete this in the simplest manner. Excel, Python, SQL Server, and Power BI are my strongest platforms. Also, open to a better way of achieving this if there's one out there.
Raw Data:
Employee ID | Punch In | Punch Out | Break In | Break Out |
---|---|---|---|---|
1234 | 9 am | 4 pm | 12:30 pm | 1 pm |
1235 | 9:30 am | 5 pm | null | null |
1236 | 8 am | 4 pm | 11 am | 11:45 am |
Desired Format:
Employee ID | 8 am | 8:15 am | 8:30 am | 8:45 am | 9 am | 9:15 am | 9:30 am | 9:45 am |
---|---|---|---|---|---|---|---|---|
1234 | | | | | 1 | 1 | 1 | 1 |
1235 | | | | | | | 1 | 1 |
1236 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
答案1
得分: 1
以下是Python解决方案的翻译:
import pandas as pd
from dateutil import parser

# Script: turn raw punch-in / punch-out / break records into a matrix with
# one row per employee and one column per 15-minute slot. A cell holds 1 when
# the employee is on the clock (punched in and not on break) at the start of
# that slot, and an empty string otherwise.

# Sample raw punch data. The time strings must stay in a form dateutil can
# parse (e.g. '9 am'); translated strings such as '上午9点' cannot be parsed.
data = {
    '员工编号': [1234, 1235, 1236],
    '上班打卡时间': ['9 am', '9:30 am', '8 am'],
    '下班打卡时间': ['4 pm', '5 pm', '4 pm'],
    '午休开始时间': ['12:30 pm', None, '11 am'],
    '午休结束时间': ['1 pm', None, '11:45 am'],
}
df = pd.DataFrame(data)

# First and last slot of the matrix (both ends are included as columns).
开始时间 = pd.to_datetime('8:00 am').time()
结束时间 = pd.to_datetime('5:00 pm').time()

# date_range needs full timestamps, so anchor both ends to the same date;
# only the time-of-day part is kept afterwards.
今天 = pd.to_datetime('today').date()
开始时间戳 = pd.Timestamp.combine(今天, 开始时间)
结束时间戳 = pd.Timestamp.combine(今天, 结束时间)

# 15-minute slots as datetime.time objects; these become the column labels.
时间范围 = pd.date_range(start=开始时间戳, end=结束时间戳, freq='15min').time


def _parse_optional_time(value):
    """Return *value* parsed as a datetime.time, or None when it is missing.

    None, NaN and the empty string all count as "missing".
    """
    if pd.isna(value) or not value:
        return None
    return parser.parse(value).time()


# Rows are collected in plain lists and the DataFrame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies it on every iteration.
员工编号列表 = []
矩阵行列表 = []

for _, 行 in df.iterrows():
    员工编号 = 行['员工编号']

    try:
        上班打卡时间 = parser.parse(行['上班打卡时间']).time()
        下班打卡时间 = parser.parse(行['下班打卡时间']).time()
        午休开始时间 = _parse_optional_time(行['午休开始时间'])
        午休结束时间 = _parse_optional_time(行['午休结束时间'])
    except (ValueError, TypeError):
        # ValueError: unparsable string; TypeError: missing punch value.
        print(f"员工编号 {员工编号} 的时间格式无效")
        continue

    # A break only counts when both of its endpoints are known.
    有午休 = 午休开始时间 is not None and 午休结束时间 is not None

    矩阵行 = []
    for 时间间隔 in 时间范围:
        在班 = 上班打卡时间 <= 时间间隔 < 下班打卡时间
        在午休 = 有午休 and 午休开始时间 <= 时间间隔 < 午休结束时间
        # On the clock = inside the shift AND NOT inside the break.
        矩阵行.append(1 if 在班 and not 在午休 else '')

    员工编号列表.append(员工编号)
    矩阵行列表.append(矩阵行)

# Assemble the employee x time-slot matrix.
矩阵 = pd.DataFrame(矩阵行列表, index=员工编号列表, columns=时间范围)

# Show all columns
pd.set_option('display.max_columns', None)

# Show table of 1's
print(矩阵)
SQL解决方案部分不需要翻译。如果您需要任何其他帮助,请随时告诉我。
英文:
Python Solution
import pandas as pd
from dateutil import parser

# Script: turn raw punch-in / punch-out / break records into a matrix with
# one row per employee and one column per 15-minute slot. A cell holds 1 when
# the employee is on the clock (punched in and not on break) at the start of
# that slot, and an empty string otherwise.

# Create a sample DataFrame with the raw punch data
data = {
    'Employee ID': [1234, 1235, 1236],
    'Punch In': ['9 am', '9:30 am', '8 am'],
    'Punch Out': ['4 pm', '5 pm', '4 pm'],
    'Break In': ['12:30 pm', None, '11 am'],
    'Break Out': ['1 pm', None, '11:45 am'],
}
df = pd.DataFrame(data)

# First and last slot of the matrix (both ends are included as columns).
start_time = pd.to_datetime('8:00 am').time()
end_time = pd.to_datetime('5:00 pm').time()

# date_range needs full timestamps, so anchor both ends to the same date;
# only the time-of-day part is kept afterwards.
today = pd.to_datetime('today').date()
start_timestamp = pd.Timestamp.combine(today, start_time)
end_timestamp = pd.Timestamp.combine(today, end_time)

# 15-minute slots as datetime.time objects; these become the column labels.
time_range = pd.date_range(start=start_timestamp, end=end_timestamp, freq='15min').time


def _parse_optional_time(value):
    """Return *value* parsed as a datetime.time, or None when it is missing.

    None, NaN and the empty string all count as "missing".
    """
    if pd.isna(value) or not value:
        return None
    return parser.parse(value).time()


def _is_on_clock(slot, punch_in, punch_out, break_in, break_out):
    """Return True when *slot* lies inside the shift and outside the break.

    Both intervals are half-open: the start is included, the end is not.
    A break is honoured only when both of its endpoints are known.
    """
    if not punch_in <= slot < punch_out:
        return False
    on_break = (
        break_in is not None
        and break_out is not None
        and break_in <= slot < break_out
    )
    return not on_break


# Rows are collected in plain lists and the DataFrame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies it on every iteration.
row_ids = []
row_values = []

for _, row in df.iterrows():
    employee_id = row['Employee ID']

    try:
        punch_in = parser.parse(row['Punch In']).time()
        punch_out = parser.parse(row['Punch Out']).time()
        break_in = _parse_optional_time(row['Break In'])
        break_out = _parse_optional_time(row['Break Out'])
    except (ValueError, TypeError):
        # ValueError: unparsable string; TypeError: missing punch value.
        print(f"Invalid time format for Employee ID {employee_id}")
        continue

    row_ids.append(employee_id)
    row_values.append([
        1 if _is_on_clock(slot, punch_in, punch_out, break_in, break_out) else ''
        for slot in time_range
    ])

# Assemble the employee x time-slot matrix.
matrix = pd.DataFrame(row_values, index=row_ids, columns=time_range)

# Show all columns
pd.set_option('display.max_columns', None)

# Show table of 1's
print(matrix)
SQL Solution
-- Raw punch records: one row per employee. The four time columns are kept as
-- short text values (e.g. '9 am', '12:30 pm'); the break columns may be NULL.
CREATE TABLE PunchData (
    EmployeeID INT,
    PunchIn    VARCHAR(10),
    PunchOut   VARCHAR(10),
    BreakIn    VARCHAR(10),
    BreakOut   VARCHAR(10)
);

-- Sample rows; employee 1235 took no break.
INSERT INTO PunchData (EmployeeID, PunchIn, PunchOut, BreakIn, BreakOut)
VALUES
    (1234, '9 am',    '4 pm', '12:30 pm', '1 pm'),
    (1235, '9:30 am', '5 pm', NULL,       NULL),
    (1236, '8 am',    '4 pm', '11 am',    '11:45 am');
-- TimeSlots: recursive CTE producing one row per 15-minute slot of the day,
-- from 00:00 through 23:45 (96 rows). NOTE(review): that is 95 recursion
-- steps, which should fit under SQL Server's default MAXRECURSION limit of
-- 100 -- add OPTION (MAXRECURSION n) if the slot size is ever reduced.
WITH TimeSlots AS (
    SELECT CAST('00:00' AS TIME) AS [_Time]
    UNION ALL
    SELECT DATEADD(minute, 15, [_Time])
    FROM TimeSlots
    WHERE [_Time] < CAST('23:45' AS TIME)
),
-- Matrix: every employee paired with every slot, flagged '1' when the
-- employee is on the clock at the start of the slot and '0' otherwise.
-- The punch/break columns are VARCHAR and are compared directly with the
-- TIME slot value. NOTE(review): this relies on SQL Server implicitly
-- converting strings such as '9 am' to TIME -- confirm for your data.
Matrix AS (
    SELECT
        pd.EmployeeID
        ,ts.[_Time]
        ,CASE
            WHEN (
                -- The slot starts inside the shift: [PunchIn, PunchOut)
                (pd.PunchIn <= ts.[_Time]
                    AND pd.PunchOut > ts.[_Time])
                -- ...and NOT inside the break: [BreakIn, BreakOut).
                -- A break with a missing endpoint is treated as no break.
                AND (
                    pd.BreakIn IS NULL
                    OR pd.BreakOut IS NULL
                    OR NOT (
                        pd.BreakIn <= ts.[_Time]
                        AND pd.BreakOut > ts.[_Time]
                    )
                )
            )
            THEN '1'
            ELSE '0'
        END AS SlotValue
    FROM
        PunchData AS pd
    CROSS JOIN
        TimeSlots AS ts
)
-- Pivot the Matrix rows into one column per time slot. Each CASE yields ''
-- for non-matching slots, so MAX picks that slot's own '0'/'1' value.
SELECT
    EmployeeID
    ,MAX(CASE WHEN [_Time] = '08:00' THEN SlotValue ELSE '' END) AS [08:00]
    ,MAX(CASE WHEN [_Time] = '08:15' THEN SlotValue ELSE '' END) AS [08:15]
    ,MAX(CASE WHEN [_Time] = '08:30' THEN SlotValue ELSE '' END) AS [08:30]
    ,MAX(CASE WHEN [_Time] = '08:45' THEN SlotValue ELSE '' END) AS [08:45]
    ,MAX(CASE WHEN [_Time] = '09:00' THEN SlotValue ELSE '' END) AS [09:00]
    ,MAX(CASE WHEN [_Time] = '09:15' THEN SlotValue ELSE '' END) AS [09:15]
    ,MAX(CASE WHEN [_Time] = '09:30' THEN SlotValue ELSE '' END) AS [09:30]
    ,MAX(CASE WHEN [_Time] = '09:45' THEN SlotValue ELSE '' END) AS [09:45]
    ,MAX(CASE WHEN [_Time] = '10:00' THEN SlotValue ELSE '' END) AS [10:00]
    ,MAX(CASE WHEN [_Time] = '10:15' THEN SlotValue ELSE '' END) AS [10:15]
    ,MAX(CASE WHEN [_Time] = '10:30' THEN SlotValue ELSE '' END) AS [10:30]
    ,MAX(CASE WHEN [_Time] = '10:45' THEN SlotValue ELSE '' END) AS [10:45]
    ,MAX(CASE WHEN [_Time] = '11:00' THEN SlotValue ELSE '' END) AS [11:00]
    ,MAX(CASE WHEN [_Time] = '11:15' THEN SlotValue ELSE '' END) AS [11:15]
    ,MAX(CASE WHEN [_Time] = '11:30' THEN SlotValue ELSE '' END) AS [11:30]
    ,MAX(CASE WHEN [_Time] = '11:45' THEN SlotValue ELSE '' END) AS [11:45]
    ,MAX(CASE WHEN [_Time] = '12:00' THEN SlotValue ELSE '' END) AS [12:00]
    ,MAX(CASE WHEN [_Time] = '12:15' THEN SlotValue ELSE '' END) AS [12:15]
    ,MAX(CASE WHEN [_Time] = '12:30' THEN SlotValue ELSE '' END) AS [12:30]
    ,MAX(CASE WHEN [_Time] = '12:45' THEN SlotValue ELSE '' END) AS [12:45]
    ,MAX(CASE WHEN [_Time] = '13:00' THEN SlotValue ELSE '' END) AS [13:00]
    ,MAX(CASE WHEN [_Time] = '13:15' THEN SlotValue ELSE '' END) AS [13:15]
    ,MAX(CASE WHEN [_Time] = '13:30' THEN SlotValue ELSE '' END) AS [13:30]
    ,MAX(CASE WHEN [_Time] = '13:45' THEN SlotValue ELSE '' END) AS [13:45]
    ,MAX(CASE WHEN [_Time] = '14:00' THEN SlotValue ELSE '' END) AS [14:00]
    ,MAX(CASE WHEN [_Time] = '14:15' THEN SlotValue ELSE '' END) AS [14:15]
    ,MAX(CASE WHEN [_Time] = '14:30' THEN SlotValue ELSE '' END) AS [14:30]
    ,MAX(CASE WHEN [_Time] = '14:45' THEN SlotValue ELSE '' END) AS [14:45]
    ,MAX(CASE WHEN [_Time] = '15:00' THEN SlotValue ELSE '' END) AS [15:00]
    ,MAX(CASE WHEN [_Time] = '15:15' THEN SlotValue ELSE '' END) AS [15:15]
    ,MAX(CASE WHEN [_Time] = '15:30' THEN SlotValue ELSE '' END) AS [15:30]
    ,MAX(CASE WHEN [_Time] = '15:45' THEN SlotValue ELSE '' END) AS [15:45]
    ,MAX(CASE WHEN [_Time] = '16:00' THEN SlotValue ELSE '' END) AS [16:00]
    ,MAX(CASE WHEN [_Time] = '16:15' THEN SlotValue ELSE '' END) AS [16:15]
    ,MAX(CASE WHEN [_Time] = '16:30' THEN SlotValue ELSE '' END) AS [16:30]
    ,MAX(CASE WHEN [_Time] = '16:45' THEN SlotValue ELSE '' END) AS [16:45]
    ,MAX(CASE WHEN [_Time] = '17:00' THEN SlotValue ELSE '' END) AS [17:00]
    ,MAX(CASE WHEN [_Time] = '17:15' THEN SlotValue ELSE '' END) AS [17:15]
    ,MAX(CASE WHEN [_Time] = '17:30' THEN SlotValue ELSE '' END) AS [17:30]
    ,MAX(CASE WHEN [_Time] = '17:45' THEN SlotValue ELSE '' END) AS [17:45]
    -- Add more case statements for each 15-minute increment before/after if needed...
FROM
    Matrix
GROUP BY
    EmployeeID;
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论