英文:
R - add date to time from different column or add extra day if the time goes across midnight
问题
我在这里没有找到相关内容,因此提出这个问题。由于数据问题或数据缺失,我需要处理某一列,把日期补充到时间列中。
我有一个包含EVENT_START_DTTM的日期和时间,以及EVENT_END_TM的时间的数据框。
我的逻辑是:
- (EVENT_START_DTTM <= EVENT_END_TM则为同一天)
- (EVENT_START_DTTM > EVENT_END_TM则跨越午夜,我需要将日期加一天)
EVENT_START_DTTM EVENT_END_TM
2020-01-03 09:34:13 10:33:37
2020-01-03 07:57:24 23:04:38
2019-12-04 23:42:40 03:38:33
2019-12-04 22:33:33 00:07:13
此外,我想计算分钟差异,期望的结果是:
EVENT_START_DTTM EVENT_END_DTTM Difference_min
2020-01-03 09:34:13 2020-01-03 10:33:37 59
2020-01-03 07:57:24 2020-01-03 23:04:38 907
2019-12-04 23:42:40 2019-12-05 03:38:33 237
2019-12-04 22:33:33 2019-12-05 00:07:13 95
以下是我的代码
library(data.table)
library(lubridate)
# Raw timestamps as text: event start and the (desired) full event end.
EVENT_START_DTTM <- c(
  "2020-01-03 09:34:13", "2020-01-03 07:57:24",
  "2019-12-04 23:42:40", "2019-12-04 22:33:33"
)
EVENT_END_DTTM <- c(
  "2020-01-03 10:33:37", "2020-01-03 23:04:38",
  "2019-12-05 03:38:33", "2019-12-05 00:07:13"
)
# Name the columns at construction instead of renaming the auto-generated
# names afterwards; both columns are parsed to POSIXct in the session's
# local time zone.
df_dttm <- data.frame(
  EVENT_START_DTTM = as.POSIXct(EVENT_START_DTTM),
  EVENT_END_DTTM = as.POSIXct(EVENT_END_DTTM)
)
英文:
I could not find anything here hence my question. Due to data or missing data, I need to manipulate a column to get dates into the time column.
I have a data frame with date & time for EVENT_START_DTTM and just time for EVENT_END_TM.
My logic is that if:
- EVENT_START_DTTM <= EVENT_END_TM, then it is the same day
- EVENT_START_DTTM > EVENT_END_TM, then it goes across midnight and I need to add one day to the date
EVENT_START_DTTM EVENT_END_TM
2020-01-03 09:34:13 10:33:37
2020-01-03 07:57:24 23:04:38
2019-12-04 23:42:40 03:38:33
2019-12-04 22:33:33 00:07:13
In addition, I want to calculate the difference in minutes and the desired results are:
EVENT_START_DTTM EVENT_END_DTTM Difference_min
2020-01-03 09:34:13 2020-01-03 10:33:37 59
2020-01-03 07:57:24 2020-01-03 23:04:38 907
2019-12-04 23:42:40 2019-12-05 03:38:33 237
2019-12-04 22:33:33 2019-12-05 00:07:13 95
Below is my code
library(data.table)
library(lubridate)
# Raw timestamps as text: event start and the (desired) full event end.
EVENT_START_DTTM <- c(
  "2020-01-03 09:34:13", "2020-01-03 07:57:24",
  "2019-12-04 23:42:40", "2019-12-04 22:33:33"
)
# First end time is 10:33:37, matching the desired-results table; the
# truncated "10:33:3" would silently parse as 10:33:03.
EVENT_END_DTTM <- c(
  "2020-01-03 10:33:37", "2020-01-03 23:04:38",
  "2019-12-05 03:38:33", "2019-12-05 00:07:13"
)
# Name the columns at construction instead of renaming the auto-generated
# names afterwards; both columns are parsed to POSIXct in the session's
# local time zone.
df_dttm <- data.frame(
  EVENT_START_DTTM = as.POSIXct(EVENT_START_DTTM),
  EVENT_END_DTTM = as.POSIXct(EVENT_END_DTTM)
)
答案1
得分: 1
您可以使用 `within()` 和数学运算。要按照提问中的思路比较小时,请使用 `substr()`。
# Turn the time-only EVENT_END_TM into a full timestamp on the start date,
# roll it over to the next day when it is earlier than the start's time of
# day, and compute the elapsed minutes.
d <- within(d, {
  # Work on plain strings so factor and character columns behave the same.
  start_chr <- as.character(EVENT_START_DTTM)
  end_chr <- as.character(EVENT_END_TM)
  # Compare the whole "HH:MM:SS" part, not just the hour: an end time in the
  # same hour but earlier in the minutes must also roll over. Zero-padded
  # times order correctly as strings; equal times stay on the same day.
  crosses_midnight <- substr(start_chr, 12, 19) > end_chr
  # Shift the calendar date instead of adding 24*60*60 seconds, which can be
  # off by an hour when a daylight-saving change falls in between.
  end_date <- as.Date(substr(start_chr, 1, 10)) + as.integer(crosses_midnight)
  EVENT_START_DTTM <- as.POSIXct(start_chr)
  EVENT_END_TM <- as.POSIXct(paste(end_date, end_chr))
  # Pin the unit: plain subtraction picks secs/mins/hours/days automatically.
  Difference_min <- difftime(EVENT_END_TM, EVENT_START_DTTM, units = "mins")
  # Drop the helpers so they do not end up as columns of d.
  rm(start_chr, end_chr, crosses_midnight, end_date)
})
d
# EVENT_START_DTTM EVENT_END_TM Difference_min
# 1 2020-01-03 09:34:13 2020-01-03 10:33:37 59.40000 mins
# 2 2020-01-03 07:57:24 2020-01-03 23:04:38 907.23333 mins
# 3 2019-12-04 23:42:40 2019-12-05 03:38:33 235.88333 mins
# 4 2019-12-04 22:33:33 2019-12-05 00:07:13 93.66667 mins
*数据:*
# Sample input for the answer: start timestamps and time-only end values,
# both stored as factors (levels are the sorted unique strings).
d <- data.frame(
  EVENT_START_DTTM = c(
    "2020-01-03 09:34:13", "2020-01-03 07:57:24",
    "2019-12-04 23:42:40", "2019-12-04 22:33:33"
  ),
  EVENT_END_TM = c("10:33:37", "23:04:38", "03:38:33", "00:07:13"),
  stringsAsFactors = TRUE
)
英文:
You may use `within()` and mathematics. To compare the hours as proposed, use `substr()`.
# Turn the time-only EVENT_END_TM into a full timestamp on the start date,
# roll it over to the next day when it is earlier than the start's time of
# day, and compute the elapsed minutes.
d <- within(d, {
  # Work on plain strings so factor and character columns behave the same.
  start_chr <- as.character(EVENT_START_DTTM)
  end_chr <- as.character(EVENT_END_TM)
  # Compare the whole "HH:MM:SS" part, not just the hour: an end time in the
  # same hour but earlier in the minutes must also roll over. Zero-padded
  # times order correctly as strings; equal times stay on the same day.
  crosses_midnight <- substr(start_chr, 12, 19) > end_chr
  # Shift the calendar date instead of adding 24*60*60 seconds, which can be
  # off by an hour when a daylight-saving change falls in between.
  end_date <- as.Date(substr(start_chr, 1, 10)) + as.integer(crosses_midnight)
  EVENT_START_DTTM <- as.POSIXct(start_chr)
  EVENT_END_TM <- as.POSIXct(paste(end_date, end_chr))
  # Pin the unit: plain subtraction picks secs/mins/hours/days automatically.
  Difference_min <- difftime(EVENT_END_TM, EVENT_START_DTTM, units = "mins")
  # Drop the helpers so they do not end up as columns of d.
  rm(start_chr, end_chr, crosses_midnight, end_date)
})
d
# EVENT_START_DTTM EVENT_END_TM Difference_min
# 1 2020-01-03 09:34:13 2020-01-03 10:33:37 59.40000 mins
# 2 2020-01-03 07:57:24 2020-01-03 23:04:38 907.23333 mins
# 3 2019-12-04 23:42:40 2019-12-05 03:38:33 235.88333 mins
# 4 2019-12-04 22:33:33 2019-12-05 00:07:13 93.66667 mins
Data:
# Sample input for the answer: start timestamps and time-only end values,
# both stored as factors (levels are the sorted unique strings).
d <- data.frame(
  EVENT_START_DTTM = c(
    "2020-01-03 09:34:13", "2020-01-03 07:57:24",
    "2019-12-04 23:42:40", "2019-12-04 22:33:33"
  ),
  EVENT_END_TM = c("10:33:37", "23:04:38", "03:38:33", "00:07:13"),
  stringsAsFactors = TRUE
)
答案2
得分: 1
你可以通过在dplyr中使用mutate函数和格式化日期时间来实现这一点。
library(data.table)
library(lubridate)
library(dplyr)
# 创建数据框
# Start timestamps as text, and time-only end values (kept as character).
EVENT_START_DTTM <- c(
  "2020-01-03 09:34:13", "2020-01-03 07:57:24",
  "2019-12-04 23:42:40", "2019-12-04 22:33:33"
)
EVENT_END_DTTM <- c(
  "10:33:3", "23:04:38",
  "03:38:33", "00:07:13"
)
# Name the columns at construction instead of renaming the auto-generated
# names afterwards; the start column is parsed to POSIXct in the session's
# local time zone.
df_dttm <- data.frame(
  EVENT_START_DTTM = as.POSIXct(EVENT_START_DTTM),
  EVENT_END_DTTM = EVENT_END_DTTM,
  stringsAsFactors = FALSE
)
# Complete the time-only EVENT_END_DTTM with a calendar date (the start date,
# or the following day when the end time of day is earlier than the start)
# and compute the rounded duration in minutes.
result <-
  df_dttm %>%
  as_tibble() %>%
  mutate(
    # Take the date from the local-time rendering of the timestamp;
    # as.Date() on a POSIXct defaults to UTC and can return the wrong day.
    start_date = as.Date(format(EVENT_START_DTTM, "%Y-%m-%d")),
    start_time = format(EVENT_START_DTTM, "%H:%M:%S"),
    # Vectorised rollover test: place the end time on the start date and add
    # one day only when that is strictly before the start, so equal times
    # stay on the same day.
    end_date = start_date +
      as.integer(as.POSIXct(paste(start_date, EVENT_END_DTTM)) < EVENT_START_DTTM),
    EVENT_END_DTTM = as.POSIXct(paste(end_date, EVENT_END_DTTM)),
    Difference_min = round(
      difftime(EVENT_END_DTTM, EVENT_START_DTTM, units = "mins"), 0
    )
  )
result
# # A tibble: 4 x 6
# EVENT_START_DTTM EVENT_END_DTTM start_date start_time end_date Difference_min
# <dttm> <dttm> <date> <chr> <date> <drtn>
# 2020-01-03 09:34:13 2020-01-03 10:33:03 2020-01-03 09:34:13 2020-01-03 59 mins
# 2020-01-03 07:57:24 2020-01-03 23:04:38 2020-01-03 07:57:24 2020-01-03 907 mins
# 2019-12-04 23:42:40 2019-12-05 03:38:33 2019-12-04 23:42:40 2019-12-05 236 mins
# 2019-12-04 22:33:33 2019-12-05 00:07:13 2019-12-04 22:33:33 2019-12-05 94 mins
英文:
You can do that by using mutate function in dplyr and formatting datetime
library(data.table)
library(lubridate)
library(dplyr)
# Creating dataframe
# Start timestamps as text, and time-only end values (kept as character).
EVENT_START_DTTM <- c(
  "2020-01-03 09:34:13", "2020-01-03 07:57:24",
  "2019-12-04 23:42:40", "2019-12-04 22:33:33"
)
EVENT_END_DTTM <- c(
  "10:33:3", "23:04:38",
  "03:38:33", "00:07:13"
)
# Name the columns at construction instead of renaming the auto-generated
# names afterwards; the start column is parsed to POSIXct in the session's
# local time zone.
df_dttm <- data.frame(
  EVENT_START_DTTM = as.POSIXct(EVENT_START_DTTM),
  EVENT_END_DTTM = EVENT_END_DTTM,
  stringsAsFactors = FALSE
)
# Complete the time-only EVENT_END_DTTM with a calendar date (the start date,
# or the following day when the end time of day is earlier than the start)
# and compute the rounded duration in minutes.
result <-
  df_dttm %>%
  as_tibble() %>%
  mutate(
    # Take the date from the local-time rendering of the timestamp;
    # as.Date() on a POSIXct defaults to UTC and can return the wrong day.
    start_date = as.Date(format(EVENT_START_DTTM, "%Y-%m-%d")),
    start_time = format(EVENT_START_DTTM, "%H:%M:%S"),
    # Vectorised rollover test: place the end time on the start date and add
    # one day only when that is strictly before the start, so equal times
    # stay on the same day.
    end_date = start_date +
      as.integer(as.POSIXct(paste(start_date, EVENT_END_DTTM)) < EVENT_START_DTTM),
    EVENT_END_DTTM = as.POSIXct(paste(end_date, EVENT_END_DTTM)),
    Difference_min = round(
      difftime(EVENT_END_DTTM, EVENT_START_DTTM, units = "mins"), 0
    )
  )
result
# # A tibble: 4 x 6
# EVENT_START_DTTM EVENT_END_DTTM start_date start_time end_date Difference_min
# <dttm> <dttm> <date> <chr> <date> <drtn>
# 2020-01-03 09:34:13 2020-01-03 10:33:03 2020-01-03 09:34:13 2020-01-03 59 mins
# 2020-01-03 07:57:24 2020-01-03 23:04:38 2020-01-03 07:57:24 2020-01-03 907 mins
# 2019-12-04 23:42:40 2019-12-05 03:38:33 2019-12-04 23:42:40 2019-12-05 236 mins
# 2019-12-04 22:33:33 2019-12-05 00:07:13 2019-12-04 22:33:33 2019-12-05 94 mins
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论