英文:
How to get 15-rows average Date and Time column in output file from input data?
问题
你的代码中有一些问题。首先,代码中似乎有一些HTML实体编码(如 `&lt;` 和 `&gt;`)和HTML实体引用(如 `&quot;`),需要替换成正常的R代码(即 `<`、`>` 和 `"`)。另外,你提到输出中的日期和时间列出现了NA值,这可能需要进一步检查你的数据或代码。
以下是你提供的代码的翻译(已经替换了HTML实体编码和引用),以及你所期望的输出格式:
# Split the `Date/Time` column into separate "Date" and "Time" columns,
# cutting at the "T" separator.
x <- df %>%
separate(`Date/Time`, into = c("Date", "Time"), sep = "T")
# Assign rows to consecutive groups of 15 (gl() yields 1,1,...,2,2,...) and
# take the mean of every column within each group.
# NOTE(review): Date and Time are still character columns here, so their
# "mean" is NA -- this is the behaviour the question is about.
mn <- x %>%
group_by(group = as.integer(gl(n(), 15, n()))) %>%
summarise_all(funs(mean))
# Write the per-group means to a CSV file.
write.csv(mn, 'C:/Users/Alexia/Desktop/Test/15row.csv')
至于你期望的输出格式,你需要将日期和时间合并为一个列,并确保时间格式为"hh:mm:ss+00",列名为"time_sp"。你可以使用R的代码来实现这一点,以下是一个示例:
# Build the combined Time_sp column ("YYYY-MM-DD hh:mm:ss+00").
# `mn` holds one row per consecutive 15-row chunk of `x`, so chunk k starts at
# row (k - 1) * 15 + 1 of `x`; use that row's timestamp as the chunk label.
# The Date/Time columns of `mn` cannot be used: they are NA because they were
# averaged as character vectors.
first_rows <- (seq_len(nrow(mn)) - 1L) * 15L + 1L
mn$Time_sp <- paste0(x$Date[first_rows], " ", x$Time[first_rows], "+00")
# Drop the NA-filled Date and Time columns and move Time_sp to the front.
mn <- mn %>% select(Time_sp, everything(), -Date, -Time)
这将产生你期望的输出格式,其中日期和时间合并为一列,格式为"hh:mm:ss+00",列名为"time_sp"。
英文:
I have calculated the mean of every 15 rows of my data (.txt file) using the R code given below.
# Split `Date/Time` into "Date" and "Time" at the "T" separator.
x <- df %>% separate(`Date/Time`, into = c("Date", "Time"), sep = "T")
# Average every column over consecutive groups of 15 rows.
# (Date and Time are character columns here, so their mean comes out as NA.)
mn <- x %>%
  group_by(group = as.integer(gl(n(), 15, n()))) %>%
  summarise_all(funs(mean))
# Write the averaged table created above; the object is `mn`, not the base
# function `min`.
write.csv(mn, 'C:/Users/Alexia/Desktop/Test/15row.csv')
I am getting the output successfully but in the Date and Time columns output, I am receiving NA. However, the desired output should be as follows: (Date and Time should be in one column with time written as hh:mm:ss+00 and name of column needs to be time_sp)
Time_sp Col1 Col2 Col3....
2021-01-01 00:00:00+00 12 36 56
2021-01-01 00:15:00+00 34 54 43
2021-01-01 00:30:00+00 24 23 21
2021-01-01 00:45:00+00 12 36 56
2021-01-01 01:00:00+00 34 54 43
2021-01-01 01:15:00+00 24 23 21
2021-01-01 01:30:00+00 12 36 43
2021-01-01 01:45:00+00 12 36 34
2021-01-01 02:00:00+00 12 36 34
.
.
.
My input data (.txt) is of per minute and has Date and Time in following manner:
Date/Time Col1 Col2 Col3....
2021-01-01T00:00:00 20 12 34...
2021-01-01T00:01:00 .....
2021-01-01T00:02:00 .....
2021-01-01T00:03:00 .....
2021-01-01T01:04:00 .....
2021-01-01T01:05:00 .....
2021-01-01T01:05:00 .....
2021-01-01T01:07:00 .....
2021-01-01T02:08:00 .....
The output of dput(df) is as follows:
structure(list(`Date/Time` = c("2021-03-01T00:01:00", "2021-03-01T00:02:00",
"2021-03-01T00:03:00", "2021-03-01T00:04:00", "2021-03-01T00:05:00",
"2021-03-01T00:06:00", "2021-03-01T00:07:00", "2021-03-01T00:08:00",
"2021-03-01T00:09:00", "2021-03-01T00:10:00", "2021-03-01T00:11:00",
"2021-03-01T00:12:00", "2021-03-01T00:13:00", "2021-03-01T00:14:00",
"2021-03-01T00:15:00", "2021-03-01T00:16:00", "2021-03-01T00:17:00",
"2021-03-01T00:18:00", "2021-03-01T00:19:00", "2021-03-01T00:20:00",
"2021-03-01T00:21:00", "2021-03-01T00:22:00", "2021-03-01T00:23:00",
"2021-03-01T00:24:00", "2021-03-01T00:25:00", "2021-03-01T00:26:00",
"2021-03-01T00:27:00", "2021-03-01T00:28:00", "2021-03-01T00:29:00",
"2021-03-01T00:30:00"), `XY [XY]` = c(0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641,
0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641, 0.990641),
`C1 [CC]` = c(257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L, 257L,
257L, 257L, 257L), Cc = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `C2 [C2]` = c(285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L,
285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L, 285L), Dc = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
`C3 [C2]` = c(255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L, 255L,
255L, 255L, 255L), Ac = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), C4 = c(0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735, 0.463735, 0.463735,
0.463735, 0.463735, 0.463735, 0.463735), `C5 [h]` = c(1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L, 1013L,
1013L, 1013L), `C6 [%]` = c(43L, 43L, 43L, 43L, 43L, 43L,
43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L,
43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 43L
), `C7 [E2]` = c(390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L, 390L,
390L, 390L, 390L, 390L), Jc = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), `D [S]` = c(62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716, 62.3716,
62.3716), `Sw [S2]` = c(1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95, 1392.95,
1392.95, 1392.95, 1392.95, 1392.95, 1392.95), `SW [Q2]` =
c(389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164, 389.164, 389.164, 389.164, 389.164, 389.164, 389.164,
389.164), `OA [H2]` = c(646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61, 646.61,
646.61, 646.61), `T2 [C]` = c(3.7, 3.7, 3.7, 3.7, 3.7,
3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7,
3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7,
3.7), Lc = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-30L))
答案1
得分: 1
以下是您要翻译的内容:
问题出在您的数据类型上——您需要告诉 `R` 您正在使用日期和时间,否则它会假定您正在使用字符向量。如果您对字符向量取平均值,它会产生 `NA`。
尝试一下:
library(dplyr)     # %>%, as_tibble(), group_by(), mutate(), summarise(), across(), select()
library(tidyr)     # separate()
library(lubridate) # ymd(), hms(), period_to_seconds(), seconds_to_period()

# Split the ISO timestamp into "Date" and "Time" at the "T" separator.
x <- df %>% separate(`Date/Time`, into = c("Date", "Time"), sep = "T")

# Average every column over consecutive groups of 15 rows.
# NOTE: Date and Time are averaged independently, so a group whose rows
# straddle midnight gets a misleading mean; average a single POSIXct
# timestamp instead if that can happen in your data.
min <- x %>%
  as_tibble() %>%
  group_by(group = as.integer(gl(n(), 15, n()))) %>%
  mutate(
    # Convert the Date column into the Date datatype
    Date = lubridate::ymd(Date),
    # Convert the Time column into the Period datatype (HMS), then into a
    # number of seconds so that it can be averaged
    Time = period_to_seconds(hms(Time))
  ) %>%
  summarise(across(everything(), mean)) %>%
  # Convert Time from a number of seconds back into the Period datatype (HMS).
  # Omit this line if you'd prefer to have the average in seconds
  mutate(Time = seconds_to_period(Time))
min
#> # A tibble: 2 × 21
#> group Date Time `XY [XY]` `C1 [CC]` Cc `C2 [C2]`
#> <int> <date> <Period> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2021-03-01 8M 0S 0.991 257 0 285
#> 2 2 2021-03-01 23M 0S 0.991 257 0 285
#> # ℹ 14 more variables: Dc <dbl>, `C3 [C2]` <dbl>, Ac <dbl>,
#> # C4 <dbl>, `C5 [h]` <dbl>, `C6 [%]` <dbl>, `C7 [E2]` <dbl>,
#> # Jc <dbl>, `D [S]` <dbl>, `Sw [S2]` <dbl>, `SW [Q2]` <dbl>,
#> # `OA [H2]` <dbl>, `T2 [C]` <dbl>, Lc <dbl>

# Build the export table: one Time_sp column formatted as
# "YYYY-MM-DD hh:mm:ss+00", followed by the averaged data columns.
out <- min %>%
  mutate(
    Time_sp = paste0(
      format(Date, "%Y-%m-%d"), " ",
      # Render the averaged time of day (seconds since midnight) as hh:mm:ss
      format(
        as.POSIXct(period_to_seconds(Time), origin = "1970-01-01", tz = "UTC"),
        "%H:%M:%S"
      ),
      "+00"
    )
  ) %>%
  select(Time_sp, everything(), -group, -Date, -Time)
# row.names = FALSE keeps the row-number column out of the file
write.csv(out, 'C:/Users/Alexia/Desktop/Test/15row.csv', row.names = FALSE)
英文:
The issue is with your data types - you need to tell `R` that you are using dates and times, or it will assume you are using character vectors. If you take the mean of a character vector, it produces `NA`.
Try:
library(dplyr)     # %>%, as_tibble(), group_by(), mutate(), summarise(), across(), select()
library(tidyr)     # separate()
library(lubridate) # ymd(), hms(), period_to_seconds(), seconds_to_period()

# Split the ISO timestamp into "Date" and "Time" at the "T" separator.
x <- df %>% separate(`Date/Time`, into = c("Date", "Time"), sep = "T")

# Average every column over consecutive groups of 15 rows.
# NOTE: Date and Time are averaged independently, so a group whose rows
# straddle midnight gets a misleading mean; average a single POSIXct
# timestamp instead if that can happen in your data.
min <- x %>%
  as_tibble() %>%
  group_by(group = as.integer(gl(n(), 15, n()))) %>%
  mutate(
    # Convert Date column into the Date datatype
    Date = lubridate::ymd(Date),
    # Convert Time column into the Period datatype (HMS). Then,
    # change this to number of seconds so that it can be averaged
    Time = period_to_seconds(hms(Time))
  ) %>%
  summarise(across(everything(), mean)) %>%
  # Convert Time column from number of seconds
  # back into the Period datatype (HMS). Omit this line
  # if you'd prefer to have the average in seconds
  mutate(Time = seconds_to_period(Time))
min
#> # A tibble: 2 × 21
#> group Date Time `XY [XY]` `C1 [CC]` Cc `C2 [C2]`
#> <int> <date> <Period> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2021-03-01 8M 0S 0.991 257 0 285
#> 2 2 2021-03-01 23M 0S 0.991 257 0 285
#> # ℹ 14 more variables: Dc <dbl>, `C3 [C2]` <dbl>, Ac <dbl>,
#> # C4 <dbl>, `C5 [h]` <dbl>, `C6 [%]` <dbl>, `C7 [E2]` <dbl>,
#> # Jc <dbl>, `D [S]` <dbl>, `Sw [S2]` <dbl>, `SW [Q2]` <dbl>,
#> # `OA [H2]` <dbl>, `T2 [C]` <dbl>, Lc <dbl>

# Build the export table: one Time_sp column formatted as
# "YYYY-MM-DD hh:mm:ss+00", followed by the averaged data columns.
out <- min %>%
  mutate(
    Time_sp = paste0(
      format(Date, "%Y-%m-%d"), " ",
      # Render the averaged time of day (seconds since midnight) as hh:mm:ss
      format(
        as.POSIXct(period_to_seconds(Time), origin = "1970-01-01", tz = "UTC"),
        "%H:%M:%S"
      ),
      "+00"
    )
  ) %>%
  select(Time_sp, everything(), -group, -Date, -Time)
# row.names = FALSE keeps the row-number column out of the file
write.csv(out, 'C:/Users/Alexia/Desktop/Test/15row.csv', row.names = FALSE)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论