英文:
using mutate, paste0 and case_when in iteration; problem with mutating existing variables with paste0
问题
@r2evans 针对您的请求,这是数据框架的快照和dput(head(as))的部分值(部分):
# dput(head(as[, c("AS1_WEIGHT", "AS2_WEIGHT", "AS3_WEIGHT", "AS4_WEIGHT", "AS5_WEIGHT", "AS6_WEIGHT", "AS7_WEIGHT", "AS8_WEIGHT", "AS9_WEIGHT")]))
structure(list(AS1_WEIGHT = c(72, 59, 50, 55.2, 82.1, 50.4),
AS2_WEIGHT = c(74.8, NA, NA, 54.8, 84.5, 52.5), AS3_WEIGHT = c(75.2,
NA, NA, 55.9, 81.7, 54.6), AS4_WEIGHT = c(75, NA, NA, 55.1,
80.6, NA), AS5_WEIGHT = c(75.4, NA, NA, 58.8, 89.5, NA),
AS6_WEIGHT = c(77.3, NA, NA, NA, NA, NA), AS7_WEIGHT = c(70.7,
NA, NA, 56, NA, NA), AS8_WEIGHT = c(73.8, NA, NA, 55.5, NA,
NA), AS9_WEIGHT = c(74.5, NA, NA, 54.8, NA, 52)), class = "data.frame", row.names = c(NA,
-6L))
我想要在循环中使用paste0来变异现有变量,但是我一直收到错误消息:
Error: unexpected symbol in:
" dplyr::mutate(AS1_WEIGHT = case_when(as$AS1_WEIGHT == '99999' & as$AS2_WEIGHT == '99999' ~ as$AS3_WEIGHT
as"
MRE:
as
...
这是我遇到困难的地方(包括解析错误):
i=2
for (i in 2:length(num_fu)-1){
print(i)
if(i<length(num_fu)){
as %<>%
dplyr::mutate(paste0("AS", i, "_WEIGHT") = case_when(is.na(as[,paste0("AS", i, "_WEIGHT")]) ~ NA_character_
as[,paste0("AS", i, "_WEIGHT")] == '99999' ~ as[,paste0("AS", i+1, "_WEIGHT")],
as[,paste0("AS", i, "_WEIGHT")] != '99999' ~ as[,paste0("AS", i, "_WEIGHT")],
TRUE ~ NA_character_), na.rm = TRUE)
}
paste0("D_WEIGHT", i-1) <- round(100*(as[,paste0("AS", i, "_WEIGHT")]-as[,paste0("AS", i-1, "_WEIGHT")])/as[,paste0("AS", i-1, "_WEIGHT")], 1)
}
然而,
i=2
as %<>%
mutate(paste0("AS", i, "_WEIGHT") = 1
会输出相同的错误消息,而
as %<>%
mutate(AS2_WEIGHT) = 1
却可以正常运行。
预期输出:
将每列(as$AS1_WEIGHT ... as$AS9_WEIGHT)中值为99999
的替换为同一行中相邻列的值。
我看到可能需要使用!!!
,但我感到困惑。
任何评论都会很有帮助。谢谢。
英文:
@r2evans For your request, here is a snapshot of the dataframe
and the values for dput(head(as)) is as follows (partial)
# dput(head(as[, c("AS1_WEIGHT", "AS2_WEIGHT", "AS3_WEIGHT", "AS4_WEIGHT", "AS5_WEIGHT", "AS6_WEIGHT", "AS7_WEIGHT", "AS8_WEIGHT", "AS9_WEIGHT")]))
structure(list(AS1_WEIGHT = c(72, 59, 50, 55.2, 82.1, 50.4),
AS2_WEIGHT = c(74.8, NA, NA, 54.8, 84.5, 52.5), AS3_WEIGHT = c(75.2,
NA, NA, 55.9, 81.7, 54.6), AS4_WEIGHT = c(75, NA, NA, 55.1,
80.6, NA), AS5_WEIGHT = c(75.4, NA, NA, 58.8, 89.5, NA),
AS6_WEIGHT = c(77.3, NA, NA, NA, NA, NA), AS7_WEIGHT = c(70.7,
NA, NA, 56, NA, NA), AS8_WEIGHT = c(73.8, NA, NA, 55.5, NA,
NA), AS9_WEIGHT = c(74.5, NA, NA, 54.8, NA, 52)), class = "data.frame", row.names = c(NA,
-6L))
I want to mutate existing variables in a loop using paste0, but I keep having error messages:
Error: unexpected symbol in:
" dplyr::mutate(AS1_WEIGHT = case_when(as$AS1_WEIGHT == '99999' & as$AS2_WEIGHT == '99999' ~ as$AS3_WEIGHT
as"
MRE:
as
is a dataframe, code looks a bit complicated, but these are cohort datas gathered at 9 different timelines.
library(dplyr)
library(plyr)
library(magrittr)
library(stringr)
library(ExclusionTable)
library(lubridate)
library(tidyverse)
library(janitor)
library(survival)
library(ggsurvfit)
library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
num_fu = c(1,2,3,4,5,6,7,8,9)
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as'
i=1;
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- df
for (i in 2:length(num_fu)){
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
dfs <- dfs[!duplicated(base::as.list(dfs))]
if(length(grep(paste0("AS", i, "_AREA"), names(get(paste0("as", i))))) > 0){
assign(paste0("fu_",i-1), get(paste0("as", i))[as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_AREA"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_AREA"))])
# FU rate
assign(paste0("fu_rate_", i-1), round(nrow(get(paste0("as", i)))/nrow(as1), 2))
}
else{
assign(paste0("fu_",i-1), get(paste0("as", i))[as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_DATA_CLASS"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_DATA_CLASS"))])
# FU rate
assign(paste0("fu_rate_", i-1), round(nrow(get(paste0("as", i)))/nrow(as1), 2))
}
}
as <- dfs
as <- as[!duplicated(base::as.list(as))]
rm(list = intersect(ls(), c("df", "dfs", "i")))
And below is where I am having a trouble (including the parsing error)
i=2
for (i in 2:length(num_fu)-1){
print(i)
if(i<length(num_fu)){
as %<>%
dplyr::mutate(paste0("AS", i, "_WEIGHT") = case_when(is.na(as[,paste0("AS", i, "_WEIGHT")]) ~ NA_character_
as[,paste0("AS", i, "_WEIGHT")] == '99999' ~ as[,paste0("AS", i+1, "_WEIGHT")],
as[,paste0("AS", i, "_WEIGHT")] != '99999' ~ as[,paste0("AS", i, "_WEIGHT")],
TRUE ~ NA_character_), na.rm = TRUE)
}
paste0("D_WEIGHT", i-1) <- round(100*(as[,paste0("AS", i, "_WEIGHT")]-as[,paste0("AS", i-1, "_WEIGHT")])/as[,paste0("AS", i-1, "_WEIGHT")], 1)
}
However,
i=2
as %<>%
mutate(paste0("AS", i, "_WEIGHT") = 1
spits out the same error message, while
as %<>%
mutate(AS2_WEIGHT) = 1
works fine.
Expected output would be:
Every column(as$AS1_WEIGHT ... as$AS9_WEIGHT) with value 99999
will be replaced by the value in the same row, but in the adjacent column.
I saw that using !!!
might be necessary but I am lost.
Any comments will be helpful. Thank you.
答案1
得分: 2
我假设这里的主要任务是将99999的值替换为右侧列中找到的值。在这里,我进行了长格式转换,以便右侧的值是每个“row”组内的下一个值(即lead(value)
)。
如果两个相邻的列都有'99999'值,不确定您希望出现什么行为。
library(tidyverse)
as <- as %>%
mutate(row = row_number()) %>%
pivot_longer(-row) %>%
mutate(value = if_else(value == '99999', lead(value), value), .by = row) %>%
pivot_wider(names_from = name, values_from = value)
英文:
I'm assuming the main task here is to replace 99999 values with the value found in the next column to the right. Here, I pivot longer so that the value to the right is the following value (i.e. lead(value)
) within each row
group.
Not sure what behavior you want if two adjacent columns both have '99999' values.
library(tidyverse)
as <- as %>%
mutate(row = row_number()) %>%
pivot_longer(-row) %>%
mutate(value = if_else(value == '99999', lead(value), value), .by = row) %>%
pivot_wider(names_from = name, values_from = value)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论