英文:
using mutate, paste0 and case_when in iteration; problem with mutating existing variables with paste0
问题
@r2evans 应您的要求,下面是数据框的快照,以及 dput(head(as)) 的部分输出:
# dput(head(as[, c("AS1_WEIGHT", "AS2_WEIGHT", "AS3_WEIGHT", "AS4_WEIGHT", "AS5_WEIGHT", "AS6_WEIGHT", "AS7_WEIGHT", "AS8_WEIGHT", "AS9_WEIGHT")]))
structure(list(AS1_WEIGHT = c(72, 59, 50, 55.2, 82.1, 50.4),
AS2_WEIGHT = c(74.8, NA, NA, 54.8, 84.5, 52.5), AS3_WEIGHT = c(75.2,
NA, NA, 55.9, 81.7, 54.6), AS4_WEIGHT = c(75, NA, NA, 55.1,
80.6, NA), AS5_WEIGHT = c(75.4, NA, NA, 58.8, 89.5, NA),
AS6_WEIGHT = c(77.3, NA, NA, NA, NA, NA), AS7_WEIGHT = c(70.7,
NA, NA, 56, NA, NA), AS8_WEIGHT = c(73.8, NA, NA, 55.5, NA,
NA), AS9_WEIGHT = c(74.5, NA, NA, 54.8, NA, 52)), class = "data.frame", row.names = c(NA,
-6L))
我想在循环中用 paste0 拼出列名,并用 mutate 修改这些已有的变量,但一直收到错误消息:
Error: unexpected symbol in:
" dplyr::mutate(AS1_WEIGHT = case_when(as$AS1_WEIGHT == '99999' & as$AS2_WEIGHT == '99999' ~ as$AS3_WEIGHT
as"
MRE:
as
...
这是我遇到困难的地方(包括解析错误):
# Question code, kept verbatim. It does not parse; the notes below mark why.
i=2
# NOTE(review): `:` binds tighter than `-`, so 2:length(num_fu)-1 means
# (2:length(num_fu)) - 1 and the loop starts at i = 1, not 2.
for (i in 2:length(num_fu)-1){
print(i)
if(i<length(num_fu)){
as %<>%
# Parse error 1: a call (paste0(...)) cannot be an argument name left of `=`.
# dplyr's form for a computed column name is `"{name}" := value` or
# `!!name := value`.
# Parse error 2: there is no comma after NA_character_ at the end of the next
# line, which produces "unexpected symbol".
# NOTE(review): NA_character_ is mixed with columns that the dput() output
# shows as numeric, and `na.rm = TRUE` is passed to mutate() rather than to an
# aggregate - confirm both are intended.
dplyr::mutate(paste0("AS", i, "_WEIGHT") = case_when(is.na(as[,paste0("AS", i, "_WEIGHT")]) ~ NA_character_
as[,paste0("AS", i, "_WEIGHT")] == '99999' ~ as[,paste0("AS", i+1, "_WEIGHT")],
as[,paste0("AS", i, "_WEIGHT")] != '99999' ~ as[,paste0("AS", i, "_WEIGHT")],
TRUE ~ NA_character_), na.rm = TRUE)
}
# Percent change between consecutive weights, rounded to 1 decimal.
# NOTE(review): paste0(...) is not a valid assignment target; a computed name
# needs assign() or as[[name]] <- ... . With i = 1 this also reads AS0_WEIGHT.
paste0("D_WEIGHT", i-1) <- round(100*(as[,paste0("AS", i, "_WEIGHT")]-as[,paste0("AS", i-1, "_WEIGHT")])/as[,paste0("AS", i-1, "_WEIGHT")], 1)
}
然而,
i=2
as %<>%
mutate(paste0("AS", i, "_WEIGHT") = 1
会输出相同的错误消息,而
as %<>%
mutate(AS2_WEIGHT) = 1
却可以正常运行。
预期输出:
将每列(as$AS1_WEIGHT ... as$AS9_WEIGHT)中值为99999的替换为同一行中相邻列的值。
我看到可能需要使用!!!,但我感到困惑。
任何评论都会很有帮助。谢谢。
英文:
@r2evans For your request, here is a snapshot of the dataframe


and the output of dput(head(as)) is as follows (partial):
# dput(head(as[, c("AS1_WEIGHT", "AS2_WEIGHT", "AS3_WEIGHT", "AS4_WEIGHT", "AS5_WEIGHT", "AS6_WEIGHT", "AS7_WEIGHT", "AS8_WEIGHT", "AS9_WEIGHT")]))
structure(list(AS1_WEIGHT = c(72, 59, 50, 55.2, 82.1, 50.4),
AS2_WEIGHT = c(74.8, NA, NA, 54.8, 84.5, 52.5), AS3_WEIGHT = c(75.2,
NA, NA, 55.9, 81.7, 54.6), AS4_WEIGHT = c(75, NA, NA, 55.1,
80.6, NA), AS5_WEIGHT = c(75.4, NA, NA, 58.8, 89.5, NA),
AS6_WEIGHT = c(77.3, NA, NA, NA, NA, NA), AS7_WEIGHT = c(70.7,
NA, NA, 56, NA, NA), AS8_WEIGHT = c(73.8, NA, NA, 55.5, NA,
NA), AS9_WEIGHT = c(74.5, NA, NA, 54.8, NA, 52)), class = "data.frame", row.names = c(NA,
-6L))
I want to mutate existing variables in a loop using paste0, but I keep having error messages:
Error: unexpected symbol in:
" dplyr::mutate(AS1_WEIGHT = case_when(as$AS1_WEIGHT == '99999' & as$AS2_WEIGHT == '99999' ~ as$AS3_WEIGHT
as"
MRE:
`as` is a data frame. The code looks a bit complicated, but these are cohort data gathered at 9 different time points.
# Packages attached for the analysis.
# NOTE(review): plyr is attached after dplyr, so bare calls to functions that
# both packages export (e.g. mutate, summarise) will likely hit the plyr
# versions unless written as dplyr::... - confirm this order is intended.
library(dplyr)
library(plyr)
library(magrittr)
library(stringr)
library(ExclusionTable)
library(lubridate)
library(tidyverse)
library(janitor)
library(survival)
library(ggsurvfit)
library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
# Wave indices; one input directory per index is read below.
num_fu = c(1,2,3,4,5,6,7,8,9)
# Placeholders: `as` ends up holding the merged table; `df`/`dfs` are scratch.
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
# Directory prefix; the wave number is appended directly (.../as1, .../as2, ...).
data_dir <- 'C:/Users/thepr/Documents/data/as'
# ---- Wave 1 (its row count is the denominator for the follow-up rates) ----
i=1;
# flnames1: full paths of the files in <data_dir>1 matching "\\.csv".
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
# as1_list: one data frame per file.
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
# Element names: first six characters of each file's base name.
# NOTE(review): the "." in gsub(".csv", ...) is an unescaped regex wildcard.
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
# Fold all tables of this wave into one with successive full joins.
df <- Reduce(full_join, get(paste0("as", i, "_list")))
# as1: df without columns whose contents duplicate an earlier column.
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
# Seed the running merged table (taken from df before the duplicate filter).
dfs <- df
# ---- Waves 2..length(num_fu): same import, then merge and follow-up stats ----
for (i in 2:length(num_fu)){
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
# Left join on RID: every row of dfs is kept, rows found only in df are not.
dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
dfs <- dfs[!duplicated(base::as.list(dfs))]
# Report AS<i>_AREA when a column name of as<i> contains it,
# otherwise fall back to AS<i>_DATA_CLASS.
if(length(grep(paste0("AS", i, "_AREA"), names(get(paste0("as", i))))) > 0){
# fu_<i-1> / fu_loss_<i-1>: RID plus the chosen column, with rows picked by
# whether as1$RID does / does not occur in this wave's RID.
# NOTE(review): the logical index has length nrow(as1) but subsets rows of
# as<i>; confirm the two tables are row-aligned.
assign(paste0("fu_",i-1), get(paste0("as", i))[as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_AREA"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_AREA"))])
# FU rate: rows in this wave divided by rows in wave 1, rounded to 2 decimals.
assign(paste0("fu_rate_", i-1), round(nrow(get(paste0("as", i)))/nrow(as1), 2))
}
else{
# Same three objects as above, using AS<i>_DATA_CLASS as the reported column.
assign(paste0("fu_",i-1), get(paste0("as", i))[as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_DATA_CLASS"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!as1$RID %in% get(paste0("as", i))$RID, c("RID", paste0("AS", i, "_DATA_CLASS"))])
# FU rate: rows in this wave divided by rows in wave 1, rounded to 2 decimals.
assign(paste0("fu_rate_", i-1), round(nrow(get(paste0("as", i)))/nrow(as1), 2))
}
}
# Final merged table, with duplicate-content columns removed once more.
as <- dfs
as <- as[!duplicated(base::as.list(as))]
# Remove the scratch objects (only those that currently exist).
rm(list = intersect(ls(), c("df", "dfs", "i")))
Below is where I am having trouble (including the parsing error):
# Question code, kept verbatim. It does not parse; the notes below mark why.
i=2
# NOTE(review): `:` binds tighter than `-`, so 2:length(num_fu)-1 means
# (2:length(num_fu)) - 1 and the loop starts at i = 1, not 2.
for (i in 2:length(num_fu)-1){
print(i)
if(i<length(num_fu)){
as %<>%
# Parse error 1: a call (paste0(...)) cannot be an argument name left of `=`.
# dplyr's form for a computed column name is `"{name}" := value` or
# `!!name := value`.
# Parse error 2: there is no comma after NA_character_ at the end of the next
# line, which produces "unexpected symbol".
# NOTE(review): NA_character_ is mixed with columns that the dput() output
# shows as numeric, and `na.rm = TRUE` is passed to mutate() rather than to an
# aggregate - confirm both are intended.
dplyr::mutate(paste0("AS", i, "_WEIGHT") = case_when(is.na(as[,paste0("AS", i, "_WEIGHT")]) ~ NA_character_
as[,paste0("AS", i, "_WEIGHT")] == '99999' ~ as[,paste0("AS", i+1, "_WEIGHT")],
as[,paste0("AS", i, "_WEIGHT")] != '99999' ~ as[,paste0("AS", i, "_WEIGHT")],
TRUE ~ NA_character_), na.rm = TRUE)
}
# Percent change between consecutive weights, rounded to 1 decimal.
# NOTE(review): paste0(...) is not a valid assignment target; a computed name
# needs assign() or as[[name]] <- ... . With i = 1 this also reads AS0_WEIGHT.
paste0("D_WEIGHT", i-1) <- round(100*(as[,paste0("AS", i, "_WEIGHT")]-as[,paste0("AS", i-1, "_WEIGHT")])/as[,paste0("AS", i-1, "_WEIGHT")], 1)
}
However,
i=2
as %<>%
mutate(paste0("AS", i, "_WEIGHT") = 1
spits out the same error message, while
as %<>%
mutate(AS2_WEIGHT) = 1
works fine.
Expected output would be:
Every value of 99999 in the columns as$AS1_WEIGHT ... as$AS9_WEIGHT should be replaced by the value from the adjacent column in the same row.
I saw that using !!! might be necessary but I am lost.
Any comments will be helpful. Thank you.
答案1
得分: 2
我假设这里的主要任务是将99999的值替换为右侧列中找到的值。在这里,我进行了长格式转换,以便右侧的值是每个“row”组内的下一个值(即lead(value))。
如果两个相邻的列都有'99999'值,不确定您希望出现什么行为。
library(tidyverse)
# Replace each 99999 sentinel with the value from the next column to the right
# in the same row.
#
# mutate() is namespace-qualified on purpose: the question attaches plyr after
# dplyr, so a bare mutate() would resolve to plyr::mutate, which has no `.by`
# grouping. `.by` itself needs dplyr >= 1.1.0.
#
# Edge cases, following from lead(): a 99999 in the last column becomes NA
# (there is no next column), and when two adjacent columns both hold 99999 the
# first one keeps 99999. The helper column `row` remains in the result.
as <- as %>%
  # Tag every original row so the values can be grouped per row once long.
  dplyr::mutate(row = row_number()) %>%
  # One record per (row, column); column order is preserved within each row.
  pivot_longer(-row) %>%
  # Within a row, lead(value) is the value held by the next column.
  dplyr::mutate(
    value = if_else(value == '99999', lead(value), value),
    .by = row
  ) %>%
  # Back to the original wide layout.
  pivot_wider(names_from = name, values_from = value)
英文:
I'm assuming the main task here is to replace 99999 values with the value found in the next column to the right. Here, I pivot longer so that the value to the right is the following value (i.e. lead(value)) within each row group.
Not sure what behavior you want if two adjacent columns both have '99999' values.
library(tidyverse)
# Replace each 99999 sentinel with the value from the next column to the right
# in the same row.
#
# mutate() is namespace-qualified on purpose: the question attaches plyr after
# dplyr, so a bare mutate() would resolve to plyr::mutate, which has no `.by`
# grouping. `.by` itself needs dplyr >= 1.1.0.
#
# Edge cases, following from lead(): a 99999 in the last column becomes NA
# (there is no next column), and when two adjacent columns both hold 99999 the
# first one keeps 99999. The helper column `row` remains in the result.
as <- as %>%
  # Tag every original row so the values can be grouped per row once long.
  dplyr::mutate(row = row_number()) %>%
  # One record per (row, column); column order is preserved within each row.
  pivot_longer(-row) %>%
  # Within a row, lead(value) is the value held by the next column.
  dplyr::mutate(
    value = if_else(value == '99999', lead(value), value),
    .by = row
  ) %>%
  # Back to the original wide layout.
  pivot_wider(names_from = name, values_from = value)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。


评论