for loops, eval(parse(text = paste0 … into vectorised lists but keep getting error :attempt to select more than one element in vector Index

huangapple go评论65阅读模式
英文:

for loops, eval(parse(text = paste0 ... into vectorised lists but keep getting error :attempt to select more than one element in vector Index

问题

这是我的原始代码,非常混乱不整洁:
我的先前问题链接

library(dplyr); library(plyr)
library(magrittr); library(stringr) 
library(ExclusionTable)
library(lubridate)
library(tidyverse); library(tidyr)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)

# AA cohort (2 of 3)
## as

# Baseline step (i = 1): read every CSV found in the directory
# paste0(data_dir, i), full-join the tables, and store the result as `as1`.
i=1
num_fu = c(1,2,3,4,5,6,7,8,9)
# Empty placeholders; `df` and `dfs` are overwritten further down.
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as';

# flnames<i>: full paths of the files matching "\\.csv" in paste0(data_dir, i).
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
# as<i>_list: one data frame per file path.
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                        function(x){base::as.data.frame(read.csv(x))}))
# List names: file basename with ".csv" removed, truncated to 6 characters.
# NOTE(review): the "." in ".csv" is unescaped, so it matches any character.
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
# Fold all tables of this step into one data frame with successive full joins.
df <- Reduce(full_join, get(paste0("as", i, "_list")))
# Keep only the first of any columns with identical contents; bind as as<i>.
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
# Seed the running merged table with the joined (not de-duplicated) `df`.
dfs <- df

# Follow-up steps (i = 2 .. length(num_fu)): load each directory the same way
# as the baseline step, left-merge it into `dfs` by RID, and derive the
# per-step tables fu_<i-1>, fu_loss_<i-1> and the ratio fu_rate_<i-1>.
for (i in 2:length(num_fu)){
  assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
  assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                          function(x){base::as.data.frame(read.csv(x))}))
  nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
  assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
  df <- Reduce(full_join, get(paste0("as", i, "_list")))
  # Keep only the first of any columns with identical contents; bind as as<i>.
  assign(paste0("as",i), df[!duplicated(base::as.list(df))])

  # Evaluated only now that as<i> exists; at the top of the loop body
  # get() fails with "object not found" in a fresh session.
  # NOTE(review): this mask has one entry per row of as1 but is used below to
  # index rows of as<i> -- confirm that this alignment is intended.
  RID_common <- as1$RID %in% get(paste0("as", i))$RID

  dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
  dfs <- dfs[!duplicated(base::as.list(dfs))]
  if(paste0("AS", i, "_AREA") %in% colnames(get(paste0("as", i)))){
    assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_AREA"))])
    assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_AREA"))])
    # FU rate
    assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
  }
  else if(paste0("AS", i, "_DATA_CLASS") %in% colnames(get(paste0("as", i)))){
    assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
    assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
    # FU rate
    assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
  }
  else{}
}

在@Gregor Thomas和@joran的有益评论之后,我阅读了以前的帖子,最终开始使用列表和向量。到目前为止,这是我尝试过的:

library(tidyverse) #包括:dplyr,stringr,tidyr
library(magrittr)
library(lubridate)
library(ExclusionTable)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)

# AA cohort (2 of 3)
## as
# List-based attempt: collect the CSV paths of all directories at once and
# try to read them into a single list `as_list`.
i=1
data_dir = c("C:/Users/thepr/Documents/data/as")
num_fu = c(1,2,3,4,5,6,7,8,9)
dirs <- paste0(data_dir, num_fu) # character vector, one path per num_fu entry
as <- data.frame()
df <- data.frame()
dfs <- data.frame()

# Paths of the files matching "\\.csv" across all entries of `dirs`.
flnames <- list.files(path = dirs, pattern = "\\.csv", full.names = TRUE)
# `[[` is applied here with the length-9 index `num_fu`; this is the
# expression named in the error message quoted below. `as_list` is also not
# created anywhere before this indexed assignment.
as_list[[num_fu]] <- lapply(flnames[[num_fu]],
       function(x){base::as.data.frame(read.csv(x))})
names(as_list) <- gsub(".csv", "", basename(flnames[[num_fu]])) %>% str_sub(., 1,6)
# Full-join every table, then keep the first of any identical columns.
df <- Reduce(full_join, as_list)
df <- df[!duplicated(base::as.list(df))]

不知何故我一直收到错误消息:
Error in flnames[[num_fu]] : attempt to select more than one element in vectorIndex

根据How do I make a list of data frames?,我认为我走在正确的方向上。请提供一些见解和思考。将不胜感激,谢谢。

英文:

This was my original code, very messy and untidy:
Link to my previous question

library(dplyr); library(plyr)
library(magrittr); library(stringr) 
library(ExclusionTable)
library(lubridate)
library(tidyverse); library(tidyr)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
# Baseline step (i = 1): read every CSV found in the directory
# paste0(data_dir, i), full-join the tables, and store the result as `as1`.
i=1
num_fu = c(1,2,3,4,5,6,7,8,9)
# Empty placeholders; `df` and `dfs` are overwritten further down.
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as'
# flnames<i>: full paths of the files matching "\\.csv" in paste0(data_dir, i).
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
# as<i>_list: one data frame per file path.
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                        function(x){base::as.data.frame(read.csv(x))}))
# List names: file basename with ".csv" removed, truncated to 6 characters.
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
# Fold all tables of this step into one data frame with successive full joins.
df <- Reduce(full_join, get(paste0("as", i, "_list")))
# Keep only the first of any columns with identical contents; bind as as<i>.
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- df
# Follow-up steps (i = 2 .. length(num_fu)): load each directory the same way
# as the baseline step, left-merge it into `dfs` by RID, and derive the
# per-step tables fu_<i-1>, fu_loss_<i-1> and the ratio fu_rate_<i-1>.
for (i in 2:length(num_fu)){
  assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
  assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                          function(x){base::as.data.frame(read.csv(x))}))
  nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
  assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
  df <- Reduce(full_join, get(paste0("as", i, "_list")))
  # Keep only the first of any columns with identical contents; bind as as<i>.
  assign(paste0("as",i), df[!duplicated(base::as.list(df))])

  # Evaluated only now that as<i> exists; at the top of the loop body
  # get() fails with "object not found" in a fresh session.
  # NOTE(review): this mask has one entry per row of as1 but is used below to
  # index rows of as<i> -- confirm that this alignment is intended.
  RID_common <- as1$RID %in% get(paste0("as", i))$RID

  dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
  dfs <- dfs[!duplicated(base::as.list(dfs))]
  if(paste0("AS", i, "_AREA") %in% colnames(get(paste0("as", i)))){
    assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_AREA"))])
    assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_AREA"))])
    # FU rate
    assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
  }
  else if(paste0("AS", i, "_DATA_CLASS") %in% colnames(get(paste0("as", i)))){
    assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
    assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
    # FU rate
    assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
  }
  else{}
}

After helpful comments by @Gregor Thomas and @joran, I read previous posts and finally started using lists and vectors. Here is what I have tried so far:

library(tidyverse) #Includes: dplyr, stringr, tidyr
library(magrittr)
library(lubridate)
library(ExclusionTable)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
# List-based attempt: collect the CSV paths of all directories at once and
# try to read them into a single list `as_list`.
i=1
data_dir = c("C:/Users/thepr/Documents/data/as")
num_fu = c(1,2,3,4,5,6,7,8,9)
dirs <- paste0(data_dir, num_fu) # character
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
flnames <- list.files(path = dirs, pattern = "\\.csv", full.names = TRUE)
# Kept as posted: `[[` with the length-9 index `num_fu` is the expression
# named in the error message quoted below.
as_list[[num_fu]] <- lapply(flnames[[num_fu]],
                            function(x){base::as.data.frame(read.csv(x))})
names(as_list) <- gsub(".csv", "", basename(flnames[[num_fu]])) %>% str_sub(., 1,6)
# Full-join every table, then keep the first of any identical columns.
df <- Reduce(full_join, as_list)
df <- df[!duplicated(base::as.list(df))]

somehow I keep getting error messages:
Error in flnames[[num_fu]] :
attempt to select more than one element in vectorIndex

Based on How do I make a list of data frames?, I think I am headed in the right direction. Please give some insights and thoughts. Will be appreciated, thanks.

答案1

得分: 1

[[ 始终恰好选择一个元素。这就是错误消息试图告诉您的内容。

根据您的 num_fu 值,您想要做的是切片列表以提取子集。您需要使用 [ 来执行此操作。

此外,在赋值目标中删除索引。也就是说,写成:

# `[` (not `[[`) selects the elements of flnames at positions num_fu;
# read.csv is then applied to each selected path.
as_list = lapply(flnames[num_fu], read.csv)

但是,目前不清楚您实际是否需要索引,因为它与我理解的任何内容都不对应。您是否只是想要以下内容?

# No subsetting at all: read every path listed in flnames.
as_list = lapply(flnames, read.csv)

或者,如果您希望将整个内容放入嵌套列表中,其中每个子目录都有自己的子列表,有几种解决方案。其中之一是以下内容(注意 lapply 的嵌套调用):

# One character vector of CSV paths per entry of `dirs`. The dot is escaped so
# that it matches a literal "." rather than any character.
flnames = lapply(dirs, list.files, pattern = "\\.csv", full.names = TRUE)
# For each directory, read its files into a list whose names are the file
# basenames with the ".csv" suffix removed, truncated to 6 characters.
as_list = lapply(
  flnames,
  function (subdir) {
    setNames(
      lapply(subdir, read.csv),
      sub("\\.csv$", "", basename(subdir)) %>% str_sub(., 1, 6)
    )
  }
)

等等。

不过,我建议保持结构扁平。特别是如果您希望随后将整个内容合并到一个单一的数据框中,可以通过以下方式极大简化整个代码

data_dir = "C:/Users/thepr/Documents/data/as"
num_fu = 1 : 9
dirs = paste0(data_dir, num_fu)
# Escaped dot and "$" anchor: match only names that end in a literal ".csv".
files = list.files(dirs, pattern = "\\.csv$", full.names = TRUE)

# map_dfr() fills the `.id` column from the names of its input and falls back
# to the position index when the input is unnamed, so each path is named by
# itself first; otherwise basename(Filename) would only ever see "1", "2", ...
df = purrr::map_dfr(setNames(files, files), read.csv, .id = "Filename") %>%
  mutate(Filename = str_sub(sub("\\.csv$", "", basename(Filename)), 1, 6))

(顺便说一下,避免混合使用 = 和 <- 进行赋值,这会使代码变得混乱;您可以使用其中任何一个,只是不要混用它们。)

英文:

[[ always selects exactly one element. This is what the error message is trying to tell you.

What you want to do (based on your num_fu value) is to slice the list to extract a subset. You need to use [ to do that.

Furthermore, remove the indexing in the assignment target. That is, write:

# `[` (not `[[`) selects the elements of flnames at positions num_fu;
# read.csv is then applied to each selected path.
as_list = lapply(flnames[num_fu], read.csv)

However, it's unclear that you actually want the indexing here at all, since it does not correspond to anything that I understand. Don't you simply want the following instead?

# No subsetting at all: read every path listed in flnames.
as_list = lapply(flnames, read.csv)

Alternatively, if you want the entire thing in a nested list, where each subdirectory has its own sub-list, there are several solutions. One would be the following (note the nested invocation of lapply):

# One character vector of CSV paths per entry of `dirs`.
flnames = lapply(dirs, list.files, pattern = "\\.csv", full.names = TRUE)
# For each directory, read its files into a list whose names are the file
# basenames with the ".csv" suffix removed, truncated to 6 characters.
as_list = lapply(
  flnames,
  function (subdir) {
    setNames(
      lapply(subdir, read.csv),
      sub("\\.csv$", "", basename(subdir)) %>% str_sub(., 1,6)
    )
  }
)

etc.

However, I would instead recommend keeping the structure flat. And, especially if you want to subsequently merge the entire thing into one single data.frame, you can drastically simplify the entire code as follows:

data_dir = "C:/Users/thepr/Documents/data/as"
num_fu = 1 : 9
dirs = paste0(data_dir, num_fu)
files = list.files(dirs, pattern = "\\.csv$", full.names = TRUE)
# map_dfr() fills the `.id` column from the names of its input and falls back
# to the position index when the input is unnamed, so each path is named by
# itself first; otherwise basename(Filename) would only ever see "1", "2", ...
df = purrr::map_dfr(setNames(files, files), read.csv, .id = "Filename") %>%
  mutate(Filename = str_sub(sub("\\.csv$", "", basename(Filename)), 1, 6))

(Incidentally, avoid mixing = and <- for assignment; it makes the code messy. You can use either one, just don’t mix them.)

huangapple
  • 本文由 发表于 2023年6月6日 14:11:18
  • 转载请务必保留本文链接:https://go.coder-hub.com/76411856.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定