2023年6月6日 14:11:18go评论91阅读模式

英文:

for loops, eval(parse(text = paste0 ... into vectorised lists but keep getting error :attempt to select more than one element in vector Index

问题

这是我的原始代码，非常混乱不整洁：
我的先前问题链接

library(dplyr); library(plyr)
library(magrittr); library(stringr) 
library(ExclusionTable)
library(lubridate)
library(tidyverse); library(tidyr)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
num_fu = c(1,2,3,4,5,6,7,8,9)
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as'
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                        function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- df
for (i in 2:length(num_fu)){
RID_common <- as1$RID %in% get(paste0("as", i))$RID
      assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
      assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
                                              function(x){base::as.data.frame(read.csv(x))}))
      nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
      assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
      df <- Reduce(full_join, get(paste0("as", i, "_list")))
      assign(paste0("as",i), df[!duplicated(base::as.list(df))])
      
      dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
      dfs <- dfs[!duplicated(base::as.list(dfs))]
            if(paste0("AS", i, "_AREA") %in% colnames(get(paste0("as", i)))){
              assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_AREA"))])
              assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_AREA"))])
            # FU rate
              assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
            }
            else if(paste0("AS", i, "_DATA_CLASS") %in% colnames(get(paste0("as", i)))){
              assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
              assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
            # FU rate
              assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
            }
            else{}
}

在@Gregor Thomas和@joran的有益评论之后，我阅读了以前的帖子，最终开始使用列表和向量。到目前为止，这是我尝试过的：

library(tidyverse) #包括：dplyr，stringr，tidyr
library(magrittr)
library(lubridate)
library(ExclusionTable)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
data_dir = c("C:/Users/thepr/Documents/data/as")
num_fu = c(1,2,3,4,5,6,7,8,9)
dirs <- paste0(data_dir, num_fu) # 字符串
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
flnames <- list.files(path = dirs, pattern = "\\.csv", full.names = TRUE)
as_list[[num_fu]] <- lapply(flnames[[num_fu]],
       function(x){base::as.data.frame(read.csv(x))})
names(as_list) <- gsub(".csv", "", basename(flnames[[num_fu]])) %>% str_sub(., 1,6)
df <- Reduce(full_join, as_list)
df <- df[!duplicated(base::as.list(df))]

不知何故我一直收到错误消息：
Error in flnames[[num_fu]] : attempt to select more than one element in vectorIndex

根据How do I make a list of data frames?，我认为我走在正确的方向上。请提供一些见解和思考。将不胜感激，谢谢。

英文:

This was my original code, very messy and untidy:
Link to my previous question

library(dplyr); library(plyr)
library(magrittr); library(stringr) 
library(ExclusionTable)
library(lubridate)
library(tidyverse); library(tidyr)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
num_fu = c(1,2,3,4,5,6,7,8,9)
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as'
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- df
for (i in 2:length(num_fu)){
RID_common <- as1$RID %in% get(paste0("as", i))$RID
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
dfs <- dfs[!duplicated(base::as.list(dfs))]
if(paste0("AS", i, "_AREA") %in% colnames(get(paste0("as", i)))){
assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_AREA"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_AREA"))])
# FU rate
assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
}
else if(paste0("AS", i, "_DATA_CLASS") %in% colnames(get(paste0("as", i)))){
assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
# FU rate
assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
}
else{}
}

After helpful comments by @Gregor Thomas @joran, I read previous posts and finally started using lists and vectors. Here is what I have tried so far:

library(tidyverse) #Includes: dplyr, stringr, tidyr
library(magrittr)
library(lubridate)
library(ExclusionTable)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
data_dir = c("C:/Users/thepr/Documents/data/as")
num_fu = c(1,2,3,4,5,6,7,8,9)
dirs <- paste0(data_dir, num_fu) # character
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
flnames <- list.files(path = dirs, pattern = "\\.csv", full.names = TRUE)
as_list[[num_fu]] <- lapply(flnames[[num_fu]],
function(x){base::as.data.frame(read.csv(x))})
names(as_list) <- gsub(".csv", "", basename(flnames[[num_fu]])) %>% str_sub(., 1,6)
df <- Reduce(full_join, as_list)
df <- df[!duplicated(base::as.list(df))]

somehow I keep getting error messages:
Error in flnames[[num_fu]] : attempt to select more than one element in vectorIndex

Based on How do I make a list of data frames?, I think I am headed in the right direction. Any insights and thoughts would be appreciated, thanks.

答案1

得分: 1

[[ 始终恰好选择一个元素。这就是错误消息试图告诉您的内容。

根据您的 num_fu 值，您想要做的是切片列表以提取子集。您需要使用 [ 来执行此操作。

此外，在赋值目标中删除索引。也就是说，写成：

as_list = lapply(flnames[num_fu], read.csv)

但是，目前不清楚您实际是否需要索引，因为它与我理解的任何内容都不对应。您是否只是想要以下内容？

as_list = lapply(flnames, read.csv)

或者，如果您希望将整个内容放入嵌套列表中，其中每个子目录都有自己的子列表，有几种解决方案。其中之一是以下内容（注意 lapply 的嵌套调用）：

flnames = lapply(dirs, list.files, pattern = "\\.csv", full.names = TRUE)
as_list = lapply(
  flnames,
  function (subdir) {
    setNames(
      lapply(subdir, read.csv),
      sub("\\.csv$", "", basename(subdir)) %>% str_sub(., 1, 6)
    )
  }
)

等等。

不过，我建议保持结构扁平。特别是如果您希望随后将整个内容合并到一个单一的数据框中，可以通过以下方式极大简化整个代码：

data_dir = "C:/Users/thepr/Documents/data/as"
num_fu = 1 : 9
dirs = paste0(data_dir, num_fu)
files = list.files(dirs, pattern = "\\.csv$", full.names = TRUE)
df = purrr::map_dfr(files, read.csv, .id = "Filename") %>%
  mutate(Filename = str_sub(sub("\\.csv$", "", basename(Filename)), 1, 6))

（顺便说一下，避免混合使用 = 和 <- 进行赋值，这会使代码变得混乱；您可以使用其中任何一个，只是不要混用它们。）

英文:

[[ always selects exactly one element. This is what the error message is trying to tell you.

What you want to do (based on your num_fu value) is to slice the list to extract a subset. You need to use [ to do that.

Furthermore, remove the indexing in the assignment target. That is, write:

as_list = lapply(flnames[num_fu], read.csv)

However, it's unclear that you actually want the indexing here at all, since it does not correspond to anything that I understand. Don't you simply want the following instead?

as_list = lapply(flnames, read.csv)

Alternatively, if you want the entire thing in a nested list, where each subdirectory has its own sub-list, there are several solutions. One would be the following (note the nested invocation of lapply):

flnames = lapply(dirs, list.files, pattern = "\\.csv", full.names = TRUE)
as_list = lapply(
flnames,
function (subdir) {
setNames(
lapply(subdir, read.csv),
sub("\\.csv$", "", basename(subdir)) %>% str_sub(., 1,6)
)
}
)

etc.

However, I would instead recommend keeping the structure flat. And, especially if you want to subsequently merge the entire thing into one single data.frame, you can drastically simplify the entire code as follows:

data_dir = "C:/Users/thepr/Documents/data/as"
num_fu = 1 : 9
dirs = paste0(data_dir, num_fu)
files = list.files(dirs, pattern = "\\.csv$", full.names = TRUE)
df = purrr::map_dfr(files, read.csv, .id = "Filename") %>%
mutate(Filename = str_sub(sub("\\.csv$", "", basename(Filename)), 1, 6))

(Incidentally, avoid mixing = and <- for assignment, it makes the code messy; you can use either one, just don’t mix them.)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

for loops, eval(parse(text = paste0 … into vectorised lists but keep getting error :attempt to select more than one element in vector Index

问题

答案1

在R的tidymodels中，如何设置评估逻辑模型的默认水平？

如何在一个逐行处理矩阵的for循环中包含一些列向计算？

如何转置排名数据并将单元格名称转换为列名称？

重新分类栅格tif为唯一数值

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。