英文:
Modify the code of dataframes for multiple txt files
问题
以下是修改后的代码,用于处理文件列表:
# Read every .txt file below D:/Test3 (sub-folders included). For each file:
# add any column listed in `var` that the table lacks (filled with NA), keep
# only the `var` columns in that order, and store the result in the global
# environment under the file's base name (e.g. "File1.txt").

# `pattern` is a regular expression, not a shell glob: "\\.txt$" keeps only
# names that end in ".txt". The returned names are relative to `path`.
dfs1 <- list.files(path = 'D:/Test3', pattern = "\\.txt$", recursive = TRUE)
var <- c(
  'City_Name', 'Temp', 'Pres', 'Wind_Hor',
  'Wind_Ver', 'Rainf', 'S_Moist'
)

# lapply() is used only for its side effect (assign), so its return value is
# wrapped in invisible() to avoid printing every table at top level.
invisible(lapply(dfs1, function(x) {
  # Prepend the folder: x is relative to 'D:/Test3', so reading it as-is
  # would only work when that folder is the current working directory.
  dfn <- read.table(file = file.path('D:/Test3', x), header = TRUE)
  missing_cols <- setdiff(var, colnames(dfn)) # columns of `var` not in file
  if (length(missing_cols) > 0) {
    dfn[, missing_cols] <- NA # add them, filled with NA
  }
  dfn <- dfn[, var, drop = FALSE] # keep only the requested columns, in order
  assign(basename(x), dfn, envir = .GlobalEnv)
}))
这个修改后的代码会遍历文件列表,读取每个文件的数据,检查并添加缺失的列,然后只选择指定的列,并将修改后的数据框分配给全局环境中的一个新变量,以文件名为变量名。希望这对你有所帮助。
英文:
The following code is for the data frames df1 and df2. It takes the column names in var and checks each data frame; if a column from var is not present, the code adds it and fills that column with NA.
# For each data frame named in `dfs1` (looked up in the global environment):
# add every column of `var` that it lacks, filled with NA, keep only the `var`
# columns in that order, and write the result back under the same name.
# Requires dplyr to be attached (for %>%, select and all_of).
dfs1 <- c('df1', 'df2')
var <- c(
  'City_Name', 'Temp', 'Pres', 'Wind_Hor',
  'Wind_Ver', 'Rainf', 'S_Moist'
)

# lapply() is used only for its side effect (assign), so its return value is
# wrapped in invisible() to avoid printing every table at top level.
invisible(lapply(dfs1, \(x) {
  dfn <- get(x, envir = .GlobalEnv)
  missing_cols <- setdiff(var, names(dfn))
  # `[[<-` addresses a single column, so it cannot add zero or several
  # columns; `[<-` with a character vector adds all missing columns at once.
  if (length(missing_cols) > 0) {
    dfn[missing_cols] <- NA
  }
  dfn <- dfn %>% select(all_of(var))
  assign(x, dfn, envir = .GlobalEnv)
}))
If I have a list of files, how can I modify the above code?
I tried the following:
# Attempt to reuse the data-frame loop for files on disk.
# list.files() returns file names as character strings (relative to `path`,
# e.g. 'File/File1.txt'); it does not read the files or create any objects.
dfs1 <- list.files(path = 'D:/Test3', pattern = "*txt", recursive = TRUE)
var <- c('D/T', 'City_Name', 'Temp', 'Pres', 'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist')
lapply(dfs1, \(x) {
# get() looks up an existing R object whose name is the string x. Here x is a
# file name, no object with that name exists in the global environment, and
# the call stops with "object ... not found". The file itself is never read.
dfn <- get(x, envir = .GlobalEnv)
# Intended to add the columns of `var` that dfn lacks, filled with NA.
# NOTE(review): `[[<-` takes a single name, so this presumably only works when
# exactly one column is missing -- confirm.
dfn[[var[which(is.na(match(var,names(dfn))))]]] <- NA
# Keep only the `var` columns, in that order (dplyr).
dfn <- dfn %>% select(all_of(var))
# Store the result in the global environment under the name x.
return(assign(x,dfn,envir = .GlobalEnv))
})
But it returns an error:
Error in get(x, envir = .GlobalEnv) :
object 'File/File1.txt' not found
Could anyone please explain how to modify the code so that it works with files?
答案1
得分: 2
library(dplyr)
library(data.table)
# Reproducible example: two small data frames, each lacking some of the
# columns listed in `var`.
var <- c(
  "City_Name", "Temp", "Pres", "Wind_Hor",
  "Wind_Ver", "Rainf", "S_Moist"
)

# df1 has no Wind_Hor and no Rainf column.
df1 <- data.frame(
  City_Name = "NYC",
  Temp = 20,
  Pres = 10,
  Wind_Ver = 5,
  S_Moist = 5
)

# df2 has no City_Name, no Pres and no S_Moist column.
df2 <- data.frame(
  Temp = 15,
  Wind_Hor = 5,
  Wind_Ver = 5,
  Rainf = 15
)

# Collect the data frames in a list so they can be processed in one pass.
dfs1 <- list(df1, df2)

# For every data frame: work out which `var` columns are absent, convert to a
# data.table, add the absent columns filled with NA, and return the `var`
# columns in the order given by `var`.
processed_dfs <- lapply(dfs1, function(one_df) {
  absent <- setdiff(var, names(one_df))
  setDT(one_df)
  one_df[, (absent) := NA]
  one_df[, var, with = FALSE]
})

# Combine the processed tables into one.
# dplyr way:
final_df <- bind_rows(processed_dfs)
# data.table way (this assignment replaces the result above):
final_df <- rbindlist(processed_dfs)
# The example above uses in-memory data frames so that it is reproducible.
# To do the same with text files on disk: list the files, build their full
# paths, read each file, and process it in the same way.

# `pattern` is a regular expression, not a shell glob: "\\.txt$" keeps only
# names that end in ".txt". The returned names are relative to `path`.
dfs1 <- list.files(
  path = 'D:/Test3',
  pattern = "\\.txt$",
  recursive = TRUE
)

# Prepend the folder name so each file can be opened from any working
# directory.
dfs1_file_paths <- file.path('D:/Test3', dfs1)

var <- c(
  'D/T', 'City_Name', 'Temp', 'Pres',
  'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist'
)

# For each file: read it, add any `var` column it lacks (filled with NA), and
# return only the `var` columns, in the order given by `var`.
processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
  # fread() returns a data.table; with read.table() an extra setDT() step
  # would be needed before using `:=`.
  dfn <- fread(file_x)
  var_missing <- setdiff(var, names(dfn))
  # Only assign when something is missing, so `:=` never receives an empty
  # left-hand side.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }
  dfn[, ..var]
})

# Combine the processed tables into one; use whichever form you prefer.
# dplyr way:
final_df <- bind_rows(processed_dfs)
# data.table way (this assignment replaces the result above):
final_df <- rbindlist(processed_dfs)
# Hope this helps
英文:
library(dplyr)
library(data.table)
# data.table is convenient here because NA can be assigned to several columns
# at once with dt[, (character_vector) := NA].
var <- c(
  "City_Name", "Temp", "Pres", "Wind_Hor",
  "Wind_Ver", "Rainf", "S_Moist"
)

# First example table: Wind_Hor and Rainf are left out.
df1 <- data.frame(
  City_Name = "NYC", Temp = 20, Pres = 10,
  Wind_Ver = 5, S_Moist = 5
)

# Second example table: City_Name, Pres and S_Moist are left out.
df2 <- data.frame(Temp = 15, Wind_Hor = 5, Wind_Ver = 5, Rainf = 15)

# Put the data frames in a list.
dfs1 <- list(df1, df2)

# Walk over the list: find the `var` columns each table lacks, convert the
# table to a data.table, fill the lacking columns with NA, and select the
# `var` columns in order.
processed_dfs <- lapply(dfs1, function(tbl) {
  to_add <- var[is.na(match(var, names(tbl)))]
  setDT(tbl)
  tbl[, (to_add) := NA]
  tbl[, var, with = FALSE]
})

# Combine the final output.
# dplyr method:
final_df <- bind_rows(processed_dfs)
# data.table method (overwrites the dplyr result):
final_df <- rbindlist(processed_dfs)
# The code above is written to be reproducible. To combine data read from
# text files on disk instead: list the files, build their full paths, read
# each file, and process it in the same way.

# `pattern` is a regular expression, not a shell glob: "\\.txt$" keeps only
# names that end in ".txt". The returned names are relative to `path`.
dfs1 <- list.files(
  path = 'D:/Test3',
  pattern = "\\.txt$",
  recursive = TRUE
)

# Build full file paths by prepending the folder name to each file name.
dfs1_file_paths <- file.path('D:/Test3', dfs1)

var <- c(
  'D/T', 'City_Name', 'Temp', 'Pres',
  'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist'
)

# For each file: read it, add any `var` column it lacks (filled with NA), and
# return only the `var` columns, in the order given by `var`.
processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
  # fread() returns a data.table; with read.table() one more step would be
  # needed to convert the result to a data.table.
  dfn <- fread(file_x)
  var_missing <- setdiff(var, names(dfn))
  # Skip the assignment when no column is missing, so `:=` never receives an
  # empty left-hand side.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }
  dfn[, ..var]
})

# Combine the final output; use whichever form you prefer.
# dplyr method:
final_df <- bind_rows(processed_dfs)
# data.table method (this assignment replaces the result above):
final_df <- rbindlist(processed_dfs)
# Hope this helps
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论