修改多个txt文件的数据框代码

huangapple go评论63阅读模式
英文:

Modify the code of dataframes for multiple txt files

问题

以下是修改后的代码,用于处理文件列表:

# Full paths of every .txt file below D:/Test3 (sub-folders included).
# `pattern` is a regular expression, not a glob, so match on the extension;
# `full.names = TRUE` keeps the directory prefix so read.table() can open the
# files whatever the current working directory is.
dfs1 <- list.files(
  path = 'D:/Test3',
  pattern = "\\.txt$",
  recursive = TRUE,
  full.names = TRUE
)

# Columns every data frame must end up with, in this order.
var <- c('City_Name', 'Temp', 'Pres', 'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist')

# For each file: read it, add any column of `var` it lacks (filled with NA),
# keep only the `var` columns, and store the result in the global environment
# under the file's base name (e.g. "File1.txt").
# NOTE: basename() drops the folder, so two files with the same name in
# different sub-folders overwrite each other.
# invisible() stops the top-level call from printing every data frame.
invisible(lapply(dfs1, function(x) {
  dfn <- read.table(file = x, header = TRUE)
  missing_cols <- setdiff(var, colnames(dfn))

  if (length(missing_cols) > 0) {
    dfn[missing_cols] <- NA
  }

  dfn <- dfn[, var, drop = FALSE]
  assign(basename(x), dfn, envir = .GlobalEnv)
}))

这个修改后的代码会遍历文件列表,读取每个文件的数据,检查并添加缺失的列,然后只选择指定的列,并将修改后的数据框分配给全局环境中的一个新变量,以文件名为变量名。希望这对你有所帮助。

英文:

The following code works on the data frames df1 and df2. It takes the column names in `var` and checks each data frame: if a column from `var` is not present, the code adds it and fills that column with NA.

# Names of the data frames (already present in the global environment) to fix.
dfs1 <- c('df1', 'df2')

# Columns every data frame must end up with, in this order.
var <- c('City_Name', 'Temp', 'Pres', 'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist')

# For each named data frame: add the columns of `var` it lacks (filled with
# NA), restrict/reorder it to `var`, and write it back under the same name.
lapply(dfs1, \(x) {
  dfn <- get(x, envir = .GlobalEnv)
  # Use `[` rather than `[[`: `[[` addresses a single element, so it cannot
  # add zero or several missing columns in one assignment.
  missing_cols <- setdiff(var, names(dfn))
  if (length(missing_cols) > 0) {
    dfn[missing_cols] <- NA
  }
  dfn <- dfn %>% select(all_of(var))
  assign(x, dfn, envir = .GlobalEnv)
})

If I have a list of files, how can I modify the above code?

I tried the following:

# Attempt to apply the same code to files. This is the version that raises the
# "object 'File/File1.txt' not found" error: list.files() returns file names
# (character strings), while get() looks for an R object with that name in the
# global environment -- the files themselves are never read.
dfs1 <- list.files(path = 'D:/Test3', pattern = "*txt", recursive = TRUE)
var <- c('D/T', 'City_Name', 'Temp', 'Pres', 'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist')
lapply(dfs1, \(x) {
  dfn <- get(x, envir = .GlobalEnv)
  dfn[[var[which(is.na(match(var,names(dfn))))]]] <- NA
  dfn <- dfn %>% select(all_of(var))
  return(assign(x,dfn,envir = .GlobalEnv))
})

But it returns an error:

Error in get(x, envir = .GlobalEnv) :
object 'File/File1.txt' not found

Could anyone please explain how to modify the code so that it works on files?

答案1

得分: 2

library(dplyr)
library(data.table)

# Columns every output table must contain, in this order.
var <- c("City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist")

# Example input 1: Wind_Hor and Rainf are deliberately left out.
df1 <- data.frame(
  City_Name = "NYC",
  Temp = 20,
  Pres = 10,
  # Wind_Hor = 5,
  Wind_Ver = 5,
  # Rainf = 10,
  S_Moist = 5
)

# Example input 2: City_Name, Pres and S_Moist are deliberately left out.
df2 <- data.frame(
  # City_Name = "NYC",
  Temp = 15,
  # Pres = 15,
  Wind_Hor = 5,
  Wind_Ver = 5,
  Rainf = 15
  # S_Moist = 5
)

# Put the data frames in a list so they can be processed in one pass.
dfs1 <- list(df1, df2)

# For every data frame: add the columns of `var` it lacks (filled with NA) and
# return a data.table holding exactly the `var` columns, in that order.
processed_dfs <- lapply(dfs1, function(dfn) {
  # as.data.table() works on a copy; setDT() converts by reference, which can
  # also alter the object still held in dfs1.
  dfn <- as.data.table(dfn)

  var_missing <- setdiff(var, names(dfn))

  # Only assign when something is missing, so `:=` never gets an empty LHS.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }

  dfn[, ..var] # `..var` = use the character vector `var` as column names
})

# Combine the pieces into one table -- pick ONE of the two lines below.
# dplyr method:
final_df <- bind_rows(processed_dfs)

# data.table method (overwrites the result above with a data.table):
final_df <- rbindlist(processed_dfs)

# The code above uses in-memory data frames so that it is reproducible.
# To do the same for text files stored on disk, read each file first:

# File names (relative to D:/Test3) of every .txt file, sub-folders included.
# `pattern` is a regular expression, not a glob, so match on the extension.
dfs1 <- list.files(
  path = "D:/Test3",
  pattern = "\\.txt$",
  recursive = TRUE
)

# Prepend the folder so each entry is a path that can actually be opened.
dfs1_file_paths <- file.path("D:/Test3", dfs1)

# Columns every output table must contain, in this order.
var <- c(
  "D/T", "City_Name", "Temp",
  "Pres", "Wind_Hor",
  "Wind_Ver", "Rainf",
  "S_Moist"
)

# For every file: read it, add the columns of `var` it lacks (filled with NA)
# and return a data.table holding exactly the `var` columns, in that order.
processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
  # fread() returns a data.table directly; with read.table() an extra
  # conversion step to data.table would be needed.
  dfn <- fread(file_x)

  var_missing <- setdiff(var, names(dfn))

  # Only assign when something is missing, so `:=` never gets an empty LHS.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }

  dfn[, ..var] # `..var` = use the character vector `var` as column names
})

# Combine the pieces into one table -- pick ONE of the two lines below.
# dplyr method:
final_df <- bind_rows(processed_dfs)

# data.table method (overwrites the result above with a data.table):
final_df <- rbindlist(processed_dfs)

# Hope this helps
英文:
library(dplyr)
library(data.table)

# data.table makes it easy to add several NA columns at once with
# dt[, (character_vector) := NA].

# Columns every output table must contain, in this order.
var <- c("City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist")

# Example input 1: Wind_Hor and Rainf are deliberately left out.
df1 <- data.frame(
  City_Name = "NYC",
  Temp = 20,
  Pres = 10,
  # Wind_Hor = 5,
  Wind_Ver = 5,
  # Rainf = 10,
  S_Moist = 5
)

# Example input 2: City_Name, Pres and S_Moist are deliberately left out.
df2 <- data.frame(
  # City_Name = "NYC",
  Temp = 15,
  # Pres = 15,
  Wind_Hor = 5,
  Wind_Ver = 5,
  Rainf = 15
  # S_Moist = 5
)

# Put the data frames in a list so they can be processed in one pass.
dfs1 <- list(df1, df2)

# For every data frame: add the columns of `var` it lacks (filled with NA) and
# return a data.table holding exactly the `var` columns, in that order.
processed_dfs <- lapply(dfs1, function(dfn) {
  # as.data.table() works on a copy; setDT() converts by reference, which can
  # also alter the object still held in dfs1.
  dfn <- as.data.table(dfn)

  var_missing <- setdiff(var, names(dfn))

  # Only assign when something is missing, so `:=` never gets an empty LHS.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }

  dfn[, ..var] # `..var` = use the character vector `var` as column names
})

# Combine the pieces into one table -- pick ONE of the two lines below.
# dplyr method:
final_df <- bind_rows(processed_dfs)

# data.table method (overwrites the result above with a data.table):
final_df <- rbindlist(processed_dfs)
# The code above uses in-memory data frames so that it is reproducible.
# To do the same for text files stored on disk, read each file first:

# File names (relative to D:/Test3) of every .txt file, sub-folders included.
# `pattern` is a regular expression, not a glob, so match on the extension.
dfs1 <- list.files(
  path = "D:/Test3",
  pattern = "\\.txt$",
  recursive = TRUE
)

# Prepend the folder so each entry is a path that can actually be opened.
dfs1_file_paths <- file.path("D:/Test3", dfs1)

# Columns every output table must contain, in this order.
var <- c(
  "D/T", "City_Name", "Temp",
  "Pres", "Wind_Hor",
  "Wind_Ver", "Rainf",
  "S_Moist"
)

# For every file: read it, add the columns of `var` it lacks (filled with NA)
# and return a data.table holding exactly the `var` columns, in that order.
processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
  # fread() returns a data.table directly; with read.table() an extra
  # conversion step to data.table would be needed.
  dfn <- fread(file_x)

  var_missing <- setdiff(var, names(dfn))

  # Only assign when something is missing, so `:=` never gets an empty LHS.
  if (length(var_missing) > 0) {
    dfn[, (var_missing) := NA]
  }

  dfn[, ..var] # `..var` = use the character vector `var` as column names
})

# Combine the pieces into one table -- pick ONE of the two lines below.
# dplyr method:
final_df <- bind_rows(processed_dfs)

# data.table method (overwrites the result above with a data.table):
final_df <- rbindlist(processed_dfs)

# Hope this helps

huangapple
  • 本文由 发表于 2023年7月18日 10:51:51
  • 转载请务必保留本文链接:https://go.coder-hub.com/76709242.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定