修改多个txt文件的数据框代码

huangapple go评论96阅读模式
英文:

Modify the code of dataframes for multiple txt files

问题

以下是修改后的代码,用于处理文件列表:

  # Read every .txt file below D:/Test3, make sure each table has exactly the
  # columns listed in `var` (missing ones are added and filled with NA), and
  # store the result in the global environment under the file's base name.

  # full.names = TRUE makes list.files() return paths that include the
  # directory, so read.table() can open them whatever the working directory is.
  # `pattern` is a regular expression (not a shell glob), hence "\\.txt$".
  dfs1 <- list.files(
    path = "D:/Test3",
    pattern = "\\.txt$",
    recursive = TRUE,
    full.names = TRUE
  )
  var <- c(
    "City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist"
  )
  lapply(dfs1, function(x) {
    dfn <- read.table(file = x, header = TRUE) # read the data from the file
    missing_cols <- setdiff(var, colnames(dfn)) # columns the file lacks
    if (length(missing_cols) > 0) {
      dfn[, missing_cols] <- NA # add the missing columns, filled with NA
    }
    dfn <- dfn[, var] # keep only the requested columns, in `var` order
    # NOTE(review): files that share a base name in different sub-folders
    # end up in the same variable, so the later one overwrites the earlier.
    assign(basename(x), dfn, envir = .GlobalEnv)
  })

这个修改后的代码会遍历文件列表,读取每个文件的数据,检查并添加缺失的列,然后只选择指定的列,并将修改后的数据框分配给全局环境中的一个新变量,以文件名为变量名。希望这对你有所帮助。

英文:

The following code works on the data frames df1 and df2. For each data frame it checks the column names listed in `var`; if a column is not present, the code adds it and fills that column with NA.

  # For each data frame named in `dfs1` (looked up in the global environment):
  # add any column from `var` that is absent, filled with NA, keep only the
  # `var` columns in that order, and write the result back under the same name.
  dfs1 <- c("df1", "df2")
  var <- c(
    "City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist"
  )
  lapply(dfs1, \(x) {
    dfn <- get(x, envir = .GlobalEnv)
    # `[[<-` addresses a single column, so indexing it with a vector of names
    # is wrong when zero or several columns are missing; `[<-` with a
    # character vector adds any number of columns at once.
    missing_cols <- setdiff(var, names(dfn))
    if (length(missing_cols) > 0) {
      dfn[missing_cols] <- NA
    }
    # Base subsetting selects and reorders the columns without needing dplyr.
    dfn <- dfn[var]
    assign(x, dfn, envir = .GlobalEnv)
  })

If I have a list of files, how can I modify the above code?

I tried the following:

  # Attempt to apply the data-frame snippet to files: dfs1 now holds the file paths returned by list.files().
  1. dfs1 &lt;- list.files(path = &#39;D:/Test3&#39;, pattern = &quot;*txt&quot;, recursive = TRUE)
  2. var &lt;- c(&#39;D/T&#39;, &#39;City_Name&#39;, &#39;Temp&#39;, &#39;Pres&#39;, &#39;Wind_Hor&#39;, &#39;Wind_Ver&#39;, &#39;Rainf&#39;, &#39;S_Moist&#39;)
  3. lapply(dfs1, \(x) {
  4. dfn &lt;- get(x, envir = .GlobalEnv) # x is a path string, not the name of an object in .GlobalEnv, so get() stops with "object ... not found"; the file itself is never read
  5. dfn[[var[which(is.na(match(var,names(dfn))))]]] &lt;- NA # intended to add the columns of var that are absent from dfn, filled with NA
  6. dfn &lt;- dfn %&gt;% select(all_of(var)) # keep only the columns named in var
  7. return(assign(x,dfn,envir = .GlobalEnv)) # store the result in the global environment under the name x
  8. })

But it returns an error:

  1. Error in get(x, envir = .GlobalEnv) :
  2. object &#39;File/File1.txt&#39; not found

Could anyone please explain how to modify the code so that it works with files?

答案1

得分: 2

  library(dplyr)
  library(data.table)

  # Target schema: every processed table ends up with exactly these columns.
  var <- c(
    "City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist"
  )

  # Sample input 1: Wind_Hor and Rainf are deliberately left out.
  df1 <- data.frame(
    City_Name = "NYC",
    Temp = 20,
    Pres = 10,
    Wind_Ver = 5,
    S_Moist = 5
  )

  # Sample input 2: City_Name, Pres and S_Moist are deliberately left out.
  df2 <- data.frame(
    Temp = 15,
    Wind_Hor = 5,
    Wind_Ver = 5,
    Rainf = 15
  )

  # Collect the inputs in a list so they can be processed uniformly.
  dfs1 <- list(df1, df2)

  # For each input: find the columns of `var` it lacks, convert it to a
  # data.table, add the lacking columns as NA, then select the `var` columns.
  processed_dfs <- lapply(dfs1, function(tbl) {
    absent <- setdiff(var, names(tbl))
    setDT(tbl)
    tbl[, (absent) := NA]
    tbl[, ..var]
  })

  # Stack the processed tables into one result.
  # dplyr variant:
  final_df <- bind_rows(processed_dfs)
  # data.table variant (use this one if the result should be a data.table):
  final_df <- rbindlist(processed_dfs)
  # The example above builds its inputs in memory so that it is reproducible.
  # To run the same processing on text files stored on disk, list the files
  # and read each one inside the loop.

  # `pattern` is a regular expression (not a shell glob), so "\\.txt$" is used
  # to select file names that end in ".txt".
  dfs1 <- list.files(
    path = "D:/Test3",
    pattern = "\\.txt$",
    recursive = TRUE
  )
  # list.files() returned names relative to the folder; prepend the folder so
  # the files can be opened from any working directory.
  dfs1_file_paths <- file.path("D:/Test3", dfs1)

  var <- c(
    "D/T", "City_Name", "Temp",
    "Pres", "Wind_Hor",
    "Wind_Ver", "Rainf",
    "S_Moist"
  )

  processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
    # fread() returns a data.table directly; read.table() would need an extra
    # conversion step.
    dfn <- fread(file_x)
    # Columns requested in `var` that this file does not provide.
    var_missing <- setdiff(var, names(dfn))
    # Only assign when something is actually missing.
    if (length(var_missing) > 0) {
      dfn[, (var_missing) := NA]
    }
    # Keep only the `var` columns, in `var` order (data.table `..` prefix
    # looks `var` up outside the table).
    dfn[, ..var]
  })

  # Combine the per-file tables into one result.
  # dplyr variant:
  final_df <- bind_rows(processed_dfs)
  # data.table variant (use this one if the result should be a data.table):
  final_df <- rbindlist(processed_dfs)
  # Hope this helps
英文:
  library(dplyr)
  library(data.table)

  # data.table makes it easy to assign NA to several columns at once via
  # dt[, (character_vector) := NA].

  # Target schema: every processed table ends up with exactly these columns.
  var <- c(
    "City_Name", "Temp", "Pres", "Wind_Hor", "Wind_Ver", "Rainf", "S_Moist"
  )

  # Sample input 1: Wind_Hor and Rainf are deliberately left out.
  df1 <- data.frame(
    City_Name = "NYC",
    Temp = 20,
    Pres = 10,
    Wind_Ver = 5,
    S_Moist = 5
  )

  # Sample input 2: City_Name, Pres and S_Moist are deliberately left out.
  df2 <- data.frame(
    Temp = 15,
    Wind_Hor = 5,
    Wind_Ver = 5,
    Rainf = 15
  )

  # Put the data frames in a list so they can be processed uniformly.
  dfs1 <- list(df1, df2)

  # For each input: find the columns of `var` it lacks, convert it to a
  # data.table, add the lacking columns as NA, then select the `var` columns.
  processed_dfs <- lapply(dfs1, function(dfn) {
    var_missing <- setdiff(var, names(dfn))
    setDT(dfn)
    dfn[, (var_missing) := NA]
    dfn[, ..var]
  })

  # Combine the processed tables into one result.
  # dplyr variant:
  final_df <- bind_rows(processed_dfs)
  # data.table variant (use this one if the result should be a data.table):
  final_df <- rbindlist(processed_dfs)
  # The code above builds its inputs in memory to stay reproducible. To apply
  # the same processing to text files read from disk, list the files and read
  # each one inside the loop.

  # `pattern` is a regular expression (not a shell glob), so "\\.txt$" is used
  # to select file names that end in ".txt".
  dfs1 <- list.files(
    path = "D:/Test3",
    pattern = "\\.txt$",
    recursive = TRUE
  )
  # list.files() returned names relative to the folder; prepend the folder
  # name so the files can be opened from any working directory.
  dfs1_file_paths <- file.path("D:/Test3", dfs1)

  var <- c(
    "D/T", "City_Name", "Temp",
    "Pres", "Wind_Hor",
    "Wind_Ver", "Rainf",
    "S_Moist"
  )

  processed_dfs <- lapply(dfs1_file_paths, function(file_x) {
    # fread() returns a data.table directly; you can also use read.table(),
    # but that needs one more step to convert the result to a data.table.
    dfn <- fread(file_x)
    # Columns requested in `var` that this file does not provide.
    var_missing <- setdiff(var, names(dfn))
    # Only assign when something is actually missing.
    if (length(var_missing) > 0) {
      dfn[, (var_missing) := NA]
    }
    # Keep only the `var` columns, in `var` order (data.table `..` prefix
    # looks `var` up outside the table).
    dfn[, ..var]
  })

  # Combine the per-file tables into one result.
  # dplyr variant:
  final_df <- bind_rows(processed_dfs)
  # data.table variant (use this one if the result should be a data.table):
  final_df <- rbindlist(processed_dfs)
  # Hope this helps

huangapple
  • 本文由 发表于 2023年7月18日 10:51:51
  • 转载请务必保留本文链接:https://go.coder-hub.com/76709242.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定