文件监视器循环无法在重新运行代码时继续上次的位置。

huangapple go评论108阅读模式
英文:

File watcher loop cannot continue where it left off when re-running the code

问题

以下是代码的翻译部分:

  1. import os
  2. import glob
  3. import json
  4. import pandas as pd
  5. import time
  6. from datetime import datetime
  7. import openpyxl
  8. from openpyxl.utils.dataframe import dataframe_to_rows
  9. def jsonFilesInDirectory(my_dir: str):
  10. # 获取目录中的 JSON 文件列表
  11. json_files = glob.glob(os.path.join(my_dir, "*.json"))
  12. return json_files
  13. def clean_value(value):
  14. # 清理数字值,去除不必要的字符
  15. return float(value.replace('\xa0s', '').replace('\xa0ms', '').replace(',', ''))
  16. def doThingsWithNewFiles(fileDiff: list, my_dir: str, workbook):
  17. for file_name in fileDiff:
  18. file_path = os.path.join(my_dir, file_name)
  19. with open(file_path, 'r', encoding='utf-8') as file:
  20. try:
  21. json_data = json.load(file)
  22. # 从 JSON 文件中提取数据
  23. url = json_data["finalUrl"]
  24. fetch_time = json_data["fetchTime"]
  25. audits = json_data["audits"]
  26. fcp_metric = audits["first-contentful-paint"]["id"]
  27. fcp_value = audits["first-contentful-paint"]["displayValue"]
  28. fcp_score = audits["first-contentful-paint"]["score"]
  29. lcp_metric = audits["largest-contentful-paint"]["id"]
  30. lcp_value = audits["largest-contentful-paint"]["displayValue"]
  31. lcp_score = audits["largest-contentful-paint"]["score"]
  32. fmp_metric = audits["first-meaningful-paint"]["id"]
  33. fmp_value = audits["first-meaningful-paint"]["displayValue"]
  34. fmp_score = audits["first-meaningful-paint"]["score"]
  35. si_metric = audits["speed-index"]["id"]
  36. si_value = audits["speed-index"]["displayValue"]
  37. si_score = audits["speed-index"]["score"]
  38. tbt_metric = audits["total-blocking-time"]["id"]
  39. tbt_value = audits["total-blocking-time"]["displayValue"]
  40. tbt_score = audits["total-blocking-time"]["score"]
  41. cls_metric = audits["cumulative-layout-shift"]["id"]
  42. cls_value = audits["cumulative-layout-shift"]["displayValue"]
  43. cls_score = audits["cumulative-layout-shift"]["score"]
  44. categories = json_data["categories"]
  45. perf_metric = categories["performance"]["id"]
  46. perf_value = 0
  47. perf_score = categories["performance"]["score"]
  48. # 清理值并格式化获取时间
  49. cleaned_fcp_value = clean_value(fcp_value)
  50. cleaned_lcp_value = clean_value(lcp_value)
  51. cleaned_fmp_value = clean_value(fmp_value)
  52. cleaned_si_value = clean_value(si_value)
  53. cleaned_tbt_value = clean_value(tbt_value)
  54. datetime_obj = datetime.strptime(fetch_time, "%Y-%m-%dT%H:%M:%S.%fZ")
  55. cleaned_fetch_time = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
  56. # 为 DataFrame 创建数据字典
  57. data_dict = {
  58. "fetch_time": [cleaned_fetch_time] * 7,
  59. "url": [url] * 7,
  60. "metric": ["performance","first_contentful_paint", "largest_contentful_paint",
  61. "first-meaningful-paint", "speed-index", "total-blocking-time",
  62. "cumulative-layout-shift"],
  63. "value": [perf_value, cleaned_fcp_value, cleaned_lcp_value,
  64. cleaned_fmp_value, cleaned_si_value, cleaned_tbt_value,
  65. cls_value],
  66. "score": [perf_score, fcp_score, lcp_score, fmp_score, si_score, tbt_score, cls_score]
  67. }
  68. df = pd.DataFrame(data_dict)
  69. # 将 DataFrame 追加到 Excel 文件
  70. sheet_name = "Sheet1"
  71. if sheet_name in workbook.sheetnames:
  72. sheet = workbook[sheet_name]
  73. startrow = sheet.max_row
  74. for row in dataframe_to_rows(df, index=False, header=False):
  75. sheet.append(row)
  76. else:
  77. sheet = workbook.create_sheet(sheet_name)
  78. for row in dataframe_to_rows(df, index=False, header=True):
  79. sheet.append(row)
  80. print(f"从文件 {file_name} 中提取数据并追加到 Excel 文件")
  81. except KeyError as e:
  82. print(f"处理文件 '{file_name}' 时发生 KeyError 错误: {e}")
  83. except json.JSONDecodeError as e:
  84. print(f"处理文件 '{file_name}' 时发生 JSONDecodeError 错误: {e}")
  85. except Exception as e:
  86. print(f"处理文件 '{file_name}' 时发生错误: {e}")
  87. # 其余部分省略...

如果您需要对其他部分进行翻译,请提供相应的代码段,并告诉我需要翻译的内容。

英文:

I have create this File Watcher Loop, when I run the code, it scans a specific folder for .json files and append to an 'output.xls' file. Then the code continues to run in a loop, scanning new files in the folder, and repeat the process. This works just fine, however, when I stop the code (laptop turn-off, or something), new files are still being added to the folder, and then when I re-run the code, I cannot continue where I left off, I have to delete the output.xls file and start over again.

Is there a way for this to save the history of the files already appended when I stop the code, and continue adding files that have not been appended when I re-run the code?

  1. import os
  2. import glob
  3. import json
  4. import pandas as pd
  5. import time
  6. from datetime import datetime
  7. import openpyxl
  8. from openpyxl.utils.dataframe import dataframe_to_rows
  9. def jsonFilesInDirectory(my_dir: str):
  10. # Get a list of JSON files in the directory
  11. json_files = glob.glob(os.path.join(my_dir, "*.json"))
  12. return json_files
  13. def clean_value(value):
  14. # Clean up numeric values by removing unnecessary characters
  15. return float(value.replace('\xa0s', '').replace('\xa0ms', '').replace(',', ''))
  16. def doThingsWithNewFiles(fileDiff: list, my_dir: str, workbook):
  17. for file_name in fileDiff:
  18. file_path = os.path.join(my_dir, file_name)
  19. with open(file_path, 'r', encoding='utf-8') as file:
  20. try:
  21. json_data = json.load(file)
  22. # Extract data from the JSON file
  23. url = json_data["finalUrl"]
  24. fetch_time = json_data["fetchTime"]
  25. audits = json_data["audits"]
  26. fcp_metric = audits["first-contentful-paint"]["id"]
  27. fcp_value = audits["first-contentful-paint"]["displayValue"]
  28. fcp_score = audits["first-contentful-paint"]["score"]
  29. lcp_metric = audits["largest-contentful-paint"]["id"]
  30. lcp_value = audits["largest-contentful-paint"]["displayValue"]
  31. lcp_score = audits["largest-contentful-paint"]["score"]
  32. fmp_metric = audits["first-meaningful-paint"]["id"]
  33. fmp_value = audits["first-meaningful-paint"]["displayValue"]
  34. fmp_score = audits["first-meaningful-paint"]["score"]
  35. si_metric = audits["speed-index"]["id"]
  36. si_value = audits["speed-index"]["displayValue"]
  37. si_score = audits["speed-index"]["score"]
  38. tbt_metric = audits["total-blocking-time"]["id"]
  39. tbt_value = audits["total-blocking-time"]["displayValue"]
  40. tbt_score = audits["total-blocking-time"]["score"]
  41. cls_metric = audits["cumulative-layout-shift"]["id"]
  42. cls_value = audits["cumulative-layout-shift"]["displayValue"]
  43. cls_score = audits["cumulative-layout-shift"]["score"]
  44. categories = json_data["categories"]
  45. perf_metric = categories["performance"]["id"]
  46. perf_value = 0
  47. perf_score = categories["performance"]["score"]
  48. # Clean up values and format the fetch time
  49. cleaned_fcp_value = clean_value(fcp_value)
  50. cleaned_lcp_value = clean_value(lcp_value)
  51. cleaned_fmp_value = clean_value(fmp_value)
  52. cleaned_si_value = clean_value(si_value)
  53. cleaned_tbt_value = clean_value(tbt_value)
  54. datetime_obj = datetime.strptime(fetch_time, "%Y-%m-%dT%H:%M:%S.%fZ")
  55. cleaned_fetch_time = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
  56. # Create a data dictionary for the DataFrame
  57. data_dict = {
  58. "fetch_time": [cleaned_fetch_time] * 7,
  59. "url":
    * 7,
  60. "metric": ["performance","first_contentful_paint", "largest_contentful_paint",
  61. "first-meaningful-paint", "speed-index", "total-blocking-time",
  62. "cumulative-layout-shift"],
  63. "value": [perf_value, cleaned_fcp_value, cleaned_lcp_value,
  64. cleaned_fmp_value, cleaned_si_value, cleaned_tbt_value,
  65. cls_value],
  66. "score": [perf_score, fcp_score, lcp_score, fmp_score, si_score, tbt_score, cls_score]
  67. }
  68. df = pd.DataFrame(data_dict)
  69. # Append the DataFrame to the Excel file
  70. sheet_name = "Sheet1"
  71. if sheet_name in workbook.sheetnames:
  72. sheet = workbook[sheet_name]
  73. startrow = sheet.max_row
  74. for row in dataframe_to_rows(df, index=False, header=False):
  75. sheet.append(row)
  76. else:
  77. sheet = workbook.create_sheet(sheet_name)
  78. for row in dataframe_to_rows(df, index=False, header=True):
  79. sheet.append(row)
  80. print(f"Data extracted from {file_name} and appended to the Excel file")
  81. except KeyError as e:
  82. print(f"KeyError occurred while processing file '{file_name}': {e}")
  83. except json.JSONDecodeError as e:
  84. print(f"JSONDecodeError occurred while processing file '{file_name}': {e}")
  85. except Exception as e:
  86. print(f"An error occurred while processing file '{file_name}': {e}")
  87. def fileWatcher(my_dir: str, pollTime: int):
  88. excel_file_path = os.path.join(my_dir, 'output.xlsx')
  89. existingFiles = []
  90. # Check if the output file already exists
  91. if os.path.isfile(excel_file_path):
  92. try:
  93. workbook = openpyxl.load_workbook(excel_file_path)
  94. existingFiles = jsonFilesInDirectory(my_dir)
  95. # Process the existing JSON files and append data to the Excel file
  96. doThingsWithNewFiles(existingFiles, my_dir, workbook)
  97. print("Existing JSON files processed and data appended to the Excel file")
  98. except openpyxl.utils.exceptions.InvalidFileException:
  99. workbook = openpyxl.Workbook()
  100. else:
  101. workbook = openpyxl.Workbook()
  102. # Check for new files at startup
  103. newFileList = jsonFilesInDirectory(my_dir)
  104. fileDiff = listComparison(existingFiles, newFileList)
  105. existingFiles = newFileList
  106. if len(fileDiff) > 0:
  107. # Process the new files and append data to the Excel file
  108. doThingsWithNewFiles(fileDiff, my_dir, workbook)
  109. # Save the Excel file
  110. workbook.save(excel_file_path)
  111. print(f"DataFrame exported to {excel_file_path}")
  112. while True:
  113. time.sleep(pollTime)
  114. # Get the updated list of JSON files in the directory
  115. newFileList = jsonFilesInDirectory(my_dir)
  116. # Find the difference between the previous and new file lists
  117. fileDiff = listComparison(existingFiles, newFileList)
  118. existingFiles = newFileList
  119. if len(fileDiff) > 0:
  120. # Process the new files and append data to the Excel file
  121. doThingsWithNewFiles(fileDiff, my_dir, workbook)
  122. # Save the Excel file
  123. workbook.save(excel_file_path)
  124. print(f"DataFrame exported to {excel_file_path}")
  125. def listComparison(originalList: list, newList: list):
  126. # Compare two lists and return the differences
  127. differencesList = [x for x in newList if x not in originalList]
  128. return differencesList
  129. my_dir = r"Z:"
  130. pollTime = 60
  131. fileWatcher(my_dir, pollTime)

答案1

得分: 0

最简单的想法:获取自上次更新Excel文件以来更改的文件列表,使用os.path.getmtime获取Excel文件和所有JSON文件的最后更改时间,并选择那些更新的JSON文件。如果Excel文件存在,在启动时执行此操作,并处理所选的每个JSON文件,就好像它们是由监视程序检测到的一样。

然而,这可能会引入一些关于在断电附近处理的文件的歧义。因此,更准确的想法是:保存已处理的JSON文件列表,无论是在Excel文件内部还是在另一个地方(例如另一个文件或数据库中)。

甚至更精细的想法是使用一个数据库,将数据保存为与JSON文件相关联的键,将数据库用作唯一的真相来源,并根据需要从数据库生成Excel文件。

另外,覆盖Excel文件是可能的故障点。在这种情况下的一个良好做法是将数据写入同一目录中的临时文件,然后执行os.rename,这将原子地用新文件替换旧文件。

英文:

The simplest idea: To get the list of files changed since your last update to the Excel file, use os.path.getmtime to get the time of the last change of the Excel file and of all the JSON files, and select those JSON files that are newer. Do this at startup if the Excel file exists, and process each of the selected JSON files as if they were detected by the watcher.

However this could introduce some ambiguity about the files that are processed very near the power loss. So instead, the more accurate idea: save the list of processed JSON files, whether inside the Excel file, or in a separate place (e.g. another file, or a database).

An even more refined idea is to use a database where you save the data keyed to the JSON file, using the database as the single source of truth, and generate the Excel file from the database as needed.


As an aside, overwriting the Excel file is a possible point of failure. A good practice to do in this situation is to write to a temporary file in the same directory, then perform os.rename, which will atomically replace the old file with the new one.

答案2

得分: 0

以下是您要的代码翻译:

  1. 你可以创建一个文本文件其中存储了扫描文件的列表
  2. 更新了你的代码以检查文件是否存在并将文本文件写入
  3. datetime 导入 datetime
  4. 导入 glob
  5. 导入 json
  6. 导入 openpyxl
  7. openpyxl.utils.dataframe 导入 dataframe_to_rows
  8. 导入 os
  9. 导入 pandas 作为 pd
  10. 导入 time
  11. def jsonFilesInDirectory(my_dir: str):
  12. # 获取目录中的 JSON 文件列表
  13. json_files = glob.glob(os.path.join(my_dir, "*.json"))
  14. return json_files
  15. def clean_value(value):
  16. # 清理数值,去除不必要的字符
  17. return float(value.replace('\xa0s', '').replace('\xa0ms', '').replace(',', ''))
  18. def doThingsWithNewFiles(fileDiff: list, my_dir: str, workbook):
  19. for file_name in fileDiff:
  20. file_path = os.path.join(my_dir, file_name)
  21. with open(file_path, 'r', encoding='utf-8') as file:
  22. try:
  23. json_data = json.load(file)
  24. # 从 JSON 文件中提取数据
  25. url = json_data["finalUrl"]
  26. fetch_time = json_data["fetchTime"]
  27. audits = json_data["audits"]
  28. fcp_metric = audits["first-contentful-paint"]["id"]
  29. fcp_value = audits["first-contentful-paint"]["displayValue"]
  30. fcp_score = audits["first-contentful-paint"]["score"]
  31. lcp_metric = audits["largest-contentful-paint"]["id"]
  32. lcp_value = audits["largest-contentful-paint"]["displayValue"]
  33. lcp_score = audits["largest-contentful-paint"]["score"]
  34. fmp_metric = audits["first-meaningful-paint"]["id"]
  35. fmp_value = audits["first-meaningful-paint"]["displayValue"]
  36. fmp_score = audits["first-meaningful-paint"]["score"]
  37. si_metric = audits["speed-index"]["id"]
  38. si_value = audits["speed-index"]["displayValue"]
  39. si_score = audits["speed-index"]["score"]
  40. tbt_metric = audits["total-blocking-time"]["id"]
  41. tbt_value = audits["total-blocking-time"]["displayValue"]
  42. tbt_score = audits["total-blocking-time"]["score"]
  43. cls_metric = audits["cumulative-layout-shift"]["id"]
  44. cls_value = audits["cumulative-layout-shift"]["displayValue"]
  45. cls_score = audits["cumulative-layout-shift"]["score"]
  46. categories = json_data["categories"]
  47. perf_metric = categories["performance"]["id"]
  48. perf_value = 0
  49. perf_score = categories["performance"]["score"]
  50. # 清理值并格式化提取时间
  51. cleaned_fcp_value = clean_value(fcp_value)
  52. cleaned_lcp_value = clean_value(lcp_value)
  53. cleaned_fmp_value = clean_value(fmp_value)
  54. cleaned_si_value = clean_value(si_value)
  55. cleaned_tbt_value = clean_value(tbt_value)
  56. datetime_obj = datetime.strptime(fetch_time, "%Y-%m-%dT%H:%M:%S.%fZ")
  57. cleaned_fetch_time = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
  58. # 为 DataFrame 创建数据字典
  59. data_dict = {
  60. "fetch_time": [cleaned_fetch_time] * 7,
  61. "url": [url] * 7,
  62. "metric": [
  63. "performance",
  64. "first_contentful_paint",
  65. "largest_contentful_paint",
  66. "first-meaningful-paint",
  67. "speed-index",
  68. "total-blocking-time",
  69. "cumulative-layout-shift"
  70. ],
  71. "value": [
  72. perf_value,
  73. cleaned_fcp_value,
  74. cleaned_lcp_value,
  75. cleaned_fmp_value,
  76. cleaned_si_value,
  77. cleaned_tbt_value,
  78. cls_value
  79. ],
  80. "score": [
  81. perf_score,
  82. fcp_score,
  83. lcp_score,
  84. fmp_score,
  85. si_score,
  86. tbt_score,
  87. cls_score]
  88. }
  89. df = pd.DataFrame(data_dict)
  90. # 将 DataFrame 添加到 Excel 文件
  91. sheet_name = "Sheet1"
  92. if sheet_name in workbook.sheetnames:
  93. sheet = workbook[sheet_name]
  94. else:
  95. sheet = workbook.create_sheet(sheet_name)
  96. for row in dataframe_to_rows(df, index=False, header=True):
  97. sheet.append(row)
  98. print(f"从 {file_name} 中提取数据并添加到 Excel 文件")
  99. except KeyError as e:
  100. print(f"在处理文件 '{file_name}' 时发生 KeyError: {e}")
  101. except json.JSONDecodeError as e:
  102. print(f"在处理文件 '{file_name}' 时发生 JSONDecodeError: {e}")
  103. except Exception as e:
  104. print(f"在处理文件 '{file_name}' 时发生错误: {e}")
  105. def fileWatcher(my_dir: str, pollTime: int):
  106. excel_file_path = os.path.join(my_dir, 'output.xlsx')
  107. existingFiles = []
  108. if os.path.exists(os.path.join(os.getcwd(), 'scanned_files.txt')):
  109. with open('scanned_files.txt', 'a+') as f:
  110. existingFiles = f.read().split('\n')
  111. # 检查输出文件是否已经存在
  112. if os.path.isfile(excel_file_path):
  113. try:
  114. workbook = openpyxl.load_workbook(excel_file_path)
  115. except openpyxl.utils.exceptions.InvalidFileException:
  116. workbook = openpyxl.Workbook()
  117. else:
  118. workbook = openpyxl.Workbook()
  119. # 处理现有的 JSON 文件并将数据添加到 Excel 文件
  120. if not "Sheet1" in workbook.sheetnames:
  121. doThingsWithNewFiles(existingFiles, my_dir, workbook)
  122. print("已处理现有的 JSON 文件并将数据添加到 Excel 文件")
  123. # 在启动时检查新文件
  124. while True:
  125. time.sleep(pollTime)
  126. # 获取目录中更新的 JSON 文件列表
  127. newFileList = jsonFilesInDirectory(my_dir)
  128. # 查找之前和新文件列表之间的差异
  129. fileDiff = listComparison(existingFiles, newFileList)
  130. existingFiles = newFileList
  131. if len(fileDiff) > 0:
  132. # 处理新文件并将数据添加到 Excel 文件
  133. doThingsWithNewFiles(fileDiff, my_dir, workbook)
  134. # 保存 Excel 文件
  135. workbook.save(excel_file_path)
  136. print(f"DataFrame 导出到 {excel_file_path}")
  137. with open('scanned_files.txt', 'w') as f:
  138. f.write('\n'.join(existingFiles))
  139. def listComparison(originalList: list, newList: list):
  140. # 比较两个列表并返回差异
  141. differencesList = [x for x in newList if x not in originalList]
  142. return differencesList
  143. my_dir = r"Z:"
  144. pollTime = 60
  145. fileWatcher(my_dir, pollTime)

如果您有任何其他疑问,请随时提问。

英文:

You can create a text file that has the list of scanned files stored.

Updated your code, to read if exists and write the text file.

  1. from datetime import datetime
  2. import glob
  3. import json
  4. import openpyxl
  5. from openpyxl.utils.dataframe import dataframe_to_rows
  6. import os
  7. import pandas as pd
  8. import time
  9. def jsonFilesInDirectory(my_dir: str):
  10. # Get a list of JSON files in the directory
  11. json_files = glob.glob(os.path.join(my_dir, "*.json"))
  12. return json_files
  13. def clean_value(value):
  14. # Clean up numeric values by removing unnecessary characters
  15. return float(value.replace('\xa0s', '').replace('\xa0ms', '').replace(',', ''))
  16. def doThingsWithNewFiles(fileDiff: list, my_dir: str, workbook):
  17. for file_name in fileDiff:
  18. file_path = os.path.join(my_dir, file_name)
  19. with open(file_path, 'r', encoding='utf-8') as file:
  20. try:
  21. json_data = json.load(file)
  22. # Extract data from the JSON file
  23. url = json_data["finalUrl"]
  24. fetch_time = json_data["fetchTime"]
  25. audits = json_data["audits"]
  26. fcp_metric = audits["first-contentful-paint"]["id"]
  27. fcp_value = audits["first-contentful-paint"]["displayValue"]
  28. fcp_score = audits["first-contentful-paint"]["score"]
  29. lcp_metric = audits["largest-contentful-paint"]["id"]
  30. lcp_value = audits["largest-contentful-paint"]["displayValue"]
  31. lcp_score = audits["largest-contentful-paint"]["score"]
  32. fmp_metric = audits["first-meaningful-paint"]["id"]
  33. fmp_value = audits["first-meaningful-paint"]["displayValue"]
  34. fmp_score = audits["first-meaningful-paint"]["score"]
  35. si_metric = audits["speed-index"]["id"]
  36. si_value = audits["speed-index"]["displayValue"]
  37. si_score = audits["speed-index"]["score"]
  38. tbt_metric = audits["total-blocking-time"]["id"]
  39. tbt_value = audits["total-blocking-time"]["displayValue"]
  40. tbt_score = audits["total-blocking-time"]["score"]
  41. cls_metric = audits["cumulative-layout-shift"]["id"]
  42. cls_value = audits["cumulative-layout-shift"]["displayValue"]
  43. cls_score = audits["cumulative-layout-shift"]["score"]
  44. categories = json_data["categories"]
  45. perf_metric = categories["performance"]["id"]
  46. perf_value = 0
  47. perf_score = categories["performance"]["score"]
  48. # Clean up values and format the fetch time
  49. cleaned_fcp_value = clean_value(fcp_value)
  50. cleaned_lcp_value = clean_value(lcp_value)
  51. cleaned_fmp_value = clean_value(fmp_value)
  52. cleaned_si_value = clean_value(si_value)
  53. cleaned_tbt_value = clean_value(tbt_value)
  54. datetime_obj = datetime.strptime(fetch_time, "%Y-%m-%dT%H:%M:%S.%fZ")
  55. cleaned_fetch_time = datetime_obj.strftime("%Y-%m-%d %H:%M:%S")
  56. # Create a data dictionary for the DataFrame
  57. data_dict = {
  58. "fetch_time": [cleaned_fetch_time] * 7,
  59. "url":
    * 7,
  60. "metric": [
  61. "performance",
  62. "first_contentful_paint",
  63. "largest_contentful_paint",
  64. "first-meaningful-paint",
  65. "speed-index",
  66. "total-blocking-time",
  67. "cumulative-layout-shift"
  68. ],
  69. "value": [
  70. perf_value,
  71. cleaned_fcp_value,
  72. cleaned_lcp_value,
  73. cleaned_fmp_value,
  74. cleaned_si_value,
  75. cleaned_tbt_value,
  76. cls_value
  77. ],
  78. "score": [
  79. perf_score,
  80. fcp_score,
  81. lcp_score,
  82. fmp_score,
  83. si_score,
  84. tbt_score,
  85. cls_score]
  86. }
  87. df = pd.DataFrame(data_dict)
  88. # Append the DataFrame to the Excel file
  89. sheet_name = "Sheet1"
  90. if sheet_name in workbook.sheetnames:
  91. sheet = workbook[sheet_name]
  92. else:
  93. sheet = workbook.create_sheet(sheet_name)
  94. for row in dataframe_to_rows(df, index=False, header=True):
  95. sheet.append(row)
  96. print(f"Data extracted from {file_name} and appended to the Excel file")
  97. except KeyError as e:
  98. print(f"KeyError occurred while processing file '{file_name}': {e}")
  99. except json.JSONDecodeError as e:
  100. print(f"JSONDecodeError occurred while processing file '{file_name}': {e}")
  101. except Exception as e:
  102. print(f"An error occurred while processing file '{file_name}': {e}")
  103. def fileWatcher(my_dir: str, pollTime: int):
  104. excel_file_path = os.path.join(my_dir, 'output.xlsx')
  105. existingFiles = []
  106. if os.path.exists(os.path.join(os.getcwd(), 'scanned_files.txt')):
  107. with open('scanned_files.txt', 'a+') as f:
  108. existingFiles = f.read().split('\n')
  109. # Check if the output file already exists
  110. if os.path.isfile(excel_file_path):
  111. try:
  112. workbook = openpyxl.load_workbook(excel_file_path)
  113. except openpyxl.utils.exceptions.InvalidFileException:
  114. workbook = openpyxl.Workbook()
  115. else:
  116. workbook = openpyxl.Workbook()
  117. # Process the existing JSON files and append data to the Excel file
  118. if not "Sheet1" in workbook.sheetnames:
  119. doThingsWithNewFiles(existingFiles, my_dir, workbook)
  120. print("Existing JSON files processed and data appended to the Excel file")
  121. # Check for new files at startup
  122. while True:
  123. time.sleep(pollTime)
  124. # Get the updated list of JSON files in the directory
  125. newFileList = jsonFilesInDirectory(my_dir)
  126. # Find the difference between the previous and new file lists
  127. fileDiff = listComparison(existingFiles, newFileList)
  128. existingFiles = newFileList
  129. if len(fileDiff) > 0:
  130. # Process the new files and append data to the Excel file
  131. doThingsWithNewFiles(fileDiff, my_dir, workbook)
  132. # Save the Excel file
  133. workbook.save(excel_file_path)
  134. print(f"DataFrame exported to {excel_file_path}")
  135. with open('scanned_files.txt', 'w') as f:
  136. f.write('\n'.join(existingFiles))
  137. def listComparison(originalList: list, newList: list):
  138. # Compare two lists and return the differences
  139. differencesList = [x for x in newList if x not in originalList]
  140. return differencesList
  141. my_dir = r"Z:"
  142. pollTime = 60
  143. fileWatcher(my_dir, pollTime)

Couldn't test the code, let me know if there's any issue with this.

huangapple
  • 本文由 发表于 2023年7月14日 08:53:51
  • 转载请务必保留本文链接:https://go.coder-hub.com/76684076.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定