英文:
Generating file structure from filenames using Google Drive API and Python
问题
我目前的代码几乎能够创建正确的文件夹,但它们没有正确地嵌套。例如,对于以下输入文件:
[0_diss] good_journals.txt
[0_diss~historic_disses] Zhang (2020) - NAV premia.pdf
[0_diss~literature] A Guide to Writing the Literature Review.pdf
脚本生成了以下文件夹:
0_diss
0_diss~historic_disses
0_diss~literature
我希望创建名为“0_diss”、“historic_disses”和“literature”的文件夹(如果它们尚不存在),并使“historic_disses”和“literature”成为“0_diss”的子文件夹。希望这更容易理解。
以下是你的代码的翻译部分:
import os
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# 设置Drive API凭证
creds = Credentials.from_authorized_user_file(r"CREDENTIALS_PATH", ['https://www.googleapis.com/auth/drive'])
# 定义Google Drive文件夹的名称
folder_name = 'FOLDER_CONTAINING_FILES'
# 创建Drive API客户端
service = build('drive', 'v3', credentials=creds)
# 定义解析子目录和子子目录的函数
def parse_directory(filename):
# 通过'] '拆分文件名
parts = filename.split('] ')
subdirs = []
for part in parts:
# 检查部分是否是子目录
if '[' in part:
subdir = part[1:]
subdirs.append(subdir)
return subdirs
# 获取Google Drive文件夹中的文件列表
try:
query = "mimeType='application/vnd.google-apps.folder' and trashed = false and name='" + folder_name + "'"
folder = service.files().list(q=query).execute().get('files')[0]
folder_id = folder.get('id')
query = "'" + folder_id + "' in parents and trashed = false and mimeType != 'application/vnd.google-apps.folder'"
files = service.files().list(q=query).execute().get('files')
except HttpError as error:
print(f'出现错误: {error}')
files = []
# 创建字典以存储子目录和子子目录
subdirs_dict = {}
subsubdirs_dict = {}
# 循环遍历文件并根据需要创建子目录和子子目录
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir not in subdirs_dict:
# 如果不存在,则创建子目录
metadata = {'name': subdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subdir_file = service.files().create(body=metadata, fields='id').execute()
subdirs_dict[subdir] = subdir_file.get('id')
parent_id = subdirs_dict[subdir]
if subsubdir != '':
if subsubdir not in subsubdirs_dict:
# 如果不存在,则创建子子目录
metadata = {'name': subsubdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subsubdir_file = service.files().create(body=metadata, fields='id').execute()
subsubdirs_dict[subsubdir] = subsubdir_file.get('id')
parent_id = subsubdirs_dict[subsubdir]
# 循环遍历文件并将它们移动到适当的子目录或子子目录
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir in subdirs_dict:
parent_id = subdirs_dict[subdir]
if subsubdir != '' and subsubdir in subsubdirs_dict:
parent_id = subsubdirs_dict[subsubdir]
metadata = {'name': filename, 'parents': [parent_id]}
service.files().update(fileId=file.get('id'), body=metadata).execute()
希望这对你有所帮助。
英文:
I have a bunch of files sat in a Google Drive folder whose filenames contains information regarding the desired folder/subfolder structure:
[folder~subfolder] MyFile.ext
Some files simply read:
[folder] MyOtherFile.ext
I am looking for a way to sort the files into the desired file structure using thr Google Drive API, creating the folders/subfolders if they do not already exist.
This is what I have so far:
import os
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Set up the Drive API credentials
creds = Credentials.from_authorized_user_file(r"CREDENTIALS_PATH", ['https://www.googleapis.com/auth/drive'])
# Define the name of the Google Drive folder
folder_name = 'FOLDER_CONTAINING_FILES'
# Create a Drive API client
service = build('drive', 'v3', credentials=creds)
# Define the function to parse the subdirectories and sub-subdirectories
def parse_directory(filename):
# Split the filename by '] '
parts = filename.split('] ')
subdirs = []
for part in parts:
# Check if the part is a subdirectory
if '[' in part:
subdir = part[1:]
subdirs.append(subdir)
return subdirs
# Get the list of files in the Google Drive folder
try:
query = "mimeType='application/vnd.google-apps.folder' and trashed = false and name='" + folder_name + "'"
folder = service.files().list(q=query).execute().get('files')[0]
folder_id = folder.get('id')
query = "'" + folder_id + "' in parents and trashed = false and mimeType != 'application/vnd.google-apps.folder'"
files = service.files().list(q=query).execute().get('files')
except HttpError as error:
print(f'An error occurred: {error}')
files = []
# Create dictionaries to store the subdirectories and sub-subdirectories
subdirs_dict = {}
subsubdirs_dict = {}
# Loop through the files and create the subdirectories and sub-subdirectories if necessary
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir not in subdirs_dict:
# Create the subdirectory if it doesn't exist
metadata = {'name': subdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subdir_file = service.files().create(body=metadata, fields='id').execute()
subdirs_dict[subdir] = subdir_file.get('id')
parent_id = subdirs_dict[subdir]
if subsubdir != '':
if subsubdir not in subsubdirs_dict:
# Create the sub-subdirectory if it doesn't exist
metadata = {'name': subsubdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subsubdir_file = service.files().create(body=metadata, fields='id').execute()
subsubdirs_dict[subsubdir] = subsubdir_file.get('id')
parent_id = subsubdirs_dict[subsubdir]
# Loop through the files and move them to the appropriate subdirectory or sub-subdirectory
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir in subdirs_dict:
parent_id = subdirs_dict[subdir]
if subsubdir != '' and subsubdir in subsubdirs_dict:
parent_id = subsubdirs_dict[subsubdir]
metadata = {'name': filename, 'parents': [parent_id]}
service.files().update(fileId=file.get('id'), body=metadata).execute()
At present, my code produces (almost) the correct folders but they are not nested properly. For example, for these input files:
[0_diss] good_journals.txt
[0_diss~historic_disses] Zhang (2020) - NAV premia.pdf
[0_diss~literature] A Guide to Writing the Literature Review.pdf
The script produces the following folders:
0_diss
0_diss~historic_disses
0_diss~literature
What I want is for folders entitled '0_diss', 'historic_disses', and 'literature' to be created (if they do not already exist) and for 'historic_disses' and 'literature' to be a subfolder of '0_diss'. I hope this makes more sense.
Would really appreciate some help on this one, I'm not an experienced programmer and am trying my best to learn Python. Thanks.
答案1
得分: 0
此答案假设只有一个~
,因为这是您提供的数据和示例。您可以这样解析目录:
from typing import Optional
import re
def parse_directories(filename: str) -> Optional[dict]:
dirs = {}
# 确保我们有受支持的文件名,将任何不是]~的内容匹配到第一个组中
# 然后可选地将~后的任何不是]的内容匹配到第二个组中
match = re.search(r'^\[([^\]~]+)~?([^\]]+)?\]', filename)
# 如果有匹配的内容,分配始终可用的父目录,否则返回null
if not match:
return None
dirs['parent'] = match.group(1)
# 如果找到子目录,也将其分配
if match.group(2):
dirs['child'] = match.group(2)
return dirs
然后使用一个循环来创建新的目录并将文件移动到其中:
```python
for filename in filenames:
newdirs = parse_directories(filename)
if newdirs:
if 'child' in newdirs:
# 创建子文件夹
# 要验证它是否已经创建,只需将路径存储为列表
else:
# 如果需要,创建newdirs['parent']
# 将文件移动到新的/已经存在的文件夹中,不需要第二个循环
这应该让您走上正确的轨道。
英文:
This answer assumes there can only be one ~
because that's the data and examples you provided. You can parse directories like this:
from typing import Optional
import re
def parse_directories(filename: str) -> Optional[dict]:
dirs = {}
# Make sure we have a supported file name, match anything not ]~ into group 1
# Then optionally match anything not ] after ~ into group 2
match = re.search('^\[([^\]~]+)~?([^\]]+)?\]', filename)
# If anything was matched, assign always available parent, else return null
if (not match):
return None
dirs['parent'] = match.group(1)
# If child was found, assign it too
if (match.group(2)):
dirs['child'] = match.group(2)
return dirs
Then use just one loop to create new directories and move the files into them:
for filename in filenames:
newdirs = parse_directories(filename)
if newdirs:
if 'child' in newdirs:
# Create your child folder
# To verify if it's been already made simply store paths as a list
else:
# Create newdirs['parent'] if needed
# Move the file to the new/already existing folder, you don't need a 2nd loop
This should get you on the right track.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论