使用Google Drive API和Python从文件名生成文件结构

huangapple go评论81阅读模式
英文:

Generating file structure from filenames using Google Drive API and Python

问题

我目前的代码几乎能够创建正确的文件夹,但它们没有正确地嵌套。例如,对于以下输入文件:

[0_diss] good_journals.txt
[0_diss~historic_disses] Zhang (2020) - NAV premia.pdf
[0_diss~literature] A Guide to Writing the Literature Review.pdf

脚本生成了以下文件夹:

0_diss
0_diss~historic_disses
0_diss~literature

我希望创建名为“0_diss”、“historic_disses”和“literature”的文件夹(如果它们尚不存在),并使“historic_disses”和“literature”成为“0_diss”的子文件夹。希望这更容易理解。

以下是你的代码的翻译部分:

import os
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# 设置Drive API凭证
creds = Credentials.from_authorized_user_file(r"CREDENTIALS_PATH", ['https://www.googleapis.com/auth/drive'])

# 定义Google Drive文件夹的名称
folder_name = 'FOLDER_CONTAINING_FILES'

# 创建Drive API客户端
service = build('drive', 'v3', credentials=creds)

# 定义解析子目录和子子目录的函数
def parse_directory(filename):
    # 通过'] '拆分文件名
    parts = filename.split('] ')
    subdirs = []
    for part in parts:
        # 检查部分是否是子目录
        if '[' in part:
            subdir = part[1:]
            subdirs.append(subdir)
    return subdirs

# 获取Google Drive文件夹中的文件列表
try:
    query = "mimeType='application/vnd.google-apps.folder' and trashed = false and name='" + folder_name + "'"
    folder = service.files().list(q=query).execute().get('files')[0]
    folder_id = folder.get('id')
    query = "'" + folder_id + "' in parents and trashed = false and mimeType != 'application/vnd.google-apps.folder'"
    files = service.files().list(q=query).execute().get('files')
except HttpError as error:
    print(f'出现错误: {error}')
    files = []

# 创建字典以存储子目录和子子目录
subdirs_dict = {}
subsubdirs_dict = {}

# 循环遍历文件并根据需要创建子目录和子子目录
for file in files:
    filename = file.get('name')
    subdirs = parse_directory(filename)
    if len(subdirs) == 0:
        continue
    parent_id = folder_id
    for i in range(len(subdirs)):
        subdir = subdirs[i]
        if i == len(subdirs) - 1:
            subsubdir = ''
        else:
            subsubdir = subdirs[i+1]
        if subdir not in subdirs_dict:
            # 如果不存在,则创建子目录
            metadata = {'name': subdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
            subdir_file = service.files().create(body=metadata, fields='id').execute()
            subdirs_dict[subdir] = subdir_file.get('id')
        parent_id = subdirs_dict[subdir]
        if subsubdir != '':
            if subsubdir not in subsubdirs_dict:
                # 如果不存在,则创建子子目录
                metadata = {'name': subsubdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
                subsubdir_file = service.files().create(body=metadata, fields='id').execute()
                subsubdirs_dict[subsubdir] = subsubdir_file.get('id')
            parent_id = subsubdirs_dict[subsubdir]

# 循环遍历文件并将它们移动到适当的子目录或子子目录
for file in files:
    filename = file.get('name')
    subdirs = parse_directory(filename)
    if len(subdirs) == 0:
        continue
    parent_id = folder_id
    for i in range(len(subdirs)):
        subdir = subdirs[i]
        if i == len(subdirs) - 1:
            subsubdir = ''
        else:
            subsubdir = subdirs[i+1]
        if subdir in subdirs_dict:
            parent_id = subdirs_dict[subdir]
        if subsubdir != '' and subsubdir in subsubdirs_dict:
            parent_id = subsubdirs_dict[subsubdir]
        metadata = {'name': filename, 'parents': [parent_id]}
        service.files().update(fileId=file.get('id'), body=metadata).execute()

希望这对你有所帮助。

英文:

I have a bunch of files sat in a Google Drive folder whose filenames contains information regarding the desired folder/subfolder structure:

[folder~subfolder] MyFile.ext

Some files simply read:

[folder] MyOtherFile.ext

I am looking for a way to sort the files into the desired file structure using thr Google Drive API, creating the folders/subfolders if they do not already exist.

This is what I have so far:

import os
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Set up the Drive API credentials
creds = Credentials.from_authorized_user_file(r"CREDENTIALS_PATH", ['https://www.googleapis.com/auth/drive'])
# Define the name of the Google Drive folder
folder_name = 'FOLDER_CONTAINING_FILES'
# Create a Drive API client
service = build('drive', 'v3', credentials=creds)
# Define the function to parse the subdirectories and sub-subdirectories
def parse_directory(filename):
# Split the filename by '] '
parts = filename.split('] ')
subdirs = []
for part in parts:
# Check if the part is a subdirectory
if '[' in part:
subdir = part[1:]
subdirs.append(subdir)
return subdirs
# Get the list of files in the Google Drive folder
try:
query = "mimeType='application/vnd.google-apps.folder' and trashed = false and name='" + folder_name + "'"
folder = service.files().list(q=query).execute().get('files')[0]
folder_id = folder.get('id')
query = "'" + folder_id + "' in parents and trashed = false and mimeType != 'application/vnd.google-apps.folder'"
files = service.files().list(q=query).execute().get('files')
except HttpError as error:
print(f'An error occurred: {error}')
files = []
# Create dictionaries to store the subdirectories and sub-subdirectories
subdirs_dict = {}
subsubdirs_dict = {}
# Loop through the files and create the subdirectories and sub-subdirectories if necessary
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir not in subdirs_dict:
# Create the subdirectory if it doesn't exist
metadata = {'name': subdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subdir_file = service.files().create(body=metadata, fields='id').execute()
subdirs_dict[subdir] = subdir_file.get('id')
parent_id = subdirs_dict[subdir]
if subsubdir != '':
if subsubdir not in subsubdirs_dict:
# Create the sub-subdirectory if it doesn't exist
metadata = {'name': subsubdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subsubdir_file = service.files().create(body=metadata, fields='id').execute()
subsubdirs_dict[subsubdir] = subsubdir_file.get('id')
parent_id = subsubdirs_dict[subsubdir]
# Loop through the files and move them to the appropriate subdirectory or sub-subdirectory
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir in subdirs_dict:
parent_id = subdirs_dict[subdir]
if subsubdir != '' and subsubdir in subsubdirs_dict:
parent_id = subsubdirs_dict[subsubdir]
metadata = {'name': filename, 'parents': [parent_id]}
service.files().update(fileId=file.get('id'), body=metadata).execute()

At present, my code produces (almost) the correct folders but they are not nested properly. For example, for these input files:

[0_diss] good_journals.txt
[0_diss~historic_disses] Zhang (2020) - NAV premia.pdf
[0_diss~literature] A Guide to Writing the Literature Review.pdf

The script produces the following folders:

0_diss
0_diss~historic_disses
0_diss~literature

What I want is for folders entitled '0_diss', 'historic_disses', and 'literature' to be created (if they do not already exist) and for 'historic_disses' and 'literature' to be a subfolder of '0_diss'. I hope this makes more sense.

Would really appreciate some help on this one, I'm not an experienced programmer and am trying my best to learn Python. Thanks.

答案1

得分: 0

此答案假设只有一个~,因为这是您提供的数据和示例。您可以这样解析目录:

from typing import Optional
import re

def parse_directories(filename: str) -> Optional[dict]:
    dirs = {}
    
    # 确保我们有受支持的文件名,将任何不是]~的内容匹配到第一个组中
    # 然后可选地将~后的任何不是]的内容匹配到第二个组中
    match = re.search(r'^\[([^\]~]+)~?([^\]]+)?\]', filename)
    
    # 如果有匹配的内容,分配始终可用的父目录,否则返回null
    if not match:
        return None

    dirs['parent'] = match.group(1)
    
    # 如果找到子目录,也将其分配
    if match.group(2):
        dirs['child'] = match.group(2)
    
    return dirs

然后使用一个循环来创建新的目录并将文件移动到其中

```python
for filename in filenames:
    newdirs = parse_directories(filename)
    if newdirs:
        if 'child' in newdirs:
            # 创建子文件夹
            # 要验证它是否已经创建,只需将路径存储为列表
        else:
            # 如果需要,创建newdirs['parent']
    
        # 将文件移动到新的/已经存在的文件夹中,不需要第二个循环

这应该让您走上正确的轨道。

英文:

This answer assumes there can only be one ~ because that's the data and examples you provided. You can parse directories like this:

from typing import Optional
import re
def parse_directories(filename: str) -> Optional[dict]:
dirs = {}
# Make sure we have a supported file name, match anything not ]~ into group 1
# Then optionally match anything not ] after ~ into group 2 
match = re.search('^\[([^\]~]+)~?([^\]]+)?\]', filename)
# If anything was matched, assign always available parent, else return null
if (not match):
return None
dirs['parent'] = match.group(1)
# If child was found, assign it too
if (match.group(2)):
dirs['child'] = match.group(2)
return dirs

Then use just one loop to create new directories and move the files into them:

for filename in filenames:
newdirs = parse_directories(filename)
if newdirs:
if 'child' in newdirs:
# Create your child folder
# To verify if it's been already made simply store paths as a list
else:
# Create newdirs['parent'] if needed
# Move the file to the new/already existing folder, you don't need a 2nd loop

This should get you on the right track.

huangapple
  • 本文由 发表于 2023年3月7日 04:34:29
  • 转载请务必保留本文链接:https://go.coder-hub.com/75655575.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定