Trying to use python to run a complex macro in excel, code not working



Here is the general idea of the macro I want to perform: I have a big Excel document with drug names in column B and their adverse effects in column D, and I am trying to sort the drugs by adverse effect. Here is an example of my table:

  1. Open the Excel file saved on my desktop at the path "/Desktop/Pharm Exam III Drugscopy.xlsx" and perform the following for each sheet within the file (there are 3 sheets: "Antidepressants and Mood", "Immunomodulators", and "Drugs of Abuse").
  2. Search in column D for every unique value that either comes immediately after a "-" or has a space in between the "-" and the value. For example "-HTN" and "- HTN" will be considered the same.
  3. For every instance of a unique value, copy the corresponding value in column B (e.g. if column D has "-HTN", look over to column B and copy its value, "Venlafaxine").
  4. Create a table in a new sheet to store all of these unique values and their corresponding value in column B with the title of each new column being the unique value, and everything listed below will be the corresponding value that is in column B.
  5. Format the table so that each column is a different color

Here is the code I came up with (I only have a few months of experience with Codecademy, plus ChatGPT for debugging help):

import pandas as pd
import openpyxl

# read in the excel file with the specified path
# NOTE(review): the path starts at the filesystem root ("/Desktop/..."), not at
# the user's home directory -- confirm this is the intended location.
file_path = "/Desktop/Pharm Exam III Drugscopy.xlsx"
xl = pd.ExcelFile(file_path)

# create an empty dictionary to hold the data for each sheet
# (maps sheet name -> {adverse effect -> list of column-B values})
sheet_data = {}

# loop through each sheet and extract the data
for sheet_name in ['Antidepressants and Mood Stabil', 'Immunomodulators', 'Drugs of Abuse']:
    # read in the sheet as a dataframe
    df = xl.parse(sheet_name)
    # create an empty dictionary to hold the data for this sheet
    sheet_dict = {}
    # loop through each row and extract the data
    for index, row in df.iterrows():
        # NOTE(review): 'D' and 'B' are looked up as column labels of the parsed
        # dataframe; this assumes the header row literally contains "D" and "B"
        # rather than these being Excel column letters -- TODO confirm.
        value_d = row['D']
        value_b = row['B']
        # check if the value in column D matches the pattern of interest
        # (only string cells are considered; the second test is redundant because
        # any string containing '- ' also contains '-')
        if isinstance(value_d, str) and ('-' in value_d or '- ' in value_d):
            # split on every '-' so each dash-prefixed item becomes its own entry;
            # text before the first '-' is also kept when it is non-empty
            split_values = value_d.split('-')
            for sv in split_values:
                sv = sv.strip() # remove any extra spaces
                if sv != '':
                    # append this row's column-B value to the list kept for this item
                    sheet_dict[sv] = sheet_dict.get(sv, []) + [value_b]
    # add the sheet data to the overall dictionary
    sheet_data[sheet_name] = sheet_dict

# create a new dataframe to hold the results
# NOTE(review): sheet_data is a dict of dicts keyed by sheet name, so the sheet
# names (not the adverse effects) presumably become the columns -- verify.
result_df = pd.DataFrame(sheet_data)

# write the dataframe to a new sheet in the original excel file
# NOTE(review): this writes to the same path the data was read from, without an
# append-mode writer; presumably this replaces the whole workbook -- confirm.
result_sheet_name = "Results"
result_df.to_excel(file_path, sheet_name=result_sheet_name, index=False)

# format the table with alternating column colors
# NOTE(review): PatternFill is not among the imports shown with this snippet,
# and the writer is never saved or closed in the visible code.
writer = pd.ExcelWriter(file_path, engine='openpyxl')
book = writer.book
sheet = book[result_sheet_name]
for i, col in enumerate(sheet.columns):
    # even-indexed columns are white, odd-indexed columns light grey
    color = 'FFFFFF' if i % 2 == 0 else 'F2F2F2'
    # NOTE(review): col is formatted into a string and used as a worksheet key;
    # verify that it is actually a column letter at this point.
    for cell in sheet[f'{col}']:
        cell.fill = PatternFill(start_color=color, end_color=color, fill_type='solid')





Score: 1


import pandas as pd

from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment

# Read every sheet of the workbook (sheet_name=None returns a dict of
# {sheet name: DataFrame}), keeping only the two columns of interest.
# `file_path` is the path to the workbook, as defined in the question's script.
dfs = pd.read_excel(file_path, sheet_name=None, usecols=["Drug Name", "Adverse Effects"])

# Build a one-row table whose columns are the unique adverse effects and whose
# single row ("Drug Name") holds the newline-joined drugs that list each effect.
# NOTE(review): the regex treats every "-" as the start of a new effect, so
# "-HTN" and "- HTN" both yield "HTN"; an effect that itself contains a hyphen
# would be split -- adjust the pattern if the data needs that.
df = (
    pd.concat(dfs.values(), ignore_index=True)
    .rename(columns={"Adverse Effects": "Adverse_Effects"})
    # one list of effects per cell
    .assign(Adverse_Effects=lambda df_: df_["Adverse_Effects"].str.findall(r"-\s*([^-\n]+)"))
    # one row per (drug, effect) pair
    .explode("Adverse_Effects", ignore_index=True)
    # drop whitespace left around each captured effect
    .assign(Adverse_Effects=lambda df_: df_["Adverse_Effects"].str.strip())
    # cells without any "-" item (or without a drug name) contribute nothing
    .dropna(subset=["Drug Name", "Adverse_Effects"])
    .loc[lambda df_: df_["Adverse_Effects"].str.len().gt(0)]
    .drop_duplicates()
    .groupby("Adverse_Effects", sort=False)
    .agg("\n".join)
    .T
)

# Append the table as a "Results" sheet to the existing workbook and format it.
with pd.ExcelWriter(file_path, mode="a", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Results")
    worksheet = writer.sheets["Results"]

    # Row 2 is the first row below the header written by to_excel.
    cell_range = f"A2:{get_column_letter(worksheet.max_column)}2"
    align = Alignment(horizontal="left", vertical="center", wrap_text=True)

    # wrap_text makes the newline-joined drug names display on separate lines
    for row in worksheet[cell_range]:
        for cell in row:
            cell.alignment = align

    # Every column after the first gets the same fixed width.
    for column in range(2, worksheet.max_column+1):
        col_letter = get_column_letter(column)
        worksheet.column_dimensions[col_letter].width = 25

    worksheet.column_dimensions["A"].width = 11
    worksheet.row_dimensions[2].height = 50



Your expected output is unclear but IIUC, you can use findall (which is a good starting point) :

import pandas as pd

from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment

# Read every sheet of the workbook (sheet_name=None returns a dict of
# {sheet name: DataFrame}), keeping only the two columns of interest.
# `file_path` is the path to the workbook, as defined in the question's script.
dfs = pd.read_excel(file_path, sheet_name=None, usecols=["Drug Name", "Adverse Effects"])

# Build a one-row table whose columns are the unique adverse effects and whose
# single row ("Drug Name") holds the newline-joined drugs that list each effect.
# NOTE(review): the regex treats every "-" as the start of a new effect, so
# "-HTN" and "- HTN" both yield "HTN"; an effect that itself contains a hyphen
# would be split -- adjust the pattern if the data needs that.
df = (
    pd.concat(dfs.values(), ignore_index=True)
    .rename(columns={"Adverse Effects": "Adverse_Effects"})
    # one list of effects per cell
    .assign(Adverse_Effects=lambda df_: df_["Adverse_Effects"].str.findall(r"-\s*([^-\n]+)"))
    # one row per (drug, effect) pair
    .explode("Adverse_Effects", ignore_index=True)
    # drop whitespace left around each captured effect
    .assign(Adverse_Effects=lambda df_: df_["Adverse_Effects"].str.strip())
    # cells without any "-" item (or without a drug name) contribute nothing
    .dropna(subset=["Drug Name", "Adverse_Effects"])
    .loc[lambda df_: df_["Adverse_Effects"].str.len().gt(0)]
    .drop_duplicates()
    .groupby("Adverse_Effects", sort=False)
    .agg("\n".join)
    .T
)

Then, to make the new spreadsheet, you can use ExcelWriter with an openpyxl engine :

# Append a "Results" sheet to the existing workbook, then tidy its layout.
with pd.ExcelWriter(file_path, mode="a", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="Results")
    ws = writer.sheets["Results"]

    last_col_letter = get_column_letter(ws.max_column)
    wrapped_left = Alignment(horizontal="left", vertical="center", wrap_text=True)

    # Left-align and wrap every cell of row 2 (the first row below the header).
    for cells in ws[f"A2:{last_col_letter}2"]:
        for c in cells:
            c.alignment = wrapped_left

    # Narrow first column; every other column gets the same wider width.
    ws.column_dimensions["A"].width = 11
    for col_idx in range(2, ws.max_column + 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = 25

    # Taller row 2 so the wrapped text is visible.
    ws.row_dimensions[2].height = 50

Output (spreadsheet):


