英文:
Opening file from Windows gives UnicodeDecodeError: 'utf-8' codec can't decode byte: invalid start byte
问题
最近我把我的Windows 10升级为Ubuntu,但我带来的脚本不起作用。在Windows上它们运行得很完美。现在,当我尝试运行它们时,我得到了一个UTF-8编解码错误。
我安装了venvs、pip和所需的模块(pip list
)因为我认为可能缺少UTF或Unicode模块,但这没有解决问题。
以下是涉及的文件的代码。这是一个用于自动抓取输入单词结果的词汇表。它还远未完成,但在Windows上可以正常启动和运行(没有终端错误)。
(以下是您的Python代码,我已经去除了代码部分以进行翻译。)
这是我尝试运行它时收到的错误:错误截图。有没有人知道如何修复这个问题?
英文:
Recently I upgraded my windows 10 to Ubuntu, and the scripts that I brought with me don't work. They worked perfectly well on windows though. Now when i try running them i get a utf-8 codec error.
I installed venvs, pip and required modules (pip list) cause i thought that maybe it lacks a utf or unicode module, but that didn't fix it.
Here's the code for the file in question. It's a vocabulary that automatically scrapes results for entered words. It's far from finished, but it starts up and runs fine on windows (no terminal errors)
from tkinter import *
import csv
import tkinter
import re
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
def FkinIndex(number):
if number >> 1:
print("NUMBER:::", number)
elif number == 0:
number += 1
return number
# -------------------------------------------------- Function: strVarSet
def strVarSet(keys, values):
y = 0
vars = {}
l = []
for item in values:
x = eval(item)
for xtem in x:
y = y + 1
vars[y] = x[xtem]
print("\nstrVarSet()\nReturning\n", vars, "\n\n", keys)
return vars, keys
# -------------------------------------------------- Function: DictExtract
def dictExtract():
with open("dir/dess.txt", "r") as y:
x = y.readlines()
d = str(x)
Dict = eval(d)
print(Dict)
print(type(Dict))
l = []
for item in Dict:
print("first print -----\n", item)
x = eval(item)
for key in x:
l.append(key)
print("\ndictExtract()\nreturning\n", l, "\n", Dict)
return l, Dict
# -------------------------------------------------- Function: Next
def Next(index, ):
index = index+1
print(f"-------------\n{index}\n-------------")
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0 # key for dict
descs = {} # dict
for f in vars[index]: # for values in DictList[index]
count = count + 1 # key for dict
descs[count] = f # Extracting descriptions for Labels
print("ff\n\n", f)
return index
# -------------------------------------------------- Function: Current
def Current(index):
print(f"-------------\n{index}\n-------------")
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0 # key for dict
descs = {} # dict
for f in vars[index]: # for values in DictList[index]
count = count + 1 # key for dict
descs[count] = f # Extracting descriptions for Labels
print("ff\n\n", f)
window.update()
return index, vars, descs
# -------------------------------------------------- Function: Previous
def Previous(index, ):
index = index-1
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0
descs = {}
for f in vars[index]:
count = count + 1
descs[count] = f
print(f)
# -------------------------------------------------- Function: DictSaver
def dictSaver(d):
with open("dir/dess.txt", "a") as y:
#y = csv.writer(y)
d = str(d)
y.write(d + "\n")
d = {}
# -------------------------------------------------- DictFormer
def DictFormer(l, name):
d = {name:l}
print(d)
dictSaver(d)
# -------------------------------------------------- Function: Button_Words
def Button_Words(words):
l = []
for word in words:
keys, dictList = dictExtract()
if word not in keys:
print(word)
with urlopen(f"https://www.dictionary.com/browse/{word}") as token:
bsobj = BeautifulSoup(token, "html.parser")
section = bsobj.find("div", {"class": "css-69s207 e1hk9ate3"})
l.append(section.get_text())
for span in section.find_next_sibling("div"):
l.append(span.get_text())
print(l)
x = DictFormer(l, word)
l = []
else:
continue
# --------------------------------------------------
token1 = urlopen("https://www.dictionary.com/")
token = requests.Request("https://www.dictionary.com/", headers={'User-Agent': 'Mozilla/5.0'})
bsobj = BeautifulSoup(token1, "html.parser")
table = bsobj.find("section", {"class": "g6v6DANjsJKOolEk5qVH"})
step = table.find("span", {"class": re.compile(".*")})
xstep = step.find("a", {"data-linkid": "nx1fkx"})
print(xstep.get_text())
with open("dir/word_list.csv") as word_list:
word_list = csv.reader(word_list)
count1 = 0
for row in word_list:
count1 = count1 + 1
row = str(row).strip("[").strip("]").strip("\'")
print(row)
v1 = "Hello World"
def WordInput(x):
f = []
f.append(x)
with open("dir/word_list.csv", "a") as y:
writer = csv.writer(y)
writer.writerow(f)
y.close()
def wordUnload():
x=[]
with open("dir/word_list.csv", "r") as y:
reader = csv.reader(y)
for read in reader:
f = str(read)
f = f.strip("[").strip("]").strip("\'")
print(f)
x.append(f)
print(x)
return x
indexxternal = FkinIndex(0)
unloadedw = wordUnload()
window = Tk()
# -------------------------------------------------- Button: Add New Word
NewWordButton = Button(window, text="+", command=lambda: WordInput(input("Add new Word\n> ")))
NewWordButton.grid(row=0, column=1)
# -------------------------------------------------- Button: Load Words
WordsButton = Button(window, text="Words", command=lambda: Button_Words(unloadedw))
WordsButton.grid(row=0, column=0)
# -------------------------------------------------- Button: Next
NextButton = Button(window, text=">", command=lambda: indexxternal==Next(indexxternal))
NextButton.grid(row=0, column=99)
# -------------------------------------------------- Button: Previous
PreviousButton = Button(window, text="<", command=lambda: indexxternal==Previous(indexxternal))
PreviousButton.grid(row=0, column=2)
# -------------------------------------------------- Initial Extraction
l, keys = dictExtract()
count = 0
index, vars, descs = Current(indexxternal)
for f in vars[index]:
count = count + 1
descs[count] = f
print(f)
# -------------------------------------------------- WORD Label
bar1 = tkinter.StringVar(window, str(l[0]).capitalize())
wrd1 = Label(window, textvariable=bar1, font="helvetica 11 underline")
wrd1.grid(row=0, column=3)
try: # --------------------------------------------- Description Label 1
var1 = tkinter.StringVar(window, str(descs[1]))
dsc1 = Label(window, textvariable=var1, font="Helvetica 9 italic")
dsc1.grid(row=1, column=3, pady=5, sticky="W")
except KeyError:
print("Description out of index in Label 1")
try: # --------------------------------------------- Description Label 2
var2 = tkinter.StringVar(window, str(descs[2]))
dsc2 = Label(window, textvariable=var2)
dsc2.grid(row=2, column=3, pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 2")
try: # --------------------------------------------- Description Label 3
var3 = tkinter.StringVar(window, str(descs[3]))
dsc3 = Label(window, textvariable=var3)
dsc3.grid(row=3, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 3")
try: # --------------------------------------------- Description Label 4
var4 = tkinter.StringVar(window, str(descs[4]))
dsc4 = Label(window, textvariable=var4)
dsc4.grid(row=4, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 4")
try: # --------------------------------------------- Description Label 5
var5 = tkinter.StringVar(window, str(descs[5]))
dsc5 = Label(window, textvariable=var5)
dsc5.grid(row=5, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 5")
try: # --------------------------------------------- Description Label 6
var6 = tkinter.StringVar(window, str(descs[6]))
dsc6 = Label(window, textvariable=var6)
dsc6.grid(row=6, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 6")
try: # --------------------------------------------- Description Label 7
var7 = tkinter.StringVar(window, str(descs[7]))
dsc7 = Label(window, textvariable=var7)
dsc7.grid(row=7, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 7")
try: # --------------------------------------------- Description Label 8
var8 = tkinter.StringVar(window, str(descs[8]))
dsc8 = Label(window, textvariable=var8)
dsc8.grid(row=8, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 8")
try: # --------------------------------------------- Description Label 9
var9 = tkinter.StringVar(window, str(descs[9]))
dsc9 = Label(window, textvariable=var9)
dsc9.grid(row=9, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 9")
try: # --------------------------------------------- Description Label 10
var10 = tkinter.StringVar(window, str(descs[10]))
dsc10 = Label(window, textvariable=var10)
dsc10.grid(row=10, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 10")
loadword1 = tkinter.StringVar(window)
window.mainloop()
Here's the error I get when I try running it: Error. Does anyone know a way to fix this?
答案1
得分: 1
已解决!
问题出在 dess.txt 的编码上。Windows 对其txt文件进行不同的编码,导致Unicode错误。
对于任何面临类似问题的人:
- 进入你的txt所在的目录
- 通过终端输入
gedit name.txt
,在GEdit中打开你的txt文件(你可以在Ubuntu软件应用中获取它)。 - 点击保存并在窗口底部更改文件的编码,然后进行覆盖保存。
非常感谢 @snakecharmerb - 我确实被他所吸引。
英文:
SOLVED!
The issue was dess.txt's encoding. Windows encodes its txt files differently which results in a unicode error.
For anyone facing similar problem:
- cd into your txt's directory
- Open your txt in GEdit (you can get it on ubuntu software app) via terminal by typing
gedit name.txt
- Click save and change your file's encoding at the bottom of the window and overwrite it.
Huge thanks to @snakecharmerb - i'm charmed indeed
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论