Opening file from Windows gives UnicodeDecodeError: 'utf-8' codec can't decode byte: invalid start byte

huangapple go评论73阅读模式
英文:

Opening file from Windows gives UnicodeDecodeError: 'utf-8' codec can't decode byte: invalid start byte

问题

最近我把我的Windows 10升级为Ubuntu,但我带来的脚本不起作用。在Windows上它们运行得很完美。现在,当我尝试运行它们时,我得到了一个UTF-8编解码错误。

我安装了venvs、pip和所需的模块(pip list)因为我认为可能缺少UTF或Unicode模块,但这没有解决问题。

以下是涉及的文件的代码。这是一个用于自动抓取输入单词结果的词汇表。它还远未完成,但在Windows上可以正常启动和运行(没有终端错误)。

(以下是您的Python代码,我已经去除了代码部分以进行翻译。)

这是我尝试运行它时收到的错误:错误截图。有没有人知道如何修复这个问题?

英文:

Recently I upgraded my windows 10 to Ubuntu, and the scripts that I brought with me don't work. They worked perfectly well on windows though. Now when i try running them i get a utf-8 codec error.

I installed venvs, pip and required modules (pip list) cause i thought that maybe it lacks a utf or unicode module, but that didn't fix it.

Here's the code for the file in question. It's a vocabulary that automatically scrapes results for entered words. It's far from finished, but it starts up and runs fine on windows (no terminal errors)

from tkinter import *
import csv
import tkinter
import re
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
def FkinIndex(number):
if number >> 1:
print("NUMBER:::", number)
elif number == 0:
number += 1
return number
# -------------------------------------------------- Function: strVarSet
def strVarSet(keys, values):
y = 0
vars = {}
l = []
for item in values:
x = eval(item)
for xtem in x:
y = y + 1
vars[y] = x[xtem]
print("\nstrVarSet()\nReturning\n", vars, "\n\n", keys)
return vars, keys
# -------------------------------------------------- Function: DictExtract
def dictExtract():
with open("dir/dess.txt", "r") as y:
x = y.readlines()
d = str(x)
Dict = eval(d)
print(Dict)
print(type(Dict))
l = []
for item in Dict:
print("first print -----\n", item)
x = eval(item)
for key in x:
l.append(key)
print("\ndictExtract()\nreturning\n", l, "\n", Dict)
return l, Dict
# -------------------------------------------------- Function: Next
def Next(index, ):
index = index+1
print(f"-------------\n{index}\n-------------")
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0 # key for dict
descs = {} # dict
for f in vars[index]: # for values in DictList[index]
count = count + 1 # key for dict
descs[count] = f # Extracting descriptions for Labels
print("ff\n\n", f)
return index
# -------------------------------------------------- Function: Current
def Current(index):
print(f"-------------\n{index}\n-------------")
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0  # key for dict
descs = {}  # dict
for f in vars[index]:  # for values in DictList[index]
count = count + 1  # key for dict
descs[count] = f  # Extracting descriptions for Labels
print("ff\n\n", f)
window.update()
return index, vars, descs
# -------------------------------------------------- Function: Previous
def Previous(index, ):
index = index-1
l, keys = dictExtract()
vars, momo = strVarSet(l, keys)
count = 0
descs = {}
for f in vars[index]:
count = count + 1
descs[count] = f
print(f)
# -------------------------------------------------- Function: DictSaver
def dictSaver(d):
with open("dir/dess.txt", "a") as y:
#y = csv.writer(y)
d = str(d)
y.write(d + "\n")
d = {}
# -------------------------------------------------- DictFormer
def DictFormer(l, name):
d = {name:l}
print(d)
dictSaver(d)
# -------------------------------------------------- Function: Button_Words
def Button_Words(words):
l = []
for word in words:
keys, dictList = dictExtract()
if word not in keys:
print(word)
with urlopen(f"https://www.dictionary.com/browse/{word}") as token:
bsobj = BeautifulSoup(token, "html.parser")
section = bsobj.find("div", {"class": "css-69s207 e1hk9ate3"})
l.append(section.get_text())
for span in section.find_next_sibling("div"):
l.append(span.get_text())
print(l)
x = DictFormer(l, word)
l = []
else:
continue
# --------------------------------------------------
token1 = urlopen("https://www.dictionary.com/")
token = requests.Request("https://www.dictionary.com/", headers={'User-Agent': 'Mozilla/5.0'})
bsobj = BeautifulSoup(token1, "html.parser")
table = bsobj.find("section", {"class": "g6v6DANjsJKOolEk5qVH"})
step = table.find("span", {"class": re.compile(".*")})
xstep = step.find("a", {"data-linkid": "nx1fkx"})
print(xstep.get_text())
with open("dir/word_list.csv") as word_list:
word_list = csv.reader(word_list)
count1 = 0
for row in word_list:
count1 = count1 + 1
row = str(row).strip("[").strip("]").strip("\'")
print(row)
v1 = "Hello World"
def WordInput(x):
f = []
f.append(x)
with open("dir/word_list.csv", "a") as y:
writer = csv.writer(y)
writer.writerow(f)
y.close()
def wordUnload():
x=[]
with open("dir/word_list.csv", "r") as y:
reader = csv.reader(y)
for read in reader:
f = str(read)
f = f.strip("[").strip("]").strip("\'")
print(f)
x.append(f)
print(x)
return x
indexxternal = FkinIndex(0)
unloadedw = wordUnload()
window = Tk()
# -------------------------------------------------- Button: Add New Word
NewWordButton = Button(window, text="+", command=lambda: WordInput(input("Add new Word\n> ")))
NewWordButton.grid(row=0, column=1)
# -------------------------------------------------- Button: Load Words
WordsButton = Button(window, text="Words", command=lambda: Button_Words(unloadedw))
WordsButton.grid(row=0, column=0)
# -------------------------------------------------- Button: Next
NextButton = Button(window, text=">", command=lambda: indexxternal==Next(indexxternal))
NextButton.grid(row=0, column=99)
# -------------------------------------------------- Button: Previous
PreviousButton = Button(window, text="<", command=lambda: indexxternal==Previous(indexxternal))
PreviousButton.grid(row=0, column=2)
# -------------------------------------------------- Initial Extraction
l, keys = dictExtract()
count = 0
index, vars, descs = Current(indexxternal)
for f in vars[index]:
count = count + 1
descs[count] = f
print(f)
# -------------------------------------------------- WORD Label
bar1 = tkinter.StringVar(window, str(l[0]).capitalize())
wrd1 = Label(window, textvariable=bar1, font="helvetica 11 underline")
wrd1.grid(row=0, column=3)
try: # --------------------------------------------- Description Label 1
var1 = tkinter.StringVar(window, str(descs[1]))
dsc1 = Label(window, textvariable=var1, font="Helvetica 9 italic")
dsc1.grid(row=1, column=3, pady=5, sticky="W")
except KeyError:
print("Description out of index in Label 1")
try: # --------------------------------------------- Description Label 2
var2 = tkinter.StringVar(window, str(descs[2]))
dsc2 = Label(window, textvariable=var2)
dsc2.grid(row=2, column=3, pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 2")
try: # --------------------------------------------- Description Label 3
var3 = tkinter.StringVar(window, str(descs[3]))
dsc3 = Label(window, textvariable=var3)
dsc3.grid(row=3, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 3")
try: # --------------------------------------------- Description Label 4
var4 = tkinter.StringVar(window, str(descs[4]))
dsc4 = Label(window, textvariable=var4)
dsc4.grid(row=4, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 4")
try: # --------------------------------------------- Description Label 5
var5 = tkinter.StringVar(window, str(descs[5]))
dsc5 = Label(window, textvariable=var5)
dsc5.grid(row=5, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 5")
try: # --------------------------------------------- Description Label 6
var6 = tkinter.StringVar(window, str(descs[6]))
dsc6 = Label(window, textvariable=var6)
dsc6.grid(row=6, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 6")
try: # --------------------------------------------- Description Label 7
var7 = tkinter.StringVar(window, str(descs[7]))
dsc7 = Label(window, textvariable=var7)
dsc7.grid(row=7, column=3,pady=1, sticky="W")
except KeyError:
print("Description out of index in Label 7")
try: # --------------------------------------------- Description Label 8
var8 = tkinter.StringVar(window, str(descs[8]))
dsc8 = Label(window, textvariable=var8)
dsc8.grid(row=8, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 8")
try: # --------------------------------------------- Description Label 9
var9 = tkinter.StringVar(window, str(descs[9]))
dsc9 = Label(window, textvariable=var9)
dsc9.grid(row=9, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 9")
try: # --------------------------------------------- Description Label 10
var10 = tkinter.StringVar(window, str(descs[10]))
dsc10 = Label(window, textvariable=var10)
dsc10.grid(row=10, column=3, sticky="W")
except KeyError:
print("Description out of index in Label 10")
loadword1 = tkinter.StringVar(window)
window.mainloop()

Here's the error I get when I try running it: Error. Does anyone know a way to fix this?

答案1

得分: 1

已解决!
问题出在 dess.txt 的编码上。Windows 对其txt文件进行不同的编码,导致Unicode错误。

对于任何面临类似问题的人:

  1. 进入你的txt所在的目录
  2. 通过终端输入gedit name.txt,在GEdit中打开你的txt文件(你可以在Ubuntu软件应用中获取它)。
  3. 点击保存并在窗口底部更改文件的编码,然后进行覆盖保存。

非常感谢 @snakecharmerb - 我确实被他所吸引。

英文:

SOLVED!
The issue was dess.txt's encoding. Windows encodes its txt files differently which results in a unicode error.

For anyone facing similar problem:

  1. cd into your txt's directory
  2. Open your txt in GEdit (you can get it on ubuntu software app) via terminal by typinggedit name.txt
  3. Click save and change your file's encoding at the bottom of the window and overwrite it.

Huge thanks to @snakecharmerb - i'm charmed indeed

huangapple
  • 本文由 发表于 2023年3月3日 23:30:49
  • 转载请务必保留本文链接:https://go.coder-hub.com/75629016.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定