如何解决 UnicodeDecodeError 错误
我遇到了这个奇怪的 UnicodeDecodeError,但不知道是什么导致了这个错误。如果有人能帮我解决这个问题,那就太好了 :)
错误信息:
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 6456: character maps to <undefined>
完整的错误消息截图
screenshot of the Error message
我的代码:
import os
import json
import random
import csv
from pydub import AudioSegment
# Path of the tab-separated metadata file that main() reads.
# NOTE(review): the literal ends with a trailing space — confirm that is intended.
file_path = '/path/to/file/.tsv '
# Directory in which main() writes train.json and test.json.
save_json_path = '/path/where/you/want/the/jsons/saved'
def main(args, *, encoding="utf-8"):
    """Build train/test JSON-lines manifests from a tab-separated clip list.

    Reads the TSV file at the module-level ``file_path`` (columns ``path``
    and ``sentence``), optionally converts every referenced clip in the
    sibling ``clips`` directory from mp3 to wav, shuffles the entries and
    writes them, one JSON object per line, to ``train.json`` and
    ``test.json`` inside the module-level ``save_json_path``.

    Args:
        args: Parsed command-line namespace; only ``args.convert`` is read.
            When true, each clip is converted to wav and the manifest points
            at the wav file; otherwise it points at the original file.
        encoding: Text encoding used to read the TSV file. It is passed
            explicitly because relying on the platform default is what
            produces ``UnicodeDecodeError: 'charmap' codec can't decode ...``
            when the file is not in that default encoding.
    """
    data = []
    directory = file_path.rpartition('/')[0]
    percent = 100  # one entry out of `percent` ends up in the test set

    with open(file_path, encoding=encoding) as f:
        # The first line is the header consumed by DictReader, not a clip,
        # so it must not be counted in the progress total.
        length = max(sum(1 for _ in f) - 1, 0)

    with open(file_path, newline='', encoding=encoding) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        if args.convert:
            print(str(length) + "files found")
        for index, row in enumerate(reader, start=1):
            file_name = row['path']
            text = row['sentence']
            if not args.convert:
                data.append({
                    "key": directory + "/clips/" + file_name, "text": text
                })
                continue

            filename = file_name.rpartition('.')[0] + ".wav"
            data.append({
                "key": directory + "/clips/" + filename, "text": text
            })
            print("converting file " + str(index) + "/" + str(length) + " to wav", end="\r")
            src = directory + "/clips/" + file_name
            dst = directory + "/clips/" + filename
            sound = AudioSegment.from_mp3(src)
            sound.export(dst, format="wav")

    random.shuffle(data)
    print("creating JSON's")

    # Everything before `split` is training data, the remainder is test data.
    total = len(data)
    split = int(total - total / percent)

    # json.dumps() escapes non-ASCII by default, so the output is plain ASCII;
    # the encoding is still pinned so the result never depends on the locale.
    with open(save_json_path + "/" + 'train.json', 'w', encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in data[:split])
    with open(save_json_path + "/" + 'test.json', 'w', encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in data[split:])
    print("Done!")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the flags and hand the namespace to main().
    parser = argparse.ArgumentParser(description="""
Utility script to convert commonvoice into wav and create the training and test json files for speechrecognition. """
    )
    # Conversion is on by default, so --convert on its own can never switch it
    # off; --no-convert is the way to disable it.
    parser.add_argument('--convert', default=True, action='store_true', help='says that the script should convert mp3 to wav')
    parser.add_argument('--no-convert', dest='convert', action='store_false', help='skip the mp3 to wav conversion and reference the original files')
    # parse_known_args() returns (namespace, leftover_argv); main() only needs
    # the namespace, so the unrecognised arguments are dropped here.
    args, _unknown = parser.parse_known_args()
    main(args)
解决方法
您似乎是在下面这段代码中遇到了该错误:
# No encoding is passed, so open() decodes the file with the platform's
# default encoding, which is where the UnicodeDecodeError is raised.
with open(file_path) as f:
    length = sum(1 for line in f)
根据另一个帖子(虽然它没有被采纳的答案),这个错误很可能是由您的文件编码引起的。
尝试在调用 open 时添加 encoding 关键字参数:
# latin-1 maps every byte to a character, so decoding can never fail; if the
# file is really UTF-8, pass encoding="utf-8" instead so that non-ASCII text
# is decoded to the right characters.
with open(file_path, encoding="latin-1") as f:
    length = sum(1 for line in f)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。