如何解决 BeautifulSoup 的属性错误(AttributeError)
我正在尝试使用 BeautifulSoup 和 requests 库来抓取 Google 购物(Google Shopping)的搜索结果。这是我的代码,非常简单:
from bs4 import BeautifulSoup
import requests
import lxml
import json
def gshop(q):
    """Fetch Google Shopping search results for ``q`` and return them as JSON.

    The query is placed in a ``google.com/search`` URL with ``tbm=shop``,
    the response HTML is parsed with BeautifulSoup (lxml parser), and one
    dict per ``div.sh-dgr__content`` container is collected with the keys
    "Title", "Price", "rating", "supplier" and "Link".

    Returns:
        A JSON string (2-space indent, non-ASCII characters kept as-is)
        holding the list of collected dicts.

    Raises:
        AttributeError: when any ``find()`` call below matches nothing; it
            then returns None and ``.text`` is read from None. This is the
            failure shown in the traceback quoted after this snippet.
    """
    # Spaces become '+' so the query can be embedded in the URL.
    q = q.replace(' ','+')
    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }
    response = requests.get(f'https://www.google.com/search?q={q}&tbm=shop',headers=headers).text
    soup = BeautifulSoup(response,'lxml')
    data = []
    # One container per listing; the class names are taken from the page
    # markup -- presumably they can change at any time (TODO confirm).
    for container in soup.findAll('div',class_='sh-dgr__content'):
        # Each lookup below assumes the element exists; nothing guards
        # against find() returning None.
        title = container.find('h4',class_='A2sOrd').text
        price = container.find('span',class_='a8Pemb').text
        supplier = container.find('div',class_='aULzUe IuHnof').text
        # The href is appended to the google.com origin to form the link.
        buy = 'https://google.com'+(container.find('a',class_='eaGTj mQaFGe shntl')['href'])
        # This is the line reported in the traceback: a listing without a
        # rating span makes find() return None.
        rating = container.find('span',class_='Rsc7Yb').text
        data.append({
            "Title": title,"Price": price,"rating": rating,"supplier": supplier,"Link": buy
        })
    return json.dumps(data,indent = 2,ensure_ascii = False)


# Demo call; this is the call that appears in the traceback.
print(gshop('toys'))
这会引发错误:
Traceback (most recent call last):
  File "c:/Users/Maanav/Desktop/ValRal/main.py", line 45, in <module>
    print(gshop('toys'))
  File "c:/Users/Maanav/Desktop/ValRal/main.py", line 34, in gshop
    rating = container.find('span',class_='Rsc7Yb').text
AttributeError: 'NoneType' object has no attribute 'text'
请查看 Google 购物搜索结果页面的网页源代码,以便更好地理解我的代码。出了什么问题?
解决方法
正如 @simpleApp 在评论中指出的:
有时,Google 购物列表中的商品可能没有评分,或者卖家可能没有填写供应商名称。此时 find() 找不到对应的标签,会返回 None,而对 None 访问 .text 就会抛出 AttributeError,导致程序中断。为了避免这种情况,我们必须使用异常处理,在元素缺失时把对应字段设为 None。
from bs4 import BeautifulSoup
import requests
import lxml
import json
def _text_or_none(tag):
    """Return ``tag.text``, or None when the lookup that produced ``tag`` found nothing."""
    try:
        return tag.text
    except AttributeError:
        # find() returns None for a missing element, and None has no .text.
        return None


def _link_or_none(tag):
    """Return the absolute google.com URL for an ``<a>`` tag, or None if unavailable."""
    try:
        return 'https://google.com' + tag['href']
    except (TypeError, KeyError):
        # TypeError: tag is None (no match). KeyError: the tag has no href.
        return None


def gshop(q: str, *, timeout: float = 10) -> str:
    """Scrape Google Shopping results for ``q`` and return them as JSON.

    Listings that lack a field (for example a product with no rating, or a
    seller with no supplier name) keep that field as None instead of
    aborting the whole scrape. Only the exceptions caused by a missing
    element are handled; anything else propagates to the caller.

    Args:
        q: Search query. It is URL-encoded by ``requests`` (spaces become
            '+'), so characters such as '&' or '#' are safe.
        timeout: Seconds to wait for Google before ``requests`` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        A JSON string (2-space indent, non-ASCII characters kept as-is)
        holding a list of dicts with the keys "Title", "Price", "Rating",
        "Supplier" and "Link". Missing values are serialized as ``null``.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            timeout.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }
    # Passing the query through ``params`` lets requests do the encoding
    # instead of splicing raw user text into the URL.
    response = requests.get(
        'https://www.google.com/search',
        params={'q': q, 'tbm': 'shop'},
        headers=headers,
        timeout=timeout,
    ).text
    soup = BeautifulSoup(response, 'lxml')

    data = []
    # The class names below come from the page markup -- presumably they
    # can change at any time (TODO confirm against the live page).
    for container in soup.find_all('div', class_='sh-dgr__content'):
        data.append({
            "Title": _text_or_none(container.find('h4', class_='A2sOrd')),
            "Price": _text_or_none(container.find('span', class_='a8Pemb')),
            "Rating": _text_or_none(container.find('span', class_='Rsc7Yb')),
            "Supplier": _text_or_none(container.find('div', class_='aULzUe IuHnof')),
            "Link": _link_or_none(container.find('a', class_='eaGTj mQaFGe shntl')),
        })
    return json.dumps(data, indent=2, ensure_ascii=False)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。