如何解决 BeautifulSoup 无法从社交媒体网站上获取正确信息的问题
我正在尝试编写一个程序,用来抓取某位网红的主要社交媒体主页,然后输出相关信息,例如他们有多少关注者。收集个人主页链接的部分似乎工作正常。我的问题是,我不知道如何从 TikTok(或者目前其他任何网站)的网页上获取关注者人数。有人能就如何获取这些信息给一些提示吗?如果能告诉我如何让这段代码更简洁、更快,我也会非常感激,因为这个项目是我第一次真正使用 Python,我比较熟悉的是 Java。
from googlesearch import search
import requests
from bs4 import BeautifulSoup
from urllib import request,response,error,parse
from urllib.request import urlopen
import requests
# Profile URLs collected by doSearch() and later visited by scrapeSites().
results = list()
# Search terms typed by the user; doSearch() passes them to search().
query = input("search: ")
def doSearch(tldIn, num):
    """Collect the first search hit for each known profile site.

    Runs ``search`` for the module-level ``query`` and appends to the
    module-level ``results`` list the first URL found for each domain in
    ``profile_domains``. Later hits for a domain that has already been
    recorded are skipped, and a single URL is appended at most once.

    Args:
        tldIn: Forwarded to ``search`` as its ``tld`` argument.
        num: Forwarded to ``search`` as its ``stop`` argument.
    """
    # Twitch profiles are served from twitch.tv, so that is the domain to
    # look for in result URLs.
    profile_domains = (
        "tiktok.com",
        "instagram.com",
        "facebook.com",
        "twitter.com",
        "snapchat.com",
        "twitch.tv",
        "youtube.com",
        "famousbirthdays.com",
    )
    # Domains already recorded, so only one link per site is kept. Twitter is
    # the biggest culprit here since Google likes to embed tweets in its
    # search results.
    seen = set()
    for url in search(query, tld=tldIn, stop=num):
        for domain in profile_domains:
            if domain in url and domain not in seen:
                seen.add(domain)
                results.append(url)
                # Stop at the first new match so a URL that happens to
                # mention two sites is not appended twice.
                break
def scrapeSites():
    """Fetch every URL in ``results`` and print TikTok follower counts.

    Each URL in the module-level ``results`` list is requested and its HTTP
    status code is printed. For TikTok URLs the parsed page is additionally
    dumped and searched for follower-count markup; every match is printed.
    A URL whose request fails is reported and skipped so the remaining URLs
    are still processed.
    """
    for site in results:
        try:
            # Bounded wait so one unresponsive host cannot hang the run.
            page = requests.get(site, timeout=10)
        except requests.RequestException as err:
            print("Request failed for " + site + ": " + str(err))
            continue
        print(page.status_code)
        soup = BeautifulSoup(page.content, 'lxml')
        if "tiktok.com" not in site:
            continue
        # Debug aid: dump the parsed page and the candidate counter elements.
        print(soup.prettify())
        foll = soup.find_all('div', class_='number')
        print(foll)
        # NOTE(review): the class names and the title attribute below come
        # from the original code; whether the HTML TikTok actually serves
        # contains them cannot be verified from here.
        for heading in soup.find_all('h2', class_='count-infos'):
            # Filter on the CSS class, not on a tag called <number>.
            for wrapper in heading.find_all('div', class_='number'):
                # Only ``class_`` takes a trailing underscore; other
                # attributes are matched by their plain name.
                strong = wrapper.find('strong', attrs={'title': 'Followers'})
                if strong is None:
                    continue
                print("TikTok FOllowers: " + strong.get_text())
# First pass: inspect a small number of search hits.
doSearch("com", 10)
if len(results) < 3:
    # Too few profiles found: discard them and retry with a larger stop
    # value.
    del results[:]
    doSearch("com", 20)
scrapeSites()
# List the collected profile links, one per line.
print(*results, sep="\n")
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。