如何解决Tweepy流:TypeError:'NoneType'对象不可下标
我已经创建了一个Postgres数据库,并且正在实现Tweepy的Stream函数来填充它。到现在为止,我已经能够将普通的推文(非转发和非回复)添加到Postgres数据库中。当我添加对应于转发和回复的变量(status.retweeted_status.user.id,status.in_reply_to_user_id)以及位置(经度= status.coordinates [0],纬度= status.coordinates [1])时,出现以下错误:
File "stream.py",line 70,in on_status
longitude = status.coordinates[0]
TypeError: 'nonetype' object is not subscriptable
此错误对于上面的“转发”和“回复”字段也一直存在。我知道这与在坐标字段中具有NA的未启用位置的推文有关,并且我尝试使用if语句来捕获那些推文,但我仍然收到相同的错误。
我的代码:
import time
import user
import tweepy
import psycopg2
auth = tweepy.OAuthHandler(user.CONSUMER_KEY,user.CONSUMER_KEY_SECRET)
auth.set_access_token(user.ACCESS_TOKEN,user.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
def __init__(self,time_limit=300):
self.start_time = time.time()
self.limit = time_limit
super(MyStreamListener,self).__init__()
def on_connect(self):
print("Connected to Twitter API.")
def on_status(self,status):
print(status.text)
##################### tweet table #########################
tweet_id = status.id # Tweet ID
# Tweet
if status.truncated == True:
hashtags = status.extended_tweet['entities']['hashtags']
tweet = status.extended_tweet['full_text']
else:
hashtags = status.entities['hashtags']
tweet = status.text
created_at = status.created_at # created_at
user_id = status.user.id # User ID
retweet_count = status.retweet_count # retweet count
favorite_count = status.favorite_count
##################### user table ##########################
username = status.user.name # Username
followers_count = status.user.followers_count
following_count = status.user.friends_count
# ##################### location table ##########################
if status.coordinates is not None:
longitude = status.coordinates[0]
latitude = status.coordinates[1]
else:
longitude = latitude = None
# ##################### in_reply_to table ##########################
if status.in_reply_to_user_id is not None:
replying_to_id = status.in_reply_to_user_id
else:
replying_to_id = None
# # ##################### retweeting table ##########################
if status.retweeted_status is not None:
retweeting_id = status.retweeted_status.user.id
else:
retweeting_id = None
# Read hastags
hashtags = read_hashtags(hashtags)
# Language
lang = status.lang
# If tweet is not a retweet and tweet is in English - not hasattr(status,"retweeted_status") and
if lang == "en":
# Connect to database
dbConnect(user_id,username,tweet_id,tweet,created_at,retweet_count,favorite_count,hashtags,followers_count,following_count,longitude,latitude,replying_to_id,retweeting_id)
if (time.time() - self.start_time) > self.limit:
print(time.time(),self.start_time,self.limit)
return False
def on_error(self,status_code):
if status_code == 420:
# Returning False in on_data disconnects the stream
return False
# Extract hashtags
def read_hashtags(tag_list):
hashtags = []
for tag in tag_list:
hashtags.append(tag['text'])
return hashtags
# Connection to database server
# need to allow ip address on GCP first - remember to convert to CIDR format with "to" address
# conn = psycopg2.connect(host="***",database="***",user='***',password = '***')
# Create cursor to execute sql commands
# cur = conn.cursor()
# Insert Tweet data into database
def dbConnect(user_id,retweeting_id):
conn = psycopg2.connect(host="***",password = '***')
cur = conn.cursor()
# insert user information
command = '''INSERT INTO users(user_id,following_count) VALUES (%s,%s,%s) ON CONFLICT
(user_id) DO nothing;'''
cur.execute(command,(user_id,following_count))
# insert tweet information
command = '''INSERT INTO tweet(ID,user_id,favorite_count) VALUES (%s,%s);'''
cur.execute(command,(tweet_id,favorite_count))
# # insert retweeting information
command = '''INSERT INTO retweeting(tweet_id,retweeting_id) VALUES (%s,retweeting_id))
#
# # insert in_reply_to information
command = '''INSERT INTO in_reply_to(tweet_id,replying_to_id) VALUES (%s,replying_to_id))
# insert location information
command = '''INSERT INTO location(tweet_id,latitude) VALUES (%s,latitude))
# insert entity information
for i in range(len(hashtags)):
hashtag = hashtags[i]
command = '''INSERT INTO TwitterEntity (ID,hashtag) VALUES (%s,%s);'''
cur.execute(command,hashtag))
# Commit changes
conn.commit()
# disconnect
cur.close()
conn.close()
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth,listener=myStreamListener,tweet_mode="extended")
myStream.filter(track=['covid','coronavirus','pandemic','covid19','covid-19'])
更新:我使用了tdelaney的解决方案,现在可以填充位置了。我仍然在转发和回复时出错,现在显示为:
File "stream.py",line 82,in on_status
if status.retweeted_status is not None:
AttributeError: 'Status' object has no attribute 'retweeted_status'
我已经编辑了上面的代码以反映更新。
解决方法
您可以测试坐标是否存在,如果不存在则指定默认值。您可能需要更改默认值,具体取决于您希望这种情况出现在数据库中的方式。
# ##################### location table ##########################
if status.coordinates is not None:
longitude = status.coordinates[0]
latitude = status.coordinates[1]
else:
longitude = latitude = None
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。