无法将文章内容附加到列表

如何解决无法将文章内容附加到列表

使用 python news3k 包，我试图遍历网站上的所有文章，并使用文章内容构建一个数据框。

文章的元数据是一个嵌套的字典，我可以将它从一篇文章中提取出来，但是当我循环遍历它们时就不行了。列表的长度为 0

from rake_nltk import Rake
import readability
import newspaper
from newspaper import Config
from newspaper import Article
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download([
    "names","stopwords","state_union","twitter_samples","movie_reviews","averaged_perceptron_tagger","vader_lexicon","punkt",])

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/90.0.4430.212 Safari/537.36'

config = Config()
config.browser_user_agent = USER_AGENT
config.request_timeout = 10


base_url = 'https://www.marketwatch.com'
article_urls = set()
marketwatch = newspaper.build(base_url,config=config,memoize_articles=False,language='en')


title = []
sitename = []
og_type = []
url = []
og_description = []
twitter_identifier = []
twitter_id = []
fb_id = []
author = []
section = []
pub_date = []
tags = []

for sub_article in marketwatch.articles[0:10]:
  try:
    article = Article(sub_article.url,language='en')
    article.download()
    article.parse()

    if article.url not in article_urls:
      article_urls.add(article.url)

      # The majority of the article elements are located
      # within the Meta data section of the page's
      # navigational structure
      article_Meta_data = article.Meta_data

      published_date = {value for (key,value) in article_Meta_data.items() if key == 'parsely-pub-date'}
      article_published_date = " ".join(str(x) for x in published_date)

      authors = sorted({value for (key,value) in article_Meta_data.items() if key == 'parsely-author'})
      article_author = ','.join(authors)

      title = {value for (key,value) in article_Meta_data.items() if key == 'parsely-title'}
      article_title = " ".join(str(x) for x in title)

      keywords = ''.join({value for (key,value) in article_Meta_data.items() if key == 'keywords'})
      keywords_list = sorted(keywords.lower().split(','))
      article_keywords = ','.join(keywords_list)

      tags = ''.join({value for (key,value) in article_Meta_data.items() if key == 'parsely-tags'})
      tag_list = sorted(tags.lower().split(','))
      article_tags = ','.join(tag_list)

      summary = {value for (key,value) in article_Meta_data.items() if key == 'description'}
      article_summary = " ".join(str(x) for x in summary)

      # the replace is used to remove newlines
      article_text = article.text.replace('\n','')
      for key,value in article.Meta_data.items():
        print(key,' : ',value)
      
      # Trying to append content to the list
      title.append(article.Meta_data['og']['title'])
      sitename.append(article.Meta_data['og']['site_name'])
      og_type.append(article.Meta_data['og']['type'])
      url.append(article.Meta_data['og']['url'])
      og_description.append(article.Meta_data['og']['description'])
      twitter_identifier.append(article.Meta_data['twitter']['site']['identifier'])
      twitter_id.append(article.Meta_data['twitter']['site']['id'])
      fb_id.append(article.Meta_data['fb']['app_id'])
      author.append(article.Meta_data['author'])
      section.append(article.Meta_data['parsely-section'])
      pub_date.append(article.Meta_data['parsely-pub-date'])
      tags.append(article.Meta_data['parsely-tags'])

      print()

  except:
    pass

无法将文章内容附加到列表

如何解决无法将文章内容附加到列表

相关推荐