"""Scrape the Maoyan Top-100 movie board and append each record to MaoYan.txt."""

import requests
import pyquery


def crawl_page(url: str) -> None:
    """Fetch *url* with a browser-like User-Agent and hand the HTML to parse_page."""
    headers = {
        # Maoyan rejects the default python-requests UA; spoof a desktop Chrome build.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }
    # A timeout keeps one hung connection from stalling the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    parse_page(response.text)


def parse_page(source_code: str) -> None:
    """Extract rank/title/stars/release-date/score from one board page and save each row.

    NOTE(review): the ':' splits assume the page text contains a half-width
    colon (e.g. "主演:..."); an IndexError here means the site markup changed.
    """
    html = pyquery.PyQuery(source_code)
    dd_elements = html('.board-wrapper dd')
    for dd_element in dd_elements.items():
        data = {
            '排名': dd_element.find('i.board-index').text(),
            '电影名': dd_element.find('a.image-link').attr('title'),
            # Keep only the value after the "label:" prefix.
            '主演': dd_element.find('p.star').text().split(':')[1],
            '上映时间': dd_element.find('p.releasetime').text().split(':')[1],
            '评分': dd_element.find('p.score').text(),
        }
        print(data)
        save_data(data)


def save_data(data: dict) -> None:
    """Append one record to MaoYan.txt as a str()-formatted line."""
    # 'a' (append, write-only) suffices; the original 'a+' read mode was unused.
    with open('MaoYan.txt', 'a', encoding='utf8') as f:
        f.write(str(data) + '\n')


def main() -> None:
    """Crawl all ten pages of the Top-100 board (offset 0, 10, ..., 90)."""
    for i in range(0, 100, 10):
        url = 'https://maoyan.com/board/4?offset={}'.format(i)
        crawl_page(url)


if __name__ == '__main__':
    main()
版权声明：本文内容由互联网用户自发贡献，该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容，请发送邮件至 dio@foxmail.com 举报，一经查实，本站将立刻删除。