"""Download one Anjuke sale-listing page and save the text of its house list."""
import re


class Anjuke:
    """Fetch the listing page at ``self.url`` and store its house list as plain text.

    Attributes set in ``__init__``:
        url: address of the listing page that is downloaded.
        headers: HTTP headers sent with the request (a browser User-Agent).
        pattern: regex capturing the inner HTML of each house-list ``<ul>``.
        second_pattern: regex matching everything stripped from the captured
            HTML before saving: tags, character entities and whitespace.
    """

    def __init__(self):
        self.url = "https://beijing.anjuke.com/sale/huairou/o5/"
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 "
                "(KHTML,like Gecko) Chrome/19.0.1063.0 Safari/536.3"
            )
        }
        # re.S lets ``.`` cross newlines, because the list spans many lines.
        self.pattern = re.compile(
            r'<ul id="houselist-mod-new" '
            r'class="houselist-mod houselist-mod-new">(.*?)</ul>',
            re.S,
        )
        # ``[^>]*`` (unlike ``.*?`` without re.S) also matches tags that are
        # wrapped over several lines; ``&#?\w+;`` matches only real character
        # entities instead of any text between an ``&`` and a later ``;``.
        self.second_pattern = re.compile(r"<[^>]*>|&#?\w+;|\s")

    def send_request(self, timeout=10):
        """Download ``self.url`` and return the body decoded as UTF-8.

        Args:
            timeout: seconds to wait for the server before giving up.

        Returns:
            The page source as ``str``. It is also printed to stdout.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx HTTP status.
        """
        # Imported here so the parsing/cleaning helpers can be imported and
        # exercised without the third-party ``requests`` package installed.
        import requests

        response = requests.get(self.url, headers=self.headers, timeout=timeout)
        # Fail loudly on an error page instead of silently saving nothing.
        response.raise_for_status()
        data = response.content.decode()
        print(data)
        return data

    def save_data(self, result_data, filename="anjuke.text"):
        """Append the cleaned text of each HTML fragment to *filename*.

        Args:
            result_data: iterable of HTML fragment strings.
            filename: file to append to; defaults to ``anjuke.text`` in the
                current working directory.

        Every fragment is stripped with ``self.second_pattern`` and written
        followed by a blank line.
        """
        # Explicit UTF-8 so non-ASCII listing text does not depend on the
        # platform's default encoding.
        with open(filename, "a", encoding="utf-8") as f:
            for data in result_data:
                f.write(self.second_pattern.sub("", data) + "\n\n")

    def analysis_data(self, data):
        """Return the inner HTML of every house-list ``<ul>`` found in *data*.

        Args:
            data: page source as ``str``.

        Returns:
            A list of strings, empty when the page contains no house list.
        """
        return self.pattern.findall(data)

    def run(self):
        """Download the page, extract the house lists, print and save them."""
        data = self.send_request()
        result_list = self.analysis_data(data)
        print(result_list)
        self.save_data(result_list)


if __name__ == "__main__":
    Anjuke().run()
# 版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。