使用 Selenium 调用浏览器时需要一个驱动程序(WebDriver),浏览器的 WebDriver 需要单独安装。
如果使用 Chrome,可在浏览器地址栏输入 chrome://version/ 查看浏览器版本,
然后到 http://npm.taobao.org/mirrors/chromedriver/ 下载对应版本的驱动即可。
# _*_ coding:utf-8 _*_
# Minimal Selenium demo: open Baidu, type a query and click the search button.
from selenium import webdriver
from selenium.webdriver.common.by import By

# Create the browser driver object; this opens a blank browser window.
driver = webdriver.Chrome()

# Navigate to the target site.
driver.get("https://www.baidu.com")

# Locate the search input box (id="kw").
# A located element can be stored in a variable and operated on later.
# NOTE: the find_element_by_* helpers were removed in Selenium 4.3, so the
# find_element(By.<strategy>, value) form is used instead.
ele = driver.find_element(By.ID, "kw")
ele.send_keys("大风吹")  # type text into the input box

# An element can also be operated on directly without storing it;
# the search button has id="su".
driver.find_element(By.ID, "su").click()

# Do NOT assign the result of an action to a variable and then reuse it:
# click() returns None, so the second line below would be None.click().
# ele = driver.find_element(By.ID, "su").click()
# ele.click()

# Uncomment to close the browser and end the driver session when finished.
# driver.quit()
京东:
# _*_ coding:gbk _*_
# Scrape book names and prices from https://www.jd.com/ search results.
# NOTE: the encoding declaration above is only honoured on line 1 or 2 of a
# file, and the file must then really be saved in that encoding.
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Rows collected so far; each row is [name, price].
all_book_info = []
# Number of books still to collect.
num = 200
# CSV header row.
head = ['书名', '价格']
# Path and name of the output CSV file.
path = './book.csv'


def write_csv(head, all_book_info, path):
    """Write the header row followed by all collected rows to a CSV file.

    Args:
        head: Sequence of column titles written as the first row.
        all_book_info: Iterable of rows, each a sequence of cell values.
        path: Destination file path; an existing file is overwritten.

    The file is encoded as GBK (the original author's choice to avoid
    garbled text when opening the file). Characters that GBK cannot
    represent are replaced with '?' instead of aborting the whole write.
    """
    with open(path, 'w', newline='', encoding='gbk', errors='replace') as file:
        file_writer = csv.writer(file)
        file_writer.writerow(head)
        file_writer.writerows(all_book_info)


def get_onePage_info(num):
    """Collect books from the currently displayed result page.

    Scrolls to the bottom of the page, then appends one [name, price] row
    to the module-level ``all_book_info`` for each list item that has both
    a name and a price, until ``num`` rows have been taken.

    Args:
        num: How many more books are still wanted.

    Returns:
        The remaining number of books wanted after this page.

    Raises:
        NoSuchElementException: If the result list container
            (id="J_goodsList") is not present on the page.
    """
    # Scroll to the bottom, then wait for the page to settle before reading.
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)

    goods_list = driver.find_element(By.ID, "J_goodsList")
    for item in goods_list.find_elements(By.TAG_NAME, "li"):
        if num <= 0:
            break
        try:
            name = item.find_element(By.CLASS_NAME, "p-name").find_element(By.TAG_NAME, "em").text
            price = item.find_element(By.CLASS_NAME, "p-price").find_element(By.TAG_NAME, "i").text
        except NoSuchElementException:
            # The TAG_NAME search returns every descendant <li>, so entries
            # lacking a name or price are skipped and do not use up quota.
            continue
        # Optional extra fields, disabled in the original script:
        # bookdetail = item.find_element(By.CLASS_NAME, "p-bookdetails")
        # author = bookdetail.find_element(By.CLASS_NAME, "p-bi-name").find_element(By.TAG_NAME, "a").text
        # store = bookdetail.find_element(By.CLASS_NAME, "p-bi-store").find_element(By.TAG_NAME, "a").text
        all_book_info.append([name, price])
        num -= 1
    return num


# Create the browser driver object; this opens a blank browser window.
driver = webdriver.Chrome()
try:
    # Open the JD home page.
    driver.get("https://www.jd.com")

    # Type the search keyword into the search box.
    p_input = driver.find_element(By.ID, 'key')
    p_input.send_keys('Python编程')
    time.sleep(1)

    # Click the search button. click() returns None, so nothing is stored.
    driver.find_element(By.CLASS_NAME, "button").click()
    time.sleep(1)

    while num > 0:
        collected_before = len(all_book_info)
        num = get_onePage_info(num)
        if num <= 0:
            break  # quota reached; no need to load another page
        if len(all_book_info) == collected_before:
            break  # page produced nothing new; stop instead of looping on
        next_buttons = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_buttons:
            break  # no "next page" control on this page
        next_buttons[0].click()  # go to the next page
        time.sleep(2)
finally:
    # Save whatever was collected, even if scraping failed part-way,
    # and always close the browser.
    try:
        write_csv(head, all_book_info, path)
    finally:
        driver.quit()
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。