day5selenium的使用

总结

selenium基础

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

# find_element(By.CSS_SELECTOR, ...) is used below instead of
# find_element_by_css_selector, which was removed in Selenium 4.3; the
# By-based form works on both Selenium 3 and Selenium 4.
from selenium.webdriver.common.by import By

# 1. Create the browser object.
# Note (from the original author): if the browser object is a local variable,
# the browser closes automatically when the function ends; if it is a global
# variable, the browser has to be closed manually.
b = webdriver.Chrome()

# 2. Open the page.
b.get('https://www.jd.com')

# 3. Get the page content.
# Note: page_source only contains the data the browser has loaded so far.
print(b.page_source)

# 4. Find and operate on elements.
# 1) Input box: locate it -> type text -> press Enter.
# Locate the search box by its id. It is named search_box rather than `input`
# so the builtin input() is not shadowed.
search_box = b.find_element(By.CSS_SELECTOR, '#key')

# Type the search keyword into the box, then press Enter.
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)

time.sleep(1)
# Look the box up again after the search was submitted (presumably because
# the results page has replaced the original element -- confirm).
results_search_box = b.find_element(By.CSS_SELECTOR, '#key')
# Clear the current text before typing the next keyword.
results_search_box.clear()
results_search_box.send_keys('鼠标')

# Locate the search button and click it.
search_btn = b.find_element(By.CSS_SELECTOR, '.button.cw-icon')
search_btn.click()

# 5. History navigation: back twice, then forward twice, pausing one second
# before each step.
for navigate in (b.back, b.back, b.forward, b.forward):
    time.sleep(1)
    navigate()

# Close the browser (left disabled, as in the original script).
# b.close()

selenium选项卡

from selenium import webdriver
import time

# find_element(By.CSS_SELECTOR, ...) is used below instead of
# find_element_by_css_selector, which was removed in Selenium 4.3; the
# By-based form works on both Selenium 3 and Selenium 4.
from selenium.webdriver.common.by import By

b = webdriver.Chrome()
b.get('https://www.jd.com')

# Find the <a> tag of the flash-sale ("秒杀") entry and click it.
miaosha = b.find_element(By.CSS_SELECTOR, '#navitems-group1>li>a')
miaosha.click()

# Print the handles of all open tabs.
print(b.window_handles)

time.sleep(2)
# Switch tabs: this selects the first handle in the list.
# NOTE(review): if the goal is to move to the tab opened by the click above,
# this probably should be window_handles[-1] -- confirm the intent.
b.switch_to.window(b.window_handles[0])

selenium获取网页cookie

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

import os

# find_element(By.CSS_SELECTOR, ...) is used below instead of
# find_element_by_css_selector, which was removed in Selenium 4.3; the
# By-based form works on both Selenium 3 and Selenium 4.
from selenium.webdriver.common.by import By

url = 'https://www.taobao.com'

b = webdriver.Chrome()
b.get(url)

# Type a keyword into the search box and submit it. The box is named
# search_box rather than `input` so the builtin input() is not shadowed.
search_box = b.find_element(By.CSS_SELECTOR, '#q')
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)

# The search leads to the login page; wait while a person logs in by hand.
time.sleep(10)
print('人工操作结束')

# After the manual login, read the session cookies and save them locally.
cookies = b.get_cookies()
# Create the target directory first; open(..., 'w') raises FileNotFoundError
# when the directory does not exist.
os.makedirs('files', exist_ok=True)
with open('files/taobao_cookies.txt', 'w', encoding='utf-8') as f:
    # Stored as the str() of the cookie list, i.e. a Python literal; the
    # format is kept unchanged so existing readers of this file still work.
    f.write(str(cookies))

selenium使用cookie

from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import ast

# find_element(By.CSS_SELECTOR, ...) is used below instead of
# find_element_by_css_selector, which was removed in Selenium 4.3; the
# By-based form works on both Selenium 3 and Selenium 4.
from selenium.webdriver.common.by import By

b = webdriver.Chrome()

# Open the site before adding cookies.
b.get('https://www.taobao.com')

# Load the cookies saved earlier.
with open(r'./files/taobao_cookies.txt', 'r', encoding='utf-8') as f:
    # SECURITY: the original used eval() here, which would execute any code
    # placed in the file. The file holds the str() of a list of cookie dicts,
    # so ast.literal_eval parses the same content but accepts literals only
    # (it raises ValueError/SyntaxError on anything else).
    saved_cookies = ast.literal_eval(f.read())

for cookie in saved_cookies:
    # Per the original note, only cookies whose 'secure' value is True support
    # https requests, so only those are added. .get() skips a cookie that has
    # no 'secure' key instead of raising KeyError.
    if cookie.get('secure'):
        b.add_cookie(cookie)

time.sleep(1)

# Load the page again now that the cookies are set.
b.get('https://www.taobao.com')
# b.refresh()
# Named search_box rather than `input` so the builtin input() is not shadowed.
search_box = b.find_element(By.CSS_SELECTOR, '#q')
search_box.send_keys('电脑')
search_box.send_keys(Keys.ENTER)

页面滚动

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup



def get_net_data(keyword='电脑', max_height=7000, step=500):
    """Search jd.com for a keyword, scroll the results page, return its HTML.

    The Chrome driver is stored in the module-level global ``b``, so it is
    still reachable after this function returns.

    Args:
        keyword: Text typed into the search box. Defaults to the value that
            was previously hard-coded.
        max_height: Scrolling stops once the scroll position exceeds this
            value.
        step: Amount added to the scroll position on every iteration.

    Returns:
        The ``page_source`` of the browser after scrolling has finished.
    """
    global b
    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which was removed in Selenium 4.3; it also works on Selenium 3.
    from selenium.webdriver.common.by import By

    b = webdriver.Chrome()
    b.get('https://www.jd.com')

    # Named search_box rather than `input` so the builtin is not shadowed.
    search_box = b.find_element(By.CSS_SELECTOR, '#key')
    search_box.send_keys(keyword)
    search_box.send_keys(Keys.ENTER)

    time.sleep(1)

    # ================== scrolling ==================
    # Scroll down step by step, pausing one second between scrolls. The last
    # scroll is the first position beyond max_height, and no pause follows it.
    y = 0
    while True:
        y += step
        b.execute_script(f'window.scrollTo(0, {y})')
        if y > max_height:
            break
        time.sleep(1)

    return b.page_source


def an_data(data):
    """Parse page HTML and print the goods-list items found in it.

    Args:
        data: HTML text, parsed with BeautifulSoup using the 'lxml' parser.

    Prints the number of ``<li>`` elements matched by the selector
    ``#J_goodsList > ul > li`` and then the matched elements themselves.
    """
    # Example of a single item selector: #J_goodsList > ul > li:nth-child(1)
    goods_items = BeautifulSoup(data, 'lxml').select('#J_goodsList > ul > li')
    print(len(goods_items))
    print(goods_items)


if __name__ == '__main__':
    # Fetch the scrolled results page, then parse and print its goods items.
    page_html = get_net_data()
    an_data(page_html)

总结

相关推荐