本文介绍了selenium设置proxy、headers的方法,把phantomjs、Chrome、Firefox几个浏览器的设置方法都总结一下,分享给大家,也给自己留个笔记
phantomjs
设置ip
方法1:
- service_args = [
- '--proxy=%s' % ip_html, # 代理 IP:port (eg:192.168.0.28:808)
- '--proxy-type=http', # 代理类型:http/socks5/none
- '--load-images=no', # 关闭图片加载(可选)
- '--disk-cache=yes', # 开启缓存(可选)
- '--ignore-ssl-errors=true' # 忽略https错误(可选)
- ]
- driver = webdriver.PhantomJS(service_args=service_args)
方法2:
- browser=webdriver.PhantomJS(PATH_PHANTOMJS)
-
- # 利用DesiredCapabilities(代理设置)参数值,重新打开一个sessionId,我看意思就相当于浏览器清空缓存后,加上代理重新访问一次url
- proxy=webdriver.Proxy()
- proxy.proxy_type=ProxyType.MANUAL
- proxy.http_proxy='1.9.171.51:800'
-
- # 将代理设置添加到webdriver.DesiredCapabilities.PHANTOMJS中
- proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
- browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
- browser.get('http://1212.ip138.com/ic.asp')
-
- print('1: ',browser.session_id)
- print('2: ',browser.page_source)
- print('3: ',browser.get_cookies())
还原为直连(不使用代理)
- # 还原为直连:ProxyType.DIRECT 表示不使用任何代理;如需使用系统代理,应改用 ProxyType.SYSTEM
- proxy=webdriver.Proxy()
- proxy.proxy_type=ProxyType.DIRECT
- proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
- browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
- browser.get('http://1212.ip138.com/ic.asp')
设置请求头
方法2
- import random,requests,json
- from selenium import webdriver
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- from selenium.webdriver.common.proxy import ProxyType
-
-
- #随机获取一个ip
- def proxies():
- r = requests.get("http://120.26.166.214:9840/JProxy/update/proxy/scoreproxy")
- rr = json.loads(r.text)
- hh = rr['ip'] + ":" + "8907"
- print(hh)
- return hh
- ips =proxies()
-
-
- #设置phantomjs请求头和代理方法一:
- #-------------------------------------------------------------------------------------
- # 设置代理
- service_args = [
- '--proxy=%s' % ips, # 代理 IP:port (eg:192.168.0.28:808)
- '--ssl-protocol=any', # 允许使用任意SSL/TLS协议版本
- '--load-images=no', # 关闭图片加载(可选)
- '--disk-cache=yes', # 开启缓存(可选)
- '--ignore-ssl-errors=true' # 忽略https错误(可选)
- ]
-
- #设置请求头
- user_agent = (
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) " +
- "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
- )
- dcap = dict(DesiredCapabilities.PHANTOMJS)
- dcap["phantomjs.page.settings.userAgent"] = user_agent
- driver = webdriver.PhantomJS(executable_path=r"C:\soft\phantomjs-2.1.1-windows\bin\phantomjs.exe",
- desired_capabilities=dcap,service_args=service_args)
-
- driver.get(url='http://www.baidu.com')
- page=driver.page_source
- print(page)
-
- #设置phantomjs请求头和代理方法二:
- #-------------------------------------------------------------------------------------
- desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
- # 从USER_AGENTS列表中随机选一个浏览器头,伪装浏览器(下面的'请求头池'只是占位符,需替换为实际的User-Agent列表)
- desired_capabilities["phantomjs.page.settings.userAgent"] = (random.choice('请求头池'))
-
- # 不载入图片,爬页面速度会快很多
- desired_capabilities["phantomjs.page.settings.loadImages"] = False
-
- # 利用DesiredCapabilities(代理设置)参数值,重新打开一个sessionId,我看意思就相当于浏览器清空缓存后,加上代理重新访问一次url
- proxy = webdriver.Proxy()
- proxy.proxy_type = ProxyType.MANUAL
- proxy.http_proxy = random.choice('ip池') # 'ip池'只是占位符,需替换为实际的代理IP列表(元素格式为 IP:port)
- proxy.add_to_capabilities(desired_capabilities)
- phantomjs_driver = r'C:\phantomjs-2.1.1-windows\bin\phantomjs.exe'
- # 打开带配置信息的phantomJS浏览器
- driver = webdriver.PhantomJS(executable_path=phantomjs_driver,desired_capabilities=desired_capabilities)
- driver.start_session(desired_capabilities)
-
-
- driver.get(url='http://www.baidu.com')
- page=driver.page_source
- print(page)
-
-
- # 隐式等待5秒,可以自己调节
- driver.implicitly_wait(5)
- # 设置20秒页面超时返回,类似于requests.get()的timeout选项,driver.get()没有timeout选项
- # 以前遇到过driver.get(url)一直不返回,但也不报错的问题,这时程序会卡住,设置超时选项能解决这个问题。
- driver.set_page_load_timeout(20)
- # 设置20秒脚本超时时间
- driver.set_script_timeout(20)
-
-
-
- #滚动到页面底部(可用于触发懒加载或下拉翻页)
- driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
-
firefox
- import time
- from selenium.webdriver.common.proxy import*
-
- myProxy = '202.202.90.20:8080'
-
- proxy = Proxy({
- 'proxyType': ProxyType.MANUAL,
- 'httpProxy': myProxy,
- 'ftpProxy': myProxy,
- 'sslProxy': myProxy,
- 'noProxy': ''
- })
-
- profile = webdriver.FirefoxProfile()
- if proxy:
- profile = get_firefox_profile_with_proxy_set(profile, proxy)
- if user_agent:
- profile.set_preference("general.useragent.override", user_agent)
-
- driver=webdriver.Firefox(proxy=proxy,profile=profile)
- driver.get('https://www.baidu.com')
- time.sleep(3)
- driver.quit()
-
firefox无头模式
- from selenium import webdriver
-
- # 创建的新实例驱动
- options = webdriver.FirefoxOptions()
- #火狐无头模式
- options.add_argument('--headless')
- options.add_argument('--disable-gpu')
- # options.add_argument('window-size=1200x600')
-
- executable_path='./source/geckodriver/geckodriver.exe'
- driver_path = webdriver.Firefox(firefox_options=options,executable_path=executable_path)
chrome
- # !/usr/bin/python
- # -*- coding: utf-8 -*-
-
- from selenium import webdriver
-
- # 进入浏览器设置
- options = webdriver.ChromeOptions()
- #谷歌无头模式
- options.add_argument('--headless')
- options.add_argument('--disable-gpu')
- # options.add_argument('window-size=1200x600')
- # 设置中文
- options.add_argument('lang=zh_CN.UTF-8')
- # 更换头部
- options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')
- #设置代理
- if proxy:
- options.add_argument('proxy-server=' + proxy)
- if user_agent:
- options.add_argument('user-agent=' + user_agent)
-
- browser = webdriver.Chrome(chrome_options=options)
- url = "https://httpbin.org/get?show_env=1"
- browser.get(url)
- browser.quit()
selenium设置chrome-cookie
- # !/usr/bin/python
- # -*- coding: utf-8 -*-
-
- from selenium import webdriver
- browser = webdriver.Chrome()
-
- url = "https://www.baidu.com/"
- browser.get(url)
- # 准备用于新开窗口的js语句(稍后再执行)
- newwindow='window.open("https://www.baidu.com");'
- # 删除原来的cookie
- browser.delete_all_cookies()
- # 携带cookie打开
- browser.add_cookie({'name':'ABC','value':'DEF'})
- # 通过js新打开一个窗口
- browser.execute_script(newwindow)
- input("查看效果")
- browser.quit()
-
selenium设置chrome-图片不加载
- from selenium import webdriver
-
- options = webdriver.ChromeOptions()
- prefs = {
- 'profile.default_content_setting_values': {
- 'images': 2
- }
- }
- options.add_experimental_option('prefs', prefs)
- browser = webdriver.Chrome(chrome_options=options)
-
- # browser = webdriver.Chrome()
- url = "http://image.baidu.com/"
- browser.get(url)
- input("是否有图")
- browser.quit()
-
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持w3xue。