opening two instances of chrome webdriver(with and without proxies) with selenium python

huangapple go评论62阅读模式
英文:

opening two instances of chrome webdriver(with and without proxies) with selenium python

问题

我正在尝试使用Python的Selenium创建两个Chrome WebDriver的实例。第一个实例使用代理,而第二个实例没有代理。

我面临的问题是,第二个实例打开的URL无法加载(因为它使用代理)。我已经实现了两个不同的实例,但仍然遇到此错误。以下是我迄今为止编写的代码:

import os, tempfile
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium import webdriver

# 使用代理和隐身模式设置Selenium
def setup_selenium_with_proxy(proxy):
    proxy_config = Proxy()
    proxy_config.proxy_type = ProxyType.MANUAL
    proxy_config.http_proxy = f"{proxy['ip']}:{proxy['port']}"
    proxy_config.ssl_proxy = f"{proxy['ip']}:{proxy['port']}"

    capabilities = webdriver.DesiredCapabilities.CHROME
    proxy_config.add_to_capabilities(capabilities)

    options = webdriver.ChromeOptions()
    options.add_argument("--incognito")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")
    options.add_argument("--log-level=3")
    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])

    # 为代理浏览器创建单独的用户数据目录
    user_data_dir_proxy = os.path.join(tempfile.gettempdir(), "selenium_proxy_profile")
    if not os.path.exists(user_data_dir_proxy):
        os.makedirs(user_data_dir_proxy)
    options.add_argument(f"--user-data-dir={user_data_dir_proxy}")

    driver = webdriver.Chrome(options=options, desired_capabilities=capabilities)
    return driver

# 不使用代理和隐身模式设置Selenium
def setup_selenium_without_proxy():
    options = webdriver.ChromeOptions()
    options.add_argument("--incognito")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")
    options.add_argument("--log-level=3")
    options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])

    # 为非代理浏览器创建单独的用户数据目录
    user_data_dir_no_proxy = os.path.join(tempfile.gettempdir(), "selenium_no_proxy_profile")
    if not os.path.exists(user_data_dir_no_proxy):
        os.makedirs(user_data_dir_no_proxy)
    options.add_argument(f"--user-data-dir={user_data_dir_no_proxy}")

    driver = webdriver.Chrome(options=options)
    return driver

我的主函数首先使用代理驱动程序工作,当我调用第二个无代理的驱动程序时,网站无法工作并显示网站无法访问 net::ERR_TUNNEL_CONNECTION_FAILED

def main():
    url = "http://example.com"
    
    # 从 'http.txt' 文件中读取代理
    with open('http.txt', 'r') as file:
        proxy_list = [{'ip': proxy.split(':')[0], 'port': proxy.split(':')[1].strip()} for proxy in file.readlines()]

    at_least_one_working_proxy = False
    proxy_index = 0

    while proxy_index < len(proxy_list):
        proxy = proxy_list[proxy_index]
        driver = setup_selenium_with_proxy(proxy)
        driver.set_page_load_timeout(20)
        try:
            driver.get(url)
            
            # 代码
			
            new_driver = setup_selenium_without_proxy()
            new_driver.set_page_load_timeout(20)
        except TimeoutException:
            print(f"Failed to open url using proxy {proxy['ip']}:{proxy['port']} within 20 seconds")
        except WebDriverException as e:
            print(f"WebDriverException encountered while using proxy {proxy['ip']}:{proxy['port']}: {e}")
英文:

I am trying to create two instances of chrome webdriver with selenium python. the first one uses proxies and the second one is proxyless.
The problem I am facing is that the URL opened in the second instance doesn't load(because it uses proxies). I've implemented two different instances, but I still get this error. This is what I've written so far:

import os, tempfile
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium import webdriver
# Set up Selenium with proxy and incognito mode
def setup_selenium_with_proxy(proxy):
proxy_config = Proxy()
proxy_config.proxy_type = ProxyType.MANUAL
proxy_config.http_proxy = f&quot;{proxy[&#39;ip&#39;]}:{proxy[&#39;port&#39;]}&quot;
proxy_config.ssl_proxy = f&quot;{proxy[&#39;ip&#39;]}:{proxy[&#39;port&#39;]}&quot;
capabilities = webdriver.DesiredCapabilities.CHROME
proxy_config.add_to_capabilities(capabilities)
options = webdriver.ChromeOptions()
options.add_argument(&quot;--incognito&quot;)
options.add_argument(&quot;--disable-dev-shm-usage&quot;)
options.add_argument(&quot;--no-sandbox&quot;)
options.add_argument(&quot;--log-level=3&quot;)
options.add_experimental_option(&quot;excludeSwitches&quot;, [&quot;enable-automation&quot;, &quot;enable-logging&quot;])
# Create a separate user data directory for the proxy browser
user_data_dir_proxy = os.path.join(tempfile.gettempdir(), &quot;selenium_proxy_profile&quot;)
if not os.path.exists(user_data_dir_proxy):
os.makedirs(user_data_dir_proxy)
options.add_argument(f&quot;--user-data-dir={user_data_dir_proxy}&quot;)
driver = webdriver.Chrome(options=options, desired_capabilities=capabilities)
return driver
# Set up Selenium without proxy and incognito mode
def setup_selenium_without_proxy():
options = webdriver.ChromeOptions()
options.add_argument(&quot;--incognito&quot;)
options.add_argument(&quot;--disable-dev-shm-usage&quot;)
options.add_argument(&quot;--no-sandbox&quot;)
options.add_argument(&quot;--log-level=3&quot;)
options.add_experimental_option(&quot;excludeSwitches&quot;, [&quot;enable-automation&quot;, &quot;enable-logging&quot;])
# Create a separate user data directory for the non-proxy browser
user_data_dir_no_proxy = os.path.join(tempfile.gettempdir(), &quot;selenium_no_proxy_profile&quot;)
if not os.path.exists(user_data_dir_no_proxy):
os.makedirs(user_data_dir_no_proxy)
options.add_argument(f&quot;--user-data-dir={user_data_dir_no_proxy}&quot;)
driver = webdriver.Chrome(options=options)
return driver

my main function works with the proxy driver first, and when I call the second proxyless driver, the website doesn't work and shows website cannot be reached net::ERR_TUNNEL_CONNECTION_FAILED.

def main():
url = &quot;http://example.com&quot;
# Read proxies from &#39;http.txt&#39; file
with open(&#39;http.txt&#39;, &#39;r&#39;) as file:
proxy_list = [{&#39;ip&#39;: proxy.split(&#39;:&#39;)[0], &#39;port&#39;: proxy.split(&#39;:&#39;)[1].strip()} for proxy in file.readlines()]
at_least_one_working_proxy = False
proxy_index = 0
while proxy_index &lt; len(proxy_list):
proxy = proxy_list[proxy_index]
driver = setup_selenium_with_proxy(proxy)
driver.set_page_load_timeout(20)
try:
driver.get(url)
# code
new_driver = setup_selenium_without_proxy()
new_driver.set_page_load_timeout(20)
except TimeoutException:
print(f&quot;Failed to open url using proxy {proxy[&#39;ip&#39;]}:{proxy[&#39;port&#39;]} within 20 seconds&quot;)
except WebDriverException as e:
print(f&quot;WebDriverException encountered while using proxy {proxy[&#39;ip&#39;]}:{proxy[&#39;port&#39;]}: {e}&quot;)

答案1

得分: 0

将以下行添加到您的without_proxy函数中:

options.add_argument('--no-proxy-server')
英文:

add the following line to your without_proxy function:

options.add_argument(&#39;--no-proxy-server&#39;)

huangapple
  • 本文由 发表于 2023年4月7日 05:41:45
  • 转载请务必保留本文链接:https://go.coder-hub.com/75953983.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定