问题

我使用 Selenium WebDriver 在 Google 中搜索输入的关键词，然后逐个打开搜索结果中的链接，并从每个页面中提取电子邮件地址。

[[], [], [], [], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['sales@obejor.com', 'sales@obejor.com'], ['sales@obejor.com', 'sales@obejor.com'], [], [], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['hello@paykobo.com'], ['hello@paykobo.com'], ['customercare@indiamart.com'], ['customercare@indiamart.com'], [], [], [], []]
[[], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['sales@obejor.com', 'sales@obejor.com'], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['hello@paykobo.com'], ['customercare@indiamart.com']]

结果是一个嵌套列表（列表的列表）。我需要把内层列表展开，得到一个只包含字符串的扁平列表。

# 打开Google
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys

# Configure Chrome. The browser window is visible (headed) by default, so no
# explicit headless setting is needed.
chrome_options = Options()
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--no-sandbox")
# add_experimental_option() keeps a single value per option name, so calling it
# twice with "excludeSwitches" would replace the first list with the second.
# Both switches are therefore passed in one call.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

import re

# Compiled once instead of on every loop iteration. Inside a character class
# "|" is a literal pipe (not alternation), so it is left out of the TLD part.
email_pattern = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}")

driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=chrome_options)
try:
    # Open Google.
    driver.get("https://www.google.com/")

    # Search term. Hard-coded for the example; use
    # input("Write the name to search:") for interactive runs.
    var_inp = "dermalog lf10"
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys(var_inp + Keys.RETURN)

    # Collect the text of the first child node of every <cite> element on the
    # results page; these strings are used as the URLs to visit below.
    res = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "cite")))
    print(len(res))
    res_lst = [driver.execute_script("return arguments[0].firstChild.textContent;", r) for r in res]
    print(res_lst)

    # Visit each collected URL and gather the e-mail addresses found in its
    # HTML source. One inner list per visited page, so emails_lst is a list of
    # lists.
    emails_lst = []
    for url in res_lst:
        driver.get(url)
        emails = email_pattern.findall(driver.page_source)
        print(emails)
        emails_lst.append(emails)

    print(emails_lst)
    # Drop inner lists that already occurred earlier, keeping first-seen order.
    no_duplicates = [x for n, x in enumerate(emails_lst) if x not in emails_lst[:n]]
    print(no_duplicates)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and the finally clause guarantees it runs even if a step above raises.
    driver.quit()
# send email

英文:

With Selenium WebDriver I search Google for an input value. Then I take the result links one by one, open each of them and extract the email addresses from each page.

[[], [], [], [], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['sales@obejor.com', 'sales@obejor.com'], ['sales@obejor.com', 'sales@obejor.com'], [], [], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['hello@paykobo.com'], ['hello@paykobo.com'], ['customercare@indiamart.com'], ['customercare@indiamart.com'], [], [], [], []]
[[], ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'], ['info@himalayakarya.com', 'info@himalayakarya.com'], ['sales@obejor.com', 'sales@obejor.com'], ['507c10c65fb14333bfa540a7cfb4b543@o417096.ingest.sentry.io'], ['hello@paykobo.com'], ['customercare@indiamart.com']]

The result is a list of lists. I need to flatten it so that the string values sit directly inside a single list.

#open google
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys

# Configure Chrome. The browser window is visible (headed) by default, so no
# explicit headless setting is needed.
chrome_options = Options()
chrome_options.add_argument("start-maximized")
chrome_options.add_argument("--no-sandbox")
# add_experimental_option() keeps a single value per option name, so calling it
# twice with "excludeSwitches" would replace the first list with the second.
# Both switches are therefore passed in one call.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

import re

# Compiled once instead of on every loop iteration. Inside a character class
# "|" is a literal pipe (not alternation), so it is left out of the TLD part.
email_pattern = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}")

driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=chrome_options)
try:
    # Open Google.
    driver.get("https://www.google.com/")

    # Search term. Hard-coded for the example; use
    # input("Write the name to search:") for interactive runs.
    var_inp = "dermalog lf10"
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys(var_inp + Keys.RETURN)

    # Collect the text of the first child node of every <cite> element on the
    # results page; these strings are used as the URLs to visit below.
    res = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "cite")))
    print(len(res))
    res_lst = [driver.execute_script("return arguments[0].firstChild.textContent;", r) for r in res]
    print(res_lst)

    # Visit each collected URL and gather the e-mail addresses found in its
    # HTML source. One inner list per visited page, so emails_lst is a list of
    # lists.
    emails_lst = []
    for url in res_lst:
        driver.get(url)
        emails = email_pattern.findall(driver.page_source)
        print(emails)
        emails_lst.append(emails)

    print(emails_lst)
    # Drop inner lists that already occurred earlier, keeping first-seen order.
    no_duplicates = [x for n, x in enumerate(emails_lst) if x not in emails_lst[:n]]
    print(no_duplicates)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and the finally clause guarantees it runs even if a step above raises.
    driver.quit()
# send email

答案1

得分: 1

要把一个“列表的列表”展平为单个列表，可以使用以下逻辑：

# Nested list of e-mail addresses to flatten. It is named nested_list rather
# than `list` so the built-in list type is not shadowed.
nested_list = [
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
]
flat_list = []
# Walk every inner list and copy its items, in order, into one flat list.
for sublist in nested_list:
    for item in sublist:
        flat_list.append(item)
print(flat_list)

或者，你也可以使用单个列表推导式进行嵌套迭代，如下所示：

# Nested list of e-mail addresses to flatten. It is named nested_list rather
# than `list` so the built-in list type is not shadowed.
nested_list = [
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
]
# The two `for` clauses read left to right like nested loops: the outer one
# iterates over the inner lists, the inner one over each list's items.
flatened_list = [item for sublist in nested_list for item in sublist]
print(flatened_list)

控制台输出：

['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com', 'info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'sales@obejor.com', 'sales@obejor.com', 'sales@obejor.com', 'sales@obejor.com']

英文:

To create a list out of a list of lists you can use the following logic:

# Nested list of e-mail addresses to flatten. It is named nested_list rather
# than `list` so the built-in list type is not shadowed.
nested_list = [
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
]
flat_list = []
# Walk every inner list and copy its items, in order, into one flat list.
for sublist in nested_list:
    for item in sublist:
        flat_list.append(item)
print(flat_list)

As an alternative you can have nested iterations in a single list comprehension as follows:

# Nested list of e-mail addresses to flatten. It is named nested_list rather
# than `list` so the built-in list type is not shadowed.
nested_list = [
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['info@himalayakarya.com', 'info@himalayakarya.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
    ['sales@obejor.com', 'sales@obejor.com'],
]
# The two `for` clauses read left to right like nested loops: the outer one
# iterates over the inner lists, the inner one over each list's items.
flatened_list = [item for sublist in nested_list for item in sublist]
print(flatened_list)

Console output:

['info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com', 'info@neurotechnology.com', 'support@neurotechnology.com', 'support@neurotechnology.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'info@himalayakarya.com', 'sales@obejor.com', 'sales@obejor.com', 'sales@obejor.com', 'sales@obejor.com']

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

Python从列表内提取值

问题

答案1

使用SQLAlchemy执行带有命名参数的SQL语句。

你可以将一个数组存储在一个 pandas 数据框中吗？

我可以访问 Nx3 矩阵中具有特定值的其他列的数据吗？

如何在化学迭代中使用 “for 循环”？

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论