Debian10系统下,安装chrome浏览器,安装chrome driver 2021-11-08
# apt-get update
# apt-get install wget curl unzip
# mkdir -p /root/src
# cd /root/src
# wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# apt install /root/src/google-chrome-stable_current_amd64.deb
// 大约600多MB,需要一定的安装时间
// 安装时会提示: Download is performed unsandboxed as root as file '/root/src/google-chrome-stable_current_amd64.deb' couldn't be accessed by user '_apt'. - pkgAcquire::Run (13: Permission denied)
# ln -s /usr/bin/google-chrome-stable /usr/bin/chrome
# /usr/bin/chrome --version
// 本次安装到的稳定版本为 Google Chrome 95.0.4638.69
# wget https://chromedriver.storage.googleapis.com/95.0.4638.69/chromedriver_linux64.zip
# unzip chromedriver_linux64.zip
// 解压后得到 chromedriver 可执行文件,chromedriver 的版本需要与 Chrome 的版本一致
# mv chromedriver chromedriver.95.0.4638.69
# ln -s /root/src/chromedriver.95.0.4638.69 /root/src/chromedriver
// 参考: https://chromedriver.chromium.org/downloads
// 参考: https://chromedriver.storage.googleapis.com/index.html?path=95.0.4638.69/
使用下面的 Python3 + selenium 程序,对安装结果进行功能验证:
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import time
from selenium import webdriver
# Path of the chromedriver binary; go2web() passes it to run_Chrome() as aDriver.
aDriver = '/root/src/chromedriver'
# File that run_Chrome() writes the page screenshot to.
aScreenShot = '/var/www/html/ScreenShot.png'
def run_Chrome(aDriver, aUA, aURL):
    """Load ``aURL`` in headless Chrome and return the rendered page source.

    The page title, the final URL and the rendered source are printed, and a
    screenshot is written to the module-level ``aScreenShot`` path.

    Args:
        aDriver: Path to the chromedriver executable.
        aUA: User-Agent string the browser should send.
        aURL: Address of the page to open.

    Returns:
        The rendered page source as a string, or ``""`` if anything failed
        (the exception is printed, not propagated).
    """
    # Bound before the try block so the finally clause can always test it,
    # even when option setup or browser start-up fails.
    d = None
    try:
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless')  # run without a visible UI
        # Chrome expects "width,height" here, not "widthxheight".
        chromeOptions.add_argument("--window-size=1920,1080")
        chromeOptions.add_argument('--disable-gpu')  # turn off GPU acceleration
        chromeOptions.add_argument("--mute-audio")  # turn off sound
        chromeOptions.add_argument("--no-sandbox")  # no sandbox (needed when running as root)
        chromeOptions.add_argument("--disable-dev-shm-usage")  # do not rely on /dev/shm
        chromeOptions.add_argument("--disable-extensions")  # turn off extensions
        chromeOptions.add_argument("--disable-images")  # turn off image loading
        chromeOptions.add_argument("--log-level=OFF")  # no logging wanted
        chromeOptions.add_argument(f"--user-agent={aUA}")  # override the UA
        # To route traffic through a proxy, add: f'--proxy-server={aProxy}'

        # NOTE(review): ``executable_path`` is the Selenium 3 / early
        # Selenium 4 keyword; newer Selenium releases expect a Service
        # object instead -- confirm against the installed version.
        d = webdriver.Chrome(executable_path=aDriver, options=chromeOptions)
        # d.set_window_size(1920, 1080) is an alternative way to set the size.

        d.get(aURL)  # open the requested page
        # Fixed pause so the page can finish rendering. Alternatives are
        # d.implicitly_wait(3), or WebDriverWait(d, timeout).until(...) with
        # an expected condition on a specific element.
        time.sleep(3)

        # Fetch the source once so the printed and returned text are the same.
        vPageSource = d.page_source
        print("标题 %s" % d.title)  # page title
        print("URL %s" % d.current_url)  # current URL
        print("源代码 %s" % vPageSource)  # source after rendering
        d.save_screenshot(aScreenShot)  # keep a screenshot
        return vPageSource
    except Exception as e:
        print(e)
        return ""
    finally:
        # quit() ends the whole browser session and the chromedriver process;
        # close() would only close the current window.
        if d is not None:
            d.quit()
def go2web():
    """Fetch the demo page through run_Chrome and print the returned source.

    Any exception raised along the way is printed instead of propagated.
    """
    # User-Agent string handed to run_Chrome as aUA.
    user_agent = 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/555.36'
    try:
        # The URL looks like a placeholder; replace it with a real address.
        print(run_Chrome(aDriver, user_agent, 'http://某个网址/'))
    except Exception as err:
        print(err)
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    go2web()
