Debian10系统下,安装chrome浏览器,安装chrome driver 2021-11-08
# apt-get update
# apt-get install wget curl unzip
# mkdir -p /root/src
# cd /root/src
# wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# apt install /root/src/google-chrome-stable_current_amd64.deb
// 大约600多MB的,需要一定的安装时间
// 安装过程中会出现如下提示(仅为提示信息,不影响安装):
// Download is performed unsandboxed as root as file '/root/src/google-chrome-stable_current_amd64.deb' couldn't be accessed by user '_apt'. - pkgAcquire::Run (13: Permission denied)
# ln -s /usr/bin/google-chrome-stable /usr/bin/chrome
# /usr/bin/chrome -version
// 本次安装到的稳定版本为 Google Chrome 95.0.4638.69
// 下载与 Chrome 版本号一致的 chromedriver,并解压:
# wget https://chromedriver.storage.googleapis.com/95.0.4638.69/chromedriver_linux64.zip
# unzip chromedriver_linux64.zip
# mv chromedriver chromedriver.95.0.4638.69
# ln -s /root/src/chromedriver.95.0.4638.69 /root/src/chromedriver
// chromedriver 下载地址:
// https://chromedriver.chromium.org/downloads
// https://chromedriver.storage.googleapis.com/index.html?path=95.0.4638.69/
使用下面的 Python3 + selenium 程序进行功能验证(运行前需要先安装好 selenium 库):
#!/usr/bin/python3
# -*- coding:utf-8 -*-
"""Smoke test for a headless Chrome + chromedriver installation via selenium.

Loads a single URL in headless Chrome, prints the page title, the current URL
and the rendered page source, and saves a screenshot to ``aScreenShot``.
"""

import time

from selenium import webdriver

# Path to the chromedriver executable handed to selenium.
aDriver = '/root/src/chromedriver'
# File the screenshot of the loaded page is written to.
aScreenShot = '/var/www/html/ScreenShot.png'


def run_Chrome(aDriver, aUA, aURL):
    """Open ``aURL`` in headless Chrome and return the rendered page source.

    Prints the title, current URL and page source, and saves a screenshot to
    the module-level ``aScreenShot`` path.

    Args:
        aDriver: Path to the chromedriver executable.
        aUA: User-Agent string the browser should send.
        aURL: Address of the page to load.

    Returns:
        The page source as a string, or ``""`` if any step raised (the
        exception is printed, not propagated).
    """
    # Bound up front so the ``finally`` clause is safe even when the driver
    # could not be started at all.
    d = None
    try:
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless')  # no visible UI
        # Chrome expects "width,height"; the "1920x1080" form is not parsed.
        chromeOptions.add_argument("--window-size=1920,1080")  # window size
        chromeOptions.add_argument('--disable-gpu')  # turn off GPU acceleration
        chromeOptions.add_argument("--mute-audio")  # no sound
        chromeOptions.add_argument("--no-sandbox")  # no sandbox
        chromeOptions.add_argument("--disable-dev-shm-usage")
        # chromeOptions.add_argument("--disable-extensions")  # disable extensions
        # NOTE(review): confirm this switch is honoured by the installed Chrome.
        chromeOptions.add_argument("--disable-images")  # do not load images
        chromeOptions.add_argument("--log-level=OFF")  # no logging wanted
        chromeOptions.add_argument(f"--user-agent={aUA}")  # override the UA
        # chromeOptions.add_argument(f'--proxy-server={aProxy}')  # use a proxy

        d = webdriver.Chrome(executable_path=aDriver, options=chromeOptions)
        # d.set_window_size(1920, 1080)  # resolution (width, height)
        d.get(aURL)  # open the requested address

        # Option 1 for giving the page time to load: a fixed delay.
        time.sleep(3)
        # Option 2: d.implicitly_wait(3)
        # Option 3: wait until a specific element is present, e.g.
        #   element_present = EC.presence_of_element_located((By.ID, 'element_id'))
        #   WebDriverWait(d, timeout).until(element_present)

        print("标题 %s" % d.title)  # page title
        print("URL %s" % d.current_url)  # current URL
        vSource = d.page_source  # rendered page source (a string), read once
        print("源代码 %s" % vSource)
        d.save_screenshot(aScreenShot)  # keep a screenshot
        return vSource
    except Exception as e:
        # Best effort: report the problem and hand back an empty result.
        print(e)
        return ""
    finally:
        # quit() ends the whole session, including the chromedriver process;
        # close() would only close the current window.
        if d is not None:
            d.quit()


def go2web():
    """Run ``run_Chrome`` against the sample URL and print what it returns."""
    try:
        aUA = 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/555.36'
        vWebContent = run_Chrome(aDriver=aDriver, aUA=aUA, aURL='http://某个网址/')
        print(vWebContent)
    except Exception as e:
        print(e)


if __name__ == '__main__':
    go2web()