月度归档:2021年11月

通用代理池项目,功能型函数,检测通用代理是否可用 2021-11-24

// 依赖的2个模块

# pip3 install requests   # 网络请求的
# pip3 install pysocks    # 支持socks的
import requests 

def checkProxyStatus(TargetURL, aUA, aProxy, aTimeout):
    """
        checkProxyStatus(TargetURL, aUA, aProxy, aTimeout)
        Probe one proxy by requesting a test page through it.
        Depends on: requests, pysocks (socks support)
        Param TargetURL: address of the site requested through the proxy
        Param aUA: value sent as the User-Agent request header
        Param aProxy: the proxy under test, formatted as vtype://ip:port
        Param aTimeout: timeout passed to requests.get
        Returns 'Y' when the response status code is 200
        Returns 'N' for any other status code, or when any exception is raised
    """
    try:
        probe = requests.get(
            url=TargetURL,
            headers={"User-Agent": aUA},
            # The same proxy is used for both http and https targets.
            proxies={scheme: aProxy for scheme in ('http', 'https')},
            timeout=aTimeout,
        )
        return 'Y' if probe.status_code == 200 else 'N'
    except Exception:
        # Best-effort check: every failure simply means "not usable".
        return 'N'
# 实际测了如下的一段代码,通过。

import requests

def checkProxyStatus(TargetURL, aUA, aProxy, aTimeout):
    """
        checkProxyStatus(TargetURL, aUA, aProxy, aTimeout)
        Probe one proxy by requesting a test page through it.
        Depends on: requests, pysocks (socks support)
        Param TargetURL: address of the site requested through the proxy
        Param aUA: value sent as the User-Agent request header
        Param aProxy: the proxy under test, formatted as vtype://ip:port
        Param aTimeout: timeout passed to requests.get
        Returns 'Y' when the response status code is 200
        Returns 'N' for any other status code, or when any exception is raised
    """
    try:
        probe = requests.get(
            url=TargetURL,
            headers={"User-Agent": aUA},
            # The same proxy is used for both http and https targets.
            proxies={scheme: aProxy for scheme in ('http', 'https')},
            timeout=aTimeout,
        )
        return 'Y' if probe.status_code == 200 else 'N'
    except Exception:
        # Best-effort check: every failure simply means "not usable".
        return 'N'
        
        
# Probe the socks5 proxy on local port 10808 and print the 'Y'/'N' verdict.
v = checkProxyStatus(
    TargetURL='https://www.duckduckgo.com',
    aUA='Python',
    aProxy='socks5://127.0.0.1:10808',
    aTimeout=3,
)
print(v)

# 在本地10808的v2代理开着时候,上面的代码,正常返回 字符 Y
Donate
云乞讨

通用代理池项目,功能型函数,PostgreSQL数据库的连接 2021-11-22

使用的模块为psycopg2

安装方式:

pip3 install psycopg2-binary

模块测试

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import psycopg2

#   PostgreSQL 数据入库模块 正确执行返回Y 否则返回N  增 / 删 / 改
def runDataInPostgreSQLDB(aHost, aPort, aDB, aUser, aPass, aSQL, aParams=None):
    """Run one write statement (insert/delete/update) and commit it.

    A new connection is opened for the call and is always closed before
    returning.

    Args:
        aHost, aPort: address of the PostgreSQL server.
        aDB: database name.
        aUser, aPass: login credentials.
        aSQL: the statement to execute.
        aParams: optional sequence or mapping of values the driver binds to
            the placeholders in aSQL. Prefer this over pasting untrusted
            values into the SQL text. The default None sends aSQL unchanged.

    Returns:
        'Y' when the statement ran and was committed; 'N' on any error
        (the error is printed).
    """
    conn = None
    try:
        print(' + Database connection START')
        conn = psycopg2.connect(dbname=aDB, user=aUser, password=aPass, host=aHost, port=aPort)
        # The context manager closes the cursor even when execute() fails.
        with conn.cursor() as cur:
            cur.execute(aSQL, aParams)
        conn.commit()
        return 'Y'
    except Exception as e:
        # psycopg2.DatabaseError is itself an Exception, so one clause is enough.
        print(e)
        return 'N'
    finally:
        if conn is not None:
            conn.close()
            print(' + Database connection CLOSE')

#   PostgreSQL 数据读取模块 正确返回数据列表 错误返回空列表
def runDataOutPostgreSQLDB(aHost, aPort, aDB, aUser, aPass, aSQL, aParams=None):
    """Run one select statement and return every row it produces.

    A new connection is opened for the call and is always closed before
    returning.

    Args:
        aHost, aPort: address of the PostgreSQL server.
        aDB: database name.
        aUser, aPass: login credentials.
        aSQL: the query to execute.
        aParams: optional sequence or mapping of values the driver binds to
            the placeholders in aSQL. Prefer this over pasting untrusted
            values into the SQL text. The default None sends aSQL unchanged.

    Returns:
        The result of cursor.fetchall() on success; an empty list on any
        error (the error is printed).
    """
    conn = None
    try:
        print(' + Database connection START')
        conn = psycopg2.connect(dbname=aDB, user=aUser, password=aPass, host=aHost, port=aPort)
        # The context manager closes the cursor even when the query fails.
        with conn.cursor() as cur:
            cur.execute(aSQL, aParams)
            return cur.fetchall()
    except Exception as e:
        # psycopg2.DatabaseError is itself an Exception, so one clause is enough.
        print(e)
        return []
    finally:
        if conn is not None:
            conn.close()
            print(' + Database connection CLOSE')
            
# The query to run. The database name, user and password below are
# placeholders to be replaced with real values.
aSQL = "select vtype,ip,port from mdata limit 3;"
# Pass the query defined above (previously a placeholder string was sent
# instead, leaving aSQL unused).
vData = runDataOutPostgreSQLDB(aHost='127.0.0.1', aPort=5432, aDB="你的数据库实例名称", aUser="你的数据库用户名", aPass="你的数据库密码", aSQL=aSQL)
print(vData)
// 测试结果

root@PostgreSQLDatabase:~/src/invoker# python3 tmp1058.py 
 + Database connection START
 + Database connection CLOSE
[('socks4', '203.207.52.38', 5430), ('socks4', '103.146.170.244', 5678)]
root@PostgreSQLDatabase:~/src/invoker# 
如果用命令行连接数据库
# psql -d 你的数据库实例名称 -h 127.0.0.1 -p 5432 -U 你的数据库用户名 -W 
Donate
云乞讨

通用代理池项目,功能型函数,验证端口Port是否正确 2021-11-24

user@server:~$ cat tmp1358.py
def isActivePort(port):
    """
        isActivePort(port)
        Param port: the port, expected as a string
        Purpose: decide whether port denotes a valid port number
        Returns 'Y' when int(port) lies in 1..65535
        Returns 'N' otherwise, including when int(port) raises
    """
    try:
        number = int(port)
    except Exception:
        # Anything int() cannot convert is not a port.
        return 'N'
    return 'Y' if 1 <= number <= 65535 else 'N'

# Samples: non-numeric text, below the range, above the range, inside the range.
data1, data2, data3, data4 = 'somedata', '0', '65536', '2048'
v1, v2, v3, v4 = [isActivePort(port = sample) for sample in (data1, data2, data3, data4)]
for sample, verdict in ((data1, v1), (data2, v2), (data3, v3), (data4, v4)):
    print(f"{sample} - {verdict}")


user@server:~$ python3 tmp1358.py
somedata - N
0 - N
65536 - N
2048 - Y
user@server:~$ 
# 2021-11-24 更新为

def isActivePort(port):
    """
        isActivePort(port)
        Param port: the port, as a string
        Purpose: decide whether the string is a valid port number
        Returns 'Y' when port consists only of the ASCII digits 0-9 and its
                value lies in 1..65535
        Returns 'N' otherwise (including non-string input)
    """
    if not isinstance(port, str) or not port:
        return 'N'
    # str.isdigit() is also true for non-ASCII digits (e.g. full-width
    # characters) that int() happily parses, so check against 0-9 explicitly.
    if any(ch not in '0123456789' for ch in port):
        return 'N'
    try:
        return 'Y' if 1 <= int(port) <= 65535 else 'N'
    except ValueError:
        # int() may refuse extremely long digit strings.
        return 'N'
Donate
云乞讨

通用代理池项目,功能型函数,验证IPv4地址是否正确 2021-11-10

isActiveIPv4(ip)
参数 ip,字符串
用途,判断ip字符串是否为合法的IPv4地址
依赖模块,ipaddress
返回值,判定为是,返回 字符串 'Y'
返回值,判定为否,返回 字符串 'N'
user@server:~$ cat tmp1348.py
import ipaddress
def isActiveIPv4(ip):
    """
        isActiveIPv4(ip)
        Param ip: the address, as a string
        Purpose: decide whether the string is a usable IPv4 address
        Depends on: ipaddress
        Returns 'Y' when ip parses as an IPv4 address other than
                0.0.0.0 and 255.255.255.255
        Returns 'N' otherwise (including non-string input)
    """
    # IPv4Address() also accepts ints and packed bytes; only text is valid here.
    if not isinstance(ip, str):
        return 'N'
    try:
        address = ipaddress.IPv4Address(ip)
    except ValueError:
        return 'N'
    # Compare parsed addresses rather than raw text, so the blacklist does not
    # depend on how the input happens to be spelled.
    blocked = (
        ipaddress.IPv4Address('0.0.0.0'),
        ipaddress.IPv4Address('255.255.255.255'),
    )
    return 'N' if address in blocked else 'Y'


# Samples: not an address, loopback, private address, blacklisted address.
data1, data2, data3, data4 = 'datastring', '127.0.0.1', '192.168.1.1', '0.0.0.0'
v1, v2, v3, v4 = [isActiveIPv4(ip = sample) for sample in (data1, data2, data3, data4)]
for sample, verdict in ((data1, v1), (data2, v2), (data3, v3), (data4, v4)):
    print(f"{sample} - {verdict}")

user@server:~$ python3 tmp1348.py 
datastring - N
127.0.0.1 - Y
192.168.1.1 - Y
0.0.0.0 - N
user@server:~$ 




Donate
云乞讨

通用代理池项目,功能型函数,验证通用代理类型 2021-11-10

检查一个字符串是否属于通用代理类型 "http"、"https"、"socks4"、"socks5" 之一;满足则返回 "Y",否则返回 "N"

user@server:~$ cat tmp1221.py 
def isActiveValueType(s):
    """
        isActiveValueType(s)
        Param s: the proxy type, as a string
        Purpose: decide whether s is one of "http", "https", "socks4", "socks5"
                 (exact, case-sensitive match)
        Returns 'Y' when it is
        Returns 'N' when it is not
    """
    return 'Y' if s in ("http", "https", "socks4", "socks5") else 'N'

# Samples: two unknown words and two supported proxy types.
data1, data2, data3, data4 = 'somedata', 'http', 'socks5', 'linux6'
v1, v2, v3, v4 = [isActiveValueType(s = sample) for sample in (data1, data2, data3, data4)]
for sample, verdict in ((data1, v1), (data2, v2), (data3, v3), (data4, v4)):
    print(f"{sample} - {verdict}")




user@server:~$ python3 tmp1221.py 
somedata - N
http - Y
socks5 - Y
linux6 - N
user@server:~$ 
Donate
云乞讨

通用代理池,爬虫项目,采集模块构建,静态型采集任务 2021-11-09

采集网站 https://www.socks-proxy.net/
采集页面 网站主页面
采集形式 非动态 匹配 IP:PORT 
匹配规则 pttn = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5})'
备注说明 都是一些socks4类型的数据
最后一次人工校验时间 2021-10-09 20:07

import re
# Sample table row: IP, port, 'AU', 'Australia', proxy type.
s = "<tr><td>210.56.244.134</td><td>4145</td><td>AU</td><td class='hm'>Australia</td><td>Socks4</td>"
# Capture groups: (1) IPv4 address, (2) port, (3) proxy type.
# The two cells in between are skipped with lazy wildcards.
pttn = (
    r"<tr><td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>"
    r"<td>(\d{1,5})</td>"
    r"<td>.*?</td><td class='hm'>.*?</td>"
    r"<td>(\S{1,6})</td>"
)
data = re.compile(pttn).findall(s)
print(data)
Donate
云乞讨

Debian10系统下,安装chrome浏览器,安装chrome driver 2021-11-08

Debian10系统下,安装chrome浏览器,安装chrome driver 2021-11-08

# apt-get update

# apt-get install wget curl unzip

# mkdir -p /root/src

# cd /root/src

# wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb

# apt install /root/src/google-chrome-stable_current_amd64.deb
  // 大约600多MB的,需要一定的安装时间
  // 提示 Download is performed unsandboxed as root as file '/root/src/google-chrome-stable_current_amd64.deb' couldn't be accessed by user '_apt'. - pkgAcquire::Run (13: Permission denied)

# ln -s /usr/bin/google-chrome-stable /usr/bin/chrome

# /usr/bin/chrome -version
  // 本次安装到的稳定版本为 Google Chrome 95.0.4638.69 

# wget https://chromedriver.storage.googleapis.com/95.0.4638.69/chromedriver_linux64.zip
# unzip chromedriver_linux64.zip
  // 解压后得到可执行文件 chromedriver
# mv chromedriver chromedriver.95.0.4638.69
# ln -s /root/src/chromedriver.95.0.4638.69  /root/src/chromedriver
  // https://chromedriver.chromium.org/downloads
  // https://chromedriver.storage.googleapis.com/index.html?path=95.0.4638.69/

使用下面的Python3,selenium的程序,进行功能验证!

#!/usr/bin/python3
# -*- coding:utf-8 -*-
import time
from selenium import webdriver

# Path of the chromedriver executable handed to Selenium.
aDriver = '/root/src/chromedriver'
# File that run_Chrome writes its screenshot to.
aScreenShot = '/var/www/html/ScreenShot.png'


def run_Chrome(aDriver, aUA, aURL):
    """Open aURL in headless Chrome and return the rendered page source.

    Prints the page title, the current URL and the page source, and saves a
    screenshot to the module-level path aScreenShot.

    Args:
        aDriver: path of the chromedriver executable.
        aUA: User-Agent string the browser should send.
        aURL: address to open.

    Returns:
        The page source as a string, or "" when anything fails (the
        exception is printed).
    """
    # Must exist before the try block: if the browser fails to start, the
    # finally clause would otherwise hit an unbound name and raise instead
    # of letting "" be returned.
    d = None
    try:
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless')                # headless mode
        chromeOptions.add_argument("--window-size=1920x1080")   # window size
        chromeOptions.add_argument('--disable-gpu')             # disable GPU acceleration
        chromeOptions.add_argument("--mute-audio")              # mute audio
        chromeOptions.add_argument("--no-sandbox")              # no sandbox
        chromeOptions.add_argument("--disable-dev-shm-usage")
        chromeOptions.add_argument("--disable-extensions")      # disable extensions
        chromeOptions.add_argument("--disable-images")          # disable image loading
        chromeOptions.add_argument("--log-level=OFF")           # no logging
        chromeOptions.add_argument(f"--user-agent={aUA}")       # override the UA
        # To send the browser through a proxy, also add:
        #   chromeOptions.add_argument(f'--proxy-server={aProxy}')
        d = webdriver.Chrome(executable_path = aDriver, options = chromeOptions)
        d.get(aURL)                                             # open the page
        # Fixed pause to let the page load. Alternatives: an implicit wait,
        # or WebDriverWait on the presence of a known element.
        time.sleep(3)
        print("标题 %s" % d.title)                              # page title
        print("URL %s" % d.current_url)                         # current URL
        print("源代码 %s" % d.page_source)                      # rendered source
        d.save_screenshot(aScreenShot)                          # keep a screenshot
        return d.page_source
    except Exception as e:
        print(e)
        return ""
    finally:
        if d is not None:
            # quit() ends the whole session, including the chromedriver
            # process; close() would only close the current window.
            d.quit()
            


def go2web():
    """Fetch the demo page through run_Chrome and print what it returns.

    Any exception is printed instead of being raised.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/555.36'
    try:
        print(run_Chrome(aDriver = aDriver, aUA = user_agent, aURL = 'http://某个网址/'))
    except Exception as e:
        print(e)


if __name__ == '__main__':
    go2web()
    




Donate
云乞讨

iptables 转发流量 2021-11-07

使用iptables转发流量的命令语法,简单示例一个TCP转发

iptables -t nat -A PREROUTING -p tcp -m tcp --dport 中转服务器入口端口号 -j DNAT --to-destination 目标服务器IP地址:目标服务器端口号

iptables -t nat -A POSTROUTING -d 目标服务器IP地址/32 -p tcp -m tcp --dport 目标服务器端口号 -j SNAT --to-source 中转服务器本地IP地址

iptables -A INPUT -p tcp -m state --state NEW -m tcp --dport 中转服务器入口端口号 -j ACCEPT
Donate
云乞讨

Python3框架FLASK知识点 2021-11-2

基础安装

# apt-get update
# apt-get install python3-pip python3 python3-gevent

基础的一个示例

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 2021-11-02 
# Dasmz  

import flask

app = flask.Flask(__name__)


@app.route('/', methods=['GET'])
def index():
    """Answer GET / with a fixed HTML paragraph."""
    return '<p>Hello World!</p>'


if __name__ == "__main__":
    # Listen on every interface, port 41012.
    app.run(host='0.0.0.0', port=41012)


# 服务端 
root@cafe560b8380:~# python3 1102-1634.py 
 * Serving Flask app '1102-1634' (lazy loading)
 * Environment: production
   WARNING: This is a development server. Do not use it in a production deployment.
   Use a production WSGI server instead.
 * Debug mode: off
 * Running on all addresses.
   WARNING: This is a development server. Do not use it in a production deployment.
 * Running on http://172.17.0.3:41012/ (Press CTRL+C to quit)
x.x.12.211 - - [02/Nov/2021 08:34:42] "GET / HTTP/1.1" 200 -
x.x.12.211 - - [02/Nov/2021 08:34:42] "GET /favicon.ico HTTP/1.1" 404 -
x.x.12.211 - - [02/Nov/2021 08:35:16] "GET / HTTP/1.1" 200 -
# 客户端,验证请求
curl http://198.*.*.196:41012/
<p>Hello World!</p>
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 2021-11-02 
# Dasmz

import html

import flask

app = flask.Flask(__name__)


# Main route
@app.route('/', methods=['GET'])
def index():
    """Answer GET / with a fixed HTML paragraph."""
    return '<p>Hello World!</p>\n'


# Route with a name taken from the URL path
@app.route('/home/<name>', methods=['GET'])
def home(name):
    """Greet the visitor by the name given in the URL path.

    The name comes straight from the request, so it is HTML-escaped before
    being placed into the markup; otherwise a crafted URL could inject tags.
    """
    return '<p>Hello World!<br><br> Hi, <b>{}</b>.</p>\n'.format(html.escape(name))


if __name__ == "__main__":
    # Listen on every interface, port 41012.
    app.run(host='0.0.0.0', port=41012)



#  curl http://IPv4:41012/
#  curl http://IPv4:41012/home/david
#  curl http://IPv4:41012/home/J.J.HarrySon
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 2021-11-02 
# Dasmz

import html

import flask

app = flask.Flask(__name__)


# Main route
@app.route('/', methods=['GET'])
def index():
    """Answer GET / with a fixed HTML paragraph."""
    return '<p>Hello World!</p>\n'


# Route with a name taken from the URL path
@app.route('/home/<name>', methods=['GET'])
def home(name):
    """Greet the visitor by the name given in the URL path.

    The name comes straight from the request, so it is HTML-escaped before
    being placed into the markup; otherwise a crafted URL could inject tags.
    """
    return '<p>Hello World!<br><br> Hi, <b>{}</b>.</p>\n'.format(html.escape(name))


# API: return the client's User-Agent
@app.route('/API/getUA', methods=['GET'])
def getUserAgent():
    """Echo the request's User-Agent header followed by a newline.

    The header value is client-controlled, so the response is sent as
    text/plain: the body stays byte-for-byte what the client sent, and a
    browser will not interpret it as HTML.
    """
    user_agent = flask.request.headers.get('User-Agent')
    return flask.Response('{}\n'.format(user_agent), mimetype='text/plain')


if __name__ == "__main__":
    # Listen on every interface, port 41012.
    app.run(host='0.0.0.0', port=41012)



#  curl http://IPv4:41012/
#  curl http://IPv4:41012/home/david
#  curl http://IPv4:41012/home/J.J.HarrySon
#  curl http://IPv4:41012/API/getUA