Skip to main content

Python 常用三方模块

Python 三方模块 requests

requests 是 Python 中最流行的 HTTP 库,其口号是"HTTP for Humans"(为人类设计的 HTTP)。它简化了 HTTP 请求的发送过程,提供了简洁优雅的 API,是进行网络请求的首选工具。本章节主要讲解 requests 的基本用法、响应处理、请求头和认证、会话管理、文件操作、异常处理以及高级配置。

pip install requests

基本用法

GET 请求

GET 请求是最常见的 HTTP 方法,用于获取资源。

import requests

# Fetch a resource with a plain GET request.
url = 'https://httpbin.org/get'
resp = requests.get(url)
print(resp.status_code)  # numeric HTTP status code
print(resp.text)  # response body as text

带参数的 GET 请求

import requests

# Option 1: write the query string directly into the URL.
response = requests.get('https://httpbin.org/get?key1=value1&key2=value2')

# Option 2 (recommended): hand the query parameters to `params` as a dict.
query = dict(key1='value1', key2='value2')
response = requests.get('https://httpbin.org/get', params=query)
print(response.url)  # the complete URL that was requested

POST 请求

POST 请求通常用于提交数据到服务器。

import requests

# Submit the fields as form data through the `data` argument.
form_fields = dict(username='admin', password='123456')
resp = requests.post('https://httpbin.org/post', data=form_fields)
print(resp.json())  # parse the response body as JSON

发送 JSON 数据

import requests
import json

# Send JSON by passing the object through the `json` argument.
payload = {'name': '张三', 'age': 25}
response = requests.post('https://httpbin.org/post', json=payload)

# Alternatively, serialize by hand and set Content-Type explicitly.
serialized = json.dumps(payload)
response = requests.post(
    'https://httpbin.org/post',
    data=serialized,
    headers={'Content-Type': 'application/json'},
)

处理响应

响应对象的属性和方法

import requests

resp = requests.get('https://httpbin.org/get')

# Basic attributes of the response.
print(f"状态码: {resp.status_code}")
print(f"响应头: {resp.headers}")
print(f"请求URL: {resp.url}")
print(f"编码: {resp.encoding}")

# The body, viewed three ways: text, raw bytes, parsed JSON.
print(f"文本内容: {resp.text}")
print(f"字节内容: {resp.content}")
print(f"JSON内容: {resp.json()}")  # only valid when the body is JSON

状态码检查

import requests

resp = requests.get('https://httpbin.org/status/404')

# Check the outcome by hand: look up a message for the status codes
# handled here; any other code prints nothing.
status_messages = {200: "请求成功", 404: "页面未找到"}
message = status_messages.get(resp.status_code)
if message is not None:
    print(message)

# Or let raise_for_status() raise an exception for an error status.
try:
    resp.raise_for_status()
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP错误: {http_err}")

请求头和认证

自定义请求头

import requests

# Headers to send with this one request.
request_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
    'Accept': 'application/json',
    'Authorization': 'Bearer your-token-here',
}

resp = requests.get('https://httpbin.org/headers', headers=request_headers)
print(resp.json())

基本认证

import requests
from requests.auth import HTTPBasicAuth

auth_url = 'https://httpbin.org/basic-auth/user/pass'

# Option 1: pass an explicit HTTPBasicAuth object via `auth`.
response = requests.get(auth_url, auth=HTTPBasicAuth('user', 'pass'))

# Option 2: the shorthand form, a plain (username, password) tuple.
credentials = ('user', 'pass')
response = requests.get(auth_url, auth=credentials)
print(response.json())

会话管理

使用 Session 对象

Session 对象可以跨请求保持某些参数,如 cookies、认证信息等。

import requests

# Create the session inside a `with` block so that it is closed (and its
# pooled connections released) when the block exits, even on error.
with requests.Session() as session:
    # Session-level headers are sent with every request made through it.
    session.headers.update({'User-Agent': 'My-App/1.0'})

    # The first request sets a cookie; the second is sent through the same session.
    response1 = session.get('https://httpbin.org/cookies/set/sessioncookie/123456789')
    response2 = session.get('https://httpbin.org/cookies')

    print(response2.json())  # shows the cookie set by the first request

文件操作

文件上传

import requests

# Create a small sample file to upload.
sample_path = 'example.txt'
with open(sample_path, 'w') as sample:
    sample.write('Hello, World!')

# Reopen it in binary mode and send it under the form field name 'file'.
with open(sample_path, 'rb') as upload:
    response = requests.post('https://httpbin.org/post', files={'file': upload})
    print(response.json())

文件下载

import requests

# Small file: the whole body is held in memory, then written out at once.
response = requests.get('https://httpbin.org/image/png')
response.raise_for_status()  # do not save an error page as if it were the image
with open('downloaded_image.png', 'wb') as f:
    f.write(response.content)

# Large file: stream the body in chunks instead of loading it all at once.
# With stream=True the connection stays open until the body is fully read or
# the response is closed, so use the response as a context manager to make
# sure it is released even if writing fails part-way through.
with requests.get('https://httpbin.org/image/png', stream=True) as response:
    response.raise_for_status()
    with open('large_file.png', 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

异常处理

常见异常类型

import requests
from requests.exceptions import RequestException, Timeout, ConnectionError

# The handlers for specific exception types come first; RequestException,
# imported above as the general case, is handled last.
try:
    resp = requests.get('https://httpbin.org/delay/10', timeout=5)
    resp.raise_for_status()
    print(resp.json())
except Timeout:
    print("请求超时")
except ConnectionError:
    print("连接错误")
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP错误: {http_err}")
except RequestException as req_err:
    print(f"请求异常: {req_err}")

高级配置

超时设置

import requests

# A single number is the timeout in seconds.
try:
    response = requests.get('https://httpbin.org/delay/2', timeout=5)
except requests.exceptions.Timeout:
    print("请求超时")
else:
    print("请求成功")

# A two-element tuple sets the connect and read timeouts separately.
connect_timeout, read_timeout = 3.05, 27
response = requests.get(
    'https://httpbin.org/get',
    timeout=(connect_timeout, read_timeout),
)

代理设置

import requests

# Configure proxies. Each key is the scheme of the *target* URL; each value
# is the URL of the proxy itself. An ordinary HTTP proxy is reached over
# plain HTTP even when it relays HTTPS traffic, so both entries use the
# "http://" scheme here. Use "https://" in a value only if the proxy server
# itself accepts TLS connections on that port.
proxies = {
    'http': 'http://proxy.example.com:8080',
    'https': 'http://proxy.example.com:8080',
}

try:
    response = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
    print(response.json())
except requests.exceptions.RequestException as e:
    print(f"代理请求失败: {e}")

SSL 证书验证

import requests

target = 'https://httpbin.org/get'

# Turn certificate verification off (not recommended in production).
response = requests.get(target, verify=False)

# Verify against a custom certificate file instead.
cert_file = '/path/to/cert.pem'
response = requests.get(target, verify=cert_file)

最佳实践

  1. 使用 Session 对象:当需要向同一主机发送多个请求时,使用 Session 可以重用底层的 TCP 连接,提高性能。
  2. 设置合理的超时时间:requests 默认不设置超时,不指定 timeout 时程序可能因为网络问题而无限等待。
  3. 异常处理:始终处理可能出现的网络异常。
  4. 使用 raise_for_status():检查 HTTP 状态码并在出错时抛出异常。
  5. 流式下载大文件:使用 stream=True 配合 iter_content(),避免将大文件完全加载到内存中。