Python 第三方模块 requests
requests 是 Python 中最流行的第三方 HTTP 库,其口号是"HTTP for Humans"(为人类设计的 HTTP)。它简化了 HTTP 请求的发送过程,提供了简洁优雅的 API,是进行网络请求的首选工具。本章节主要讲解 requests 的基本用法、响应处理、请求头和认证、会话管理、文件操作、异常处理以及高级配置。
pip install requests
基本用法
GET 请求
GET 是最常见的 HTTP 请求方法,用于从服务器获取资源。
import requests
# 发送简单的 GET 请求
response = requests.get('https://httpbin.org/get')
print(response.status_code) # 状态码
print(response.text) # 响应内容
带参数的 GET 请求
import requests
# 方式一:直接在 URL 中添加参数
response = requests.get('https://httpbin.org/get?key1=value1&key2=value2')
# 方式二:使用 params 参数(推荐)
params = {'key1': 'value1', 'key2': 'value2'}
response = requests.get('https://httpbin.org/get', params=params)
print(response.url) # 查看完整的请求 URL
POST 请求
POST 请求通常用于提交数据到服务器。
import requests
# 发送表单数据
data = {'username': 'admin', 'password': '123456'}
response = requests.post('https://httpbin.org/post', data=data)
print(response.json()) # 将响应解析为 JSON
发送 JSON 数据
import requests
import json
# 发送 JSON 数据
json_data = {'name': '张三', 'age': 25}
response = requests.post('https://httpbin.org/post', json=json_data)
# 或者手动设置 Content-Type
headers = {'Content-Type': 'application/json'}
response = requests.post('https://httpbin.org/post',
data=json.dumps(json_data),
headers=headers)
处理响应
响应对象的属性和方法
import requests
response = requests.get('https://httpbin.org/get')
# 基本属性
print(f"状态码: {response.status_code}")
print(f"响应头: {response.headers}")
print(f"请求URL: {response.url}")
print(f"编码: {response.encoding}")
# 响应内容
print(f"文本内容: {response.text}")
print(f"字节内容: {response.content}")
print(f"JSON内容: {response.json()}") # 仅当响应为JSON格式时
状态码检查
import requests
response = requests.get('https://httpbin.org/status/404')
# 检查请求是否成功
if response.status_code == 200:
print("请求成功")
elif response.status_code == 404:
print("页面未找到")
# 使用 raise_for_status() 抛出异常
try:
response.raise_for_status()
except requests.exceptions.HTTPError as e:
print(f"HTTP错误: {e}")
请求头和认证
自定义请求头
import requests
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Accept': 'application/json',
'Authorization': 'Bearer your-token-here'
}
response = requests.get('https://httpbin.org/headers', headers=headers)
print(response.json())
基本认证
import requests
from requests.auth import HTTPBasicAuth
# 方式一:使用 auth 参数
response = requests.get('https://httpbin.org/basic-auth/user/pass',
auth=HTTPBasicAuth('user', 'pass'))
# 方式二:简化写法
response = requests.get('https://httpbin.org/basic-auth/user/pass',
auth=('user', 'pass'))
print(response.json())
会话管理
使用 Session 对象
Session 对象可以在多个请求之间保持某些参数,如 cookies、请求头、认证信息等。
import requests
# 创建会话对象
session = requests.Session()
# 设置会话级别的请求头
session.headers.update({'User-Agent': 'My-App/1.0'})
# 发送请求
response1 = session.get('https://httpbin.org/cookies/set/sessioncookie/123456789')
response2 = session.get('https://httpbin.org/cookies')
print(response2.json()) # 可以看到第一次请求设置的 cookie
文件操作
文件上传
import requests
# 上传文件
with open('example.txt', 'w') as f:
f.write('Hello, World!')
with open('example.txt', 'rb') as f:
files = {'file': f}
response = requests.post('https://httpbin.org/post', files=files)
print(response.json())
文件下载
import requests
# 下载小文件
response = requests.get('https://httpbin.org/image/png')
with open('downloaded_image.png', 'wb') as f:
f.write(response.content)
# 下载大文件(流式下载)
response = requests.get('https://httpbin.org/image/png', stream=True)
with open('large_file.png', 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
异常处理
常见异常类型
import requests
from requests.exceptions import RequestException, Timeout, ConnectionError
try:
response = requests.get('https://httpbin.org/delay/10', timeout=5)
response.raise_for_status()
print(response.json())
except Timeout:
print("请求超时")
except ConnectionError:
print("连接错误")
except requests.exceptions.HTTPError as e:
print(f"HTTP错误: {e}")
except RequestException as e:
print(f"请求异常: {e}")
高级配置
超时设置
import requests
# 设置超时时间(秒)
try:
response = requests.get('https://httpbin.org/delay/2', timeout=5)
print("请求成功")
except requests.exceptions.Timeout:
print("请求超时")
# 分别设置连接和读取超时
response = requests.get('https://httpbin.org/get', timeout=(3.05, 27))
代理设置
import requests
# 设置代理
proxies = {
'http': 'http://proxy.example.com:8080',
'https': 'https://proxy.example.com:8080'
}
try:
response = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=10)
print(response.json())
except requests.exceptions.RequestException as e:
print(f"代理请求失败: {e}")
SSL 证书验证
import requests
# 禁用 SSL 证书验证(不推荐在生产环境使用)
response = requests.get('https://httpbin.org/get', verify=False)
# 指定自定义证书文件
response = requests.get('https://httpbin.org/get', verify='/path/to/cert.pem')
最佳实践
- 使用 Session 对象:当需要向同一主机发送多个请求时,使用 Session 可以重用底层的 TCP 连接,提高性能。
- 设置合理的超时时间:requests 默认不设置超时,如果不显式指定 timeout 参数,程序可能因为网络问题而无限等待。
- 异常处理:始终处理可能出现的网络异常。
- 使用 raise_for_status():检查 HTTP 状态码,并在响应状态码为 4xx 或 5xx 时抛出 HTTPError 异常。
- 流式下载大文件:避免将大文件完全加载到内存中。