# Target endpoint and custom request headers for the example request.
url = 'http://httpbin.org/get'
# A desktop-browser User-Agent string sent in place of the library default.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}
# Issue a plain GET request and inspect the response.
r = requests.get('http://www.baidu.com/')
print(r.status_code)  # HTTP status code of the response
print(r.text)  # response body as text
对于带参数的URL,传入一个dict作为params参数,如下:
1 2 3 4 5
import requests
# Query-string parameters are supplied as a dict through `params`.
r = requests.get('http://www.baidu.com/', params={'q': 'python', 'cat': '1001'})
print(r.url)  # the URL that was actually requested
print(r.text)
requests的方便之处还在于,对于特定类型的响应,例如JSON,可以直接获取,如下:
1 2 3 4
# NOTE(review): Yahoo's public YQL endpoint appears to have been retired;
# confirm and substitute any URL that returns JSON if this request fails.
r = requests.get('https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20weather.forecast%20where%20woeid%20%3D%202151330&format=json')
# Decode the JSON response body; an interactive session echoes the result.
r.json()
try:
    # Connect timeout is 3.1 seconds; read timeout is 27 seconds.
    r = requests.get(url, timeout=(3.1, 27))
except requests.exceptions.RequestException as e:
    # Report the failure instead of letting the exception propagate.
    print(e)
超时重连
1 2 3 4 5 6 7 8
def gethtml(url, retries=3, timeout=5):
    """Fetch *url* and return the response text, retrying on request errors.

    Args:
        url: Address to request with HTTP GET.
        retries: Maximum number of attempts before giving up (default 3).
        timeout: Timeout in seconds passed to ``requests.get`` (default 5).

    Returns:
        The response body as text, or ``None`` if every attempt raised a
        ``requests.exceptions.RequestException``.
    """
    for _ in range(retries):
        try:
            return requests.get(url, timeout=timeout).text
        except requests.exceptions.RequestException:
            # Swallow the error and fall through to the next attempt.
            continue
    # All attempts failed; make the "no result" outcome explicit.
    return None
添加代理
同添加headers方法,代理参数也要是一个dict,如下:
1 2 3 4 5 6 7 8
# Custom request headers (a mobile-browser User-Agent string).
heads = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit',
}
# Proxy addresses keyed by URL scheme; each key needs its own "key: value" pair.
proxy = {
    'http': 'http://120.25.253.234:812',
    'https': 'https://163.125.222.244:8123',
}
r = requests.get('https://www.baidu.com/', headers=heads, proxies=proxy)