# Author: Sooele
# Demo: build a percent-encoded query string with urllib.parse.urlencode
# and fetch the resulting URL with urllib.request.urlopen.
from urllib import request
from urllib import parse

# Basic urlopen example:
# resp = request.urlopen('https://www.sooele.com')
# print(resp.read())

# urlretrieve downloads a URL and saves it to a local file:
# request.urlretrieve('https://www.sooele.com', 'index.html')

# urlencode turns a dict into a percent-encoded query string
# (handles the non-ASCII search term below).
search_url = 'https://www.baidu.com/s'
params = {"wd": "刘德华"}
query_string = parse.urlencode(params)
full_url = search_url + "?" + query_string
print(full_url)
response = request.urlopen(full_url)
print(response.read())
# Author: Sooele
# Demo: split a URL into its components with urllib.parse.urlsplit.
from urllib import parse

# NOTE(fix): the host contained a typo ('www,baidu.com', comma instead of
# dot); corrected so the parsed netloc matches the intended demo value.
url = 'https://www.baidu.com/s?wq=python&username=asd#1'
# Option 1: urlparse — also splits out the ;params component
# res = parse.urlparse(url)
# Option 2: urlsplit — no params field
res = parse.urlsplit(url)
print(res)
print('scheme:', res.scheme)
print('netloc:', res.netloc)
print('path:', res.path)
# print('params:', res.params)  # SplitResult has no .params attribute
print('query:', res.query)
print('fragment:', res.fragment)
# Demo: send a GET request with an encoded query string and a custom
# User-Agent header.
import urllib.parse
import urllib.request

url = 'http://www.baidu.com/s'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {'name': 'WHY',
          'location': 'SDU',
          'language': 'Python',
          'ie': 'utf-8',
          'wd': 'python'}
headers = {'User-Agent': user_agent}
# For a GET request the encoded data goes into the URL, so no .encode() of
# the body is needed (that would only be required for a POST payload).
data = urllib.parse.urlencode(values)
# NOTE(fix): `headers` was built but never passed to Request, so the custom
# User-Agent was silently dropped; it is now attached to the request.
req = urllib.request.Request(url + '?' + data, headers=headers)
response = urllib.request.urlopen(req)
the_page = response.read()
print(the_page.decode('UTF8'))
# Demo: route a request through an HTTP proxy using ProxyHandler.
from urllib import request

# Without a proxy:
# url = 'http://httpbin.org/ip'
# resp = request.urlopen(url)
# print(resp.read())

# With a proxy:
url = 'http://httpbin.org/ip'
# 1. Build a handler from the proxy address with ProxyHandler.
proxy_handler = request.ProxyHandler({"http": "59.62.35.56:9000"})
# 2. Build an opener from that handler.
opener = request.build_opener(proxy_handler)
# 3. Send the request through the opener.
response = opener.open(url)
print(response.read())
代理 (proxy — routing requests through an HTTP proxy)
# Demo: route a request through an HTTP proxy using ProxyHandler
# (same as the previous example, different proxy address).
from urllib import request

# Without a proxy:
# url = 'http://httpbin.org/ip'
# resp = request.urlopen(url)
# print(resp.read())

# With a proxy:
url = 'http://httpbin.org/ip'
# 1. Build a handler from the proxy address with ProxyHandler.
proxy_handler = request.ProxyHandler({"http": "61.164.39.69:53281"})
# 2. Build an opener from that handler.
opener = request.build_opener(proxy_handler)
# 3. Send the request through the opener.
response = opener.open(url)
print(response.read())
# Author: Sooele
# Demo: fetch a profile page while sending a browser-like User-Agent header.
from urllib import request

dapeng_url = 'http://www.renren.com/971290517/profile'
browser_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}
profile_request = request.Request(url=dapeng_url, headers=browser_headers)
response = request.urlopen(profile_request)
print(response.read().decode('UTF8'))
Cookie
# Author: Sooele
# Demo: fetch a login-protected page by sending a session Cookie header,
# then save the HTML to a local file.
from urllib import request

dapeng_url = 'http://www.renren.com/971290517/profile'
session_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    "Cookie": 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
}
profile_request = request.Request(url=dapeng_url, headers=session_headers)
response = request.urlopen(profile_request)
# print(response.read().decode('UTF8'))
with open('renren.html', 'w', encoding='UTF8') as fp:
    # write() needs a str, but response.read() returns bytes,
    # so decode the payload before writing (str <-encode/decode-> bytes).
    fp.write(response.read().decode('UTF8'))
cookie
# Author: Sooele
# Demo: capture the cookies a site sets and persist them to disk in
# Mozilla cookies.txt format.
from urllib import request
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar('cookie.txt')
cookie_handler = request.HTTPCookieProcessor(jar)
opener = request.build_opener(cookie_handler)
response = opener.open('https://www.baidu.com/')
jar.save()
cookie过期保存 (saving session/expiring cookies with ignore_discard)
# Author: Sooele
# Demo: persist cookies that are marked to be discarded at session end.
from urllib import request
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar('cookie.txt')
cookie_handler = request.HTTPCookieProcessor(jar)
opener = request.build_opener(cookie_handler)
response = opener.open('http://httpbin.org/cookies/set/aaa/bbb')
# ignore_discard=True also saves session cookies that would otherwise
# be dropped when the jar is written out.
jar.save(ignore_discard=True)
# Author: Sooele
# Demo: reload previously saved cookies from disk and reuse them for a
# new request, then print the jar's contents.
from urllib import request
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar('cookie.txt')
jar.load()  # assumes cookie.txt was written by a previous run
cookie_handler = request.HTTPCookieProcessor(jar)
opener = request.build_opener(cookie_handler)
response = opener.open('http://httpbin.org/cookies/set/aaa/basdbb')
for cookie in jar:
    print(cookie)
# ignore_discard=True would also save session cookies:
# jar.save(ignore_discard=True)