# # Author: Sooele
#
from urllib import request
from urllib import parse
#
# resp = request.urlopen('https://www.sooele.com')
# print(resp.read())
# urlretrieve 函数的用法(下载保存html\文件)
# resp = request.urlretrieve('https://www.sooele.com','index.html')
# urlencode: turn a dict of query parameters into a percent-encoded query
# string (non-ASCII values such as Chinese text are encoded as UTF-8).
url = 'https://www.baidu.com/s'
params = {"wd": "刘德华"}
qs = parse.urlencode(params)
url = url + "?" + qs
print(url)
# Use the response as a context manager so the connection is closed
# once the body has been read.
with request.urlopen(url) as resp:
    print(resp.read())
# Author: Sooele
from urllib import parse
# Split a URL into its components and print each one.
# The host must use dots ('www.baidu.com'); a comma there ends up verbatim
# in netloc and gives a meaningless host name.
url = 'https://www.baidu.com/s?wq=python&username=asd#1'
# Option 1: urlparse() additionally exposes a `params` attribute
# res = parse.urlparse(url)
# Option 2: urlsplit() does not split out `params`
res = parse.urlsplit(url)
print(res)
print('scheme:', res.scheme)
print('netloc:', res.netloc)
print('path:', res.path)
# print('params:', res.params)  # only available on the urlparse() result
print('query:', res.query)
print('fragment:', res.fragment)
import urllib.parse
import urllib.request
# Send a GET request whose parameters are carried in the query string,
# with a custom User-Agent header.
url = 'http://www.baidu.com/s'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'name': 'WHY',
    'location': 'SDU',
    'language': 'Python',
    'ie': 'utf-8',
    'wd': 'python',
}
headers = {'User-Agent': user_agent}
data = urllib.parse.urlencode(values)
# To POST instead, encode `data` to bytes (data.encode('UTF8')) and pass it
# as the `data` argument of Request; supplying `data` switches the method to POST.
# Here the parameters stay in the URL (GET) and the headers built above are
# actually sent with the request.
req = urllib.request.Request(url + '?' + data, headers=headers)
# Context manager closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()
print(the_page.decode('UTF8'))
from urllib import request
# Without a proxy:
# url = 'http://httpbin.org/ip'
# resp = request.urlopen(url)
# print(resp.read())
# With a proxy:
url = 'http://httpbin.org/ip'
# 1. Build a handler with ProxyHandler, passing the scheme -> proxy mapping
handler = request.ProxyHandler({"http": "59.62.35.56:9000"})
# 2. Build an opener from that handler
opener = request.build_opener(handler)
# 3. Send the request through the opener; the context manager closes
#    the connection after the body is read
with opener.open(url) as resp:
    print(resp.read())
# Proxy
from urllib import request
# Without a proxy:
# url = 'http://httpbin.org/ip'
# resp = request.urlopen(url)
# print(resp.read())
# With a proxy:
url = 'http://httpbin.org/ip'
# 1. Build a handler with ProxyHandler, passing the scheme -> proxy mapping
handler = request.ProxyHandler({"http": "61.164.39.69:53281"})
# 2. Build an opener from that handler
opener = request.build_opener(handler)
# 3. Send the request through the opener; the context manager closes
#    the connection after the body is read
with opener.open(url) as resp:
    print(resp.read())
# Author: Sooele
from urllib import request
# Request a profile page while sending a browser-like User-Agent header.
dapeng_url = 'http://www.renren.com/971290517/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
}
req = request.Request(url=dapeng_url, headers=headers)
# Context manager closes the connection once the body has been read.
with request.urlopen(req) as resp:
    print(resp.read().decode('UTF8'))
# Cookie
# Author: Sooele
from urllib import request
# Request a profile page with a manually supplied Cookie header and save
# the returned HTML to renren.html.
dapeng_url = 'http://www.renren.com/971290517/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    "Cookie": 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',
}
req = request.Request(url=dapeng_url, headers=headers)
# print(resp.read().decode('UTF8'))
# Both the HTTP response and the output file are closed when the block exits.
with request.urlopen(req) as resp, open('renren.html', 'w', encoding='UTF8') as fp:
    # write() on a text-mode file requires a str;
    # resp.read() returns bytes, so decode it first
    # (str --encode--> bytes, bytes --decode--> str).
    fp.write(resp.read().decode('UTF8'))
# Cookie: saving cookies to a file
# Author: Sooele
from urllib import request
from http.cookiejar import MozillaCookieJar
# Collect the cookies set by a site and write them to cookie.txt
# in the Mozilla cookies.txt format.
cookiejar = MozillaCookieJar('cookie.txt')
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)
resp = opener.open('https://www.baidu.com/')
# Only the cookies are of interest here; release the connection right away.
resp.close()
cookiejar.save()
# Cookie: saving cookies that would otherwise be dropped (expiring / discardable)
# Author: Sooele
from urllib import request
from http.cookiejar import MozillaCookieJar
# Save cookies to cookie.txt, including ones flagged to be discarded.
cookiejar = MozillaCookieJar('cookie.txt')
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)
resp = opener.open('http://httpbin.org/cookies/set/aaa/bbb')
# Only the cookies are of interest here; release the connection right away.
resp.close()
# ignore_discard=True also saves cookies that are marked to be discarded
# (e.g. session cookies). Already-expired cookies are controlled separately
# by ignore_expires.
cookiejar.save(ignore_discard=True)
# Author: Sooele
from urllib import request
from http.cookiejar import MozillaCookieJar
# Load previously saved cookies from cookie.txt, make a request that sets
# another cookie, then list everything in the jar.
cookiejar = MozillaCookieJar('cookie.txt')
# ignore_discard=True is needed to read back cookies that were saved with
# save(ignore_discard=True); by default load() skips discardable cookies.
cookiejar.load(ignore_discard=True)
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)
resp = opener.open('http://httpbin.org/cookies/set/aaa/basdbb')
# Only the cookies are of interest here; release the connection right away.
resp.close()
for cookie in cookiejar:
    print(cookie)
# ignore_discard=True also saves cookies that are marked to be discarded
# cookiejar.save(ignore_discard=True)