# Author: Sooele
import requests
from lxml import etree
# Quick check: fetch the first listing page and print the name of the first
# template entry that has both a detail link and an image alt text.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
}
url = 'http://sc.chinaz.com/jianli/free.html'
# Without a timeout, requests may wait forever on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
# Decode the body as UTF-8 rather than the encoding requests guesses from the headers.
response.encoding = 'utf-8'
page_text = response.text
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@id="container"]/div')
for div in div_list:
    detail_links = div.xpath('./a/@href')
    names = div.xpath('./a/img/@alt')
    # Skip child divs that are not template entries (no link or no image),
    # instead of crashing with IndexError on the empty xpath result.
    if not detail_links or not names:
        continue
    detail_url = detail_links[0]
    name = names[0]
    print(name)
    # Only the first usable entry is needed for this preview.
    break
#%%
# Scrape the resume templates from the Chinaz material site (sc.chinaz.com)
import random
from urllib.parse import urljoin

import requests
from lxml import etree
# Workflow: walk listing pages 1-3, open each template's detail page, pick one
# of its download links at random and save the payload as "<name>.rar" in the
# current working directory.
headers = {
    # Ask the server to close the connection as soon as each request finishes,
    # so pooled connection resources are released promptly.
    'Connection': 'close',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
}
url = 'http://sc.chinaz.com/jianli/free_%d.html'
# Seconds to wait on any single request; without it requests can block forever.
request_timeout = 10
for page in range(1, 4):
    # Page 1 has no numeric suffix; later pages follow the free_<n>.html pattern.
    if page == 1:
        new_url = 'http://sc.chinaz.com/jianli/free.html'
    else:
        new_url = url % page
    response = requests.get(url=new_url, headers=headers, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    page_text = response.text
    tree = etree.HTML(page_text)
    div_list = tree.xpath('//div[@id="container"]/div')
    for div in div_list:
        detail_links = div.xpath('./a/@href')
        names = div.xpath('./a/img/@alt')
        # Skip child divs that are not template entries (no link or no image),
        # instead of crashing with IndexError on the empty xpath result.
        if not detail_links or not names:
            continue
        # Resolve relative / protocol-relative hrefs against the listing URL;
        # absolute URLs pass through urljoin unchanged.
        detail_url = urljoin(new_url, detail_links[0])
        name = names[0]
        try:
            detail_response = requests.get(url=detail_url, headers=headers, timeout=request_timeout)
            detail_response.raise_for_status()
            detail_page = detail_response.text
            # Use a separate name so the listing page's tree is not overwritten.
            detail_tree = etree.HTML(detail_page)
            download_list = detail_tree.xpath('//div[@class="clearfix mt20 downlist"]/ul/li/a/@href')
            # random.choice raises IndexError on an empty list; skip such pages.
            if not download_list:
                continue
            # Choose one of the listed download links at random.
            download_url = urljoin(detail_url, random.choice(download_list))
            download_response = requests.get(url=download_url, headers=headers, timeout=request_timeout)
            # Never write an HTTP error body to disk as if it were the archive.
            download_response.raise_for_status()
            data = download_response.content
        except requests.RequestException as err:
            # One failing template should not abort the remaining downloads.
            print(name, '下载失败', err)
            continue
        # The name comes from scraped page content: neutralise path separators
        # so it cannot point outside the current directory.
        fileName = name.replace('/', '_').replace('\\', '_') + '.rar'
        with open(fileName, 'wb') as fp:
            fp.write(data)
        print(fileName, '下载成功')
# 相关 ("related") -- stray trailing text; commented out so it is not evaluated as a bare name.