import time

import requests
from bs4 import BeautifulSoup as bs
from time import sleep  # only used by the commented-out Method 2 below


def get_url_douban(url):
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    }
    response = requests.get(url=url, headers=headers).text
    douban_info = bs(response, 'html.parser')
    # print(douban_info)
    # Python uses for-in loops, with indentation marking the block body.
    # find_all and for can be mixed here because the tags/atag objects
    # both support find_all and are themselves iterable.
    for tags in douban_info.find_all('div', attrs={'class': 'pl2'}):
        for atag in tags.find_all('a'):
            # Get the link
            hrdf_url = atag.get('href')
            # print(atag.get('href'))
            # Get the book title
            hrdf_name = atag.get('title')
            # print(atag.get('title'))
            # Skip anchors without an href or title, so the concatenation below
            # never sees None.
            if not hrdf_url or not hrdf_name:
                continue
            # The with statement closes the file automatically; no explicit close() is needed.
            with open('douban.txt', 'a', encoding='utf-8') as file:
                file.write(hrdf_name + ':' + '\n')
                file.write(hrdf_url + '\n')
                print(f'Writing -- {hrdf_name}: {hrdf_url}')


# Method 1
for page in range(10):
    astring = f'https://book.douban.com/top250?start={page * 25}'
    # print(astring)
    get_url_douban(astring)
    print(f'Done with page {page + 1}')
    time.sleep(1)

# # Method 2
# url = tuple(f'https://book.douban.com/top250?start={page * 25}' for page in range(10))
# if __name__ == '__main__':
#     for page in url:
#         get_url_douban(page)
#         print()
#         sleep(5)
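
# --- Optional sketch ---
# A minimal alternative to the nested find_all loops above, using BeautifulSoup's CSS
# selectors. This is only an illustrative sketch: it assumes the Top 250 page keeps its
# "div.pl2 > a" structure and that the relevant <a> tags carry href and title attributes.
# The name get_books_via_css is a hypothetical helper, not part of the script above.
def get_books_via_css(url):
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    }
    html = requests.get(url=url, headers=headers).text
    soup = bs(html, 'html.parser')
    # a[title] keeps only anchors that actually carry a title attribute.
    return [(a.get('title'), a.get('href')) for a in soup.select('div.pl2 a[title]')]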