import re

import scrapy


class Gitlog2Spider(scrapy.Spider):
    """Log in to GitHub via the login form, then verify the session.

    Uses ``scrapy.FormRequest.from_response`` so that hidden form fields
    already present in the login page's HTML (CSRF token, timestamps, etc.)
    are submitted automatically — only the credentials need to be supplied.
    """

    name = 'github2'
    allowed_domains = ['github.com']
    # NOTE(review): GitHub redirects http -> https; consider using the
    # https URL directly to save a round trip.
    start_urls = ['http://github.com/login']

    def parse(self, response):
        """Submit the login form found in *response*.

        ``from_response`` pre-fills every field already present in the page
        source, so only ``login`` and ``password`` are provided here.
        Yields a FormRequest whose result is handled by ``after_login``.
        """
        # SECURITY: credentials are hard-coded in source. Move them to
        # environment variables or spider arguments before committing.
        yield scrapy.FormRequest.from_response(
            # The login page response whose <form> is scraped for fields.
            response=response,
            # Fields to add/override on top of the form's existing inputs.
            formdata={
                'login': '@gmail.com',
                'password': 'G0000'
            },
            callback=self.after_login
        )

    def after_login(self, response):
        """Check the post-login page for the marker string.

        Prints the list of matches for the marker (empty list means the
        marker was not found, i.e. login likely failed or the marker is
        not on the page).
        """
        print(re.findall("oookkkssss", response.body.decode()))