Python 爬虫(2):使用 gevent 并发抓取网页

# Author: Sooele

from urllib import request
import gevent,time
from gevent import monkey
monkey.patch_all() #把当前程序的所有的io操作给我单独的做上标记

def f(url):
    """Fetch ``url`` and print the size of the response body in bytes.

    Prints a ``GET`` line before the request and a summary line after the
    whole body has been read.

    Args:
        url: Passed unchanged to ``urllib.request.urlopen``.

    Returns:
        None. Exceptions raised by ``urlopen`` or ``read`` are not caught
        here and propagate to the caller.
    """
    print('GET: %s' % url)
    # Context manager closes the response even if read() raises, so the
    # underlying connection is not left open.
    with request.urlopen(url) as resp:
        data = resp.read()
    print('%d bytes received from %s.' % (len(data), url))

# Pages fetched by both the sequential run and the gevent run below.
urls = ['https://www.python.org/',
        'https://www.yahoo.com/',
        'https://github.com/']

# Baseline: fetch the pages one after another.
# perf_counter() is monotonic, so the measured interval is not affected by
# system clock adjustments the way time.time() differences can be.
time_start = time.perf_counter()
for url in urls:
    f(url)
print("同步cost", time.perf_counter() - time_start)

# Same fetches, one greenlet per URL. Spawning from `urls` (instead of
# repeating the literals) keeps both runs fetching the identical set of pages.
async_time_start = time.perf_counter()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print("异步cost", time.perf_counter() - async_time_start)

运行结果(先同步,后异步):

GET: https://www.python.org/
48954 bytes received from https://www.python.org/.
GET: https://www.yahoo.com/
512683 bytes received from https://www.yahoo.com/.
GET: https://github.com/
51483 bytes received from https://github.com/.
同步cost 11.521910190582275
GET: https://www.python.org/
GET: https://www.yahoo.com/
GET: https://github.com/
514419 bytes received from https://www.yahoo.com/.
48954 bytes received from https://www.python.org/.
51483 bytes received from https://github.com/.
异步cost 2.8574001789093018

发表评论