get_links download_link setup_download_dir
import json
import logging
import os
from pathlib import Path
from urllib.request import urlopen, Request
logger = logging.getLogger(__name__)
def get_links(client_id):
headers = {'Authorization': 'Client-ID {}'.format(client_id)}
req = Request('https://api.imgur.com/3/gallery/', headers=headers, method='GET')
with urlopen(req) as resp:
data = json.loads(resp.readall().decode('utf-8'))
return map(lambda item: item['link'], data['data'])
def download_link(directory, link):
logger.info('Downloading %s', link)
download_path = directory / os.path.basename(link)
with urlopen(link) as image, download_path.open('wb') as f:
f.write(image.readall())
def setup_download_dir():
download_dir = Path('images')
if not download_dir.exists():
download_dir.mkdir()
return download_dir
import logging
import os
from time import time
from download import setup_download_dir, get_links, download_link
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.getLogger('requests').setLevel(logging.CRITICAL)
logger = logging.getLogger(__name__)
def main():
ts = time()
client_id = os.getenv('IMGUR_CLIENT_ID')
if not client_id:
raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
download_dir = setup_download_dir()
links = [l for l in get_links(client_id) if l.endswith('.jpg')]
for link in links:
download_link(download_dir, link)
print('Took {}s'.format(time() - ts))
if __name__ == '__main__':
main()
from queue import Queue
from threading import Thread
class DownloadWorker(Thread):
def __init__(self, queue):
Thread.__init__(self)
self.queue = queue
def run(self):
while True:
# Get the work from the queue and expand the tuple
# 从队列中获取任务并扩展tuple
directory, link = self.queue.get()
download_link(directory, link)
self.queue.task_done()
def main():
ts = time()
client_id = os.getenv('IMGUR_CLIENT_ID')
if not client_id:
raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
download_dir = setup_download_dir()
links = [l for l in get_links(client_id) if l.endswith('.jpg')]
# Create a queue to communicate with the worker threads
queue = Queue()
# Create 8 worker threads
# 创建八个工作线程
for x in range(8):
worker = DownloadWorker(queue)
# Setting daemon to True will let the main thread exit even though the workers are blocking
# 将daemon设置为True将会使主线程退出,即使worker都阻塞了
worker.daemon = True
worker.start()
# Put the tasks into the queue as a tuple
# 将任务以tuple的形式放入队列中
for link in links:
logger.info('Queueing {}'.format(link))
queue.put((download_dir, link))
# Causes the main thread to wait for the queue to finish processing all the tasks
# 让主线程等待队列完成所有的任务
queue.join()
print('Took {}'.format(time() - ts))
from functools import partial
from multiprocessing.pool import Pool
def main():
ts = time()
client_id = os.getenv('IMGUR_CLIENT_ID')
if not client_id:
raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
download_dir = setup_download_dir()
links = [l for l in get_links(client_id) if l.endswith('.jpg')]
download = partial(download_link, download_dir)
with Pool(8) as p:
p.map(download, links)
print('Took {}s'.format(time() - ts))
from redis import Redis
from rq import Queue
def main():
client_id = os.getenv('IMGUR_CLIENT_ID')
if not client_id:
raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
download_dir = setup_download_dir()
links = [l for l in get_links(client_id) if l.endswith('.jpg')]
q = Queue(connection=Redis(host='localhost', port=6379))
for link in links:
q.enqueue(download_link, download_dir, link)
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有