# Source code for instalooter.worker
# coding: utf-8
"""Background download thread.
"""
from __future__ import absolute_import
from __future__ import unicode_literals
import io
import operator
import threading
import time
import requests
import six
import tenacity
from ._impl import PIL, piexif, json
class InstaDownloader(threading.Thread):
    """The background InstaLooter worker class.

    A worker repeatedly takes media dictionaries from a shared queue and,
    depending on its options, downloads the matching file(s) into
    ``destination`` and/or dumps the media dictionary as a JSON file.
    It stops when it receives ``None`` from the queue or after
    `terminate` has been called.
    """

    # Keyword arguments forwarded to `tenacity.retry` for single-file
    # downloads: stop after 5 attempts, with an exponential wait between
    # attempts.
    _tenacity_options = {
        "stop": tenacity.stop_after_attempt(5),
        "wait": tenacity.wait_exponential(1, 10),
    }

    def __init__(self,
                 queue,
                 destination,
                 namegen,
                 add_metadata=False,
                 dump_json=False,
                 dump_only=False,
                 pbar=None,
                 session=None):
        """Create a new worker.

        Arguments:
            queue: the queue to read media dictionaries from; a ``None``
                item is treated as a request to stop. Must provide
                ``get_nowait`` and ``task_done``.
            destination: the filesystem object to write to. Must provide
                ``exists``, ``open``, ``setinfo`` and ``remove``
                (NOTE(review): looks like a PyFilesystem2 ``FS`` object;
                confirm against the caller).
            namegen: an object providing ``file(media)`` and
                ``base(media)`` to compute destination names.
            add_metadata (bool): stored on the instance; not read by any
                method of this class.
            dump_json (bool): dump each media dictionary to a JSON file.
            dump_only (bool): skip the downloads and only dump the JSON
                file (implies ``dump_json``).
            pbar: an optional progress bar providing ``get_lock`` and
                ``update``.
            session: the `requests.Session` to use; a new one is created
                when omitted.
        """
        super(InstaDownloader, self).__init__()
        self.queue = queue
        self.destination = destination
        self.namegen = namegen
        self.session = session or requests.Session()
        self.pbar = pbar
        self.dump_only = dump_only
        self.dump_json = dump_json or dump_only
        self.add_metadata = add_metadata
        self._killed = False
        self._downloading = None

        # Single-file downloads are retried; the sidecar handler is not
        # wrapped itself since it dispatches to the retried handlers.
        retry = tenacity.retry(**self._tenacity_options)
        self._DOWNLOAD_METHODS = {
            "GraphImage": retry(self._download_image),
            "GraphVideo": retry(self._download_video),
            "GraphSidecar": self._download_sidecar,
        }

    def _discard_partial(self, filename):
        """Remove ``filename`` from the destination, ignoring any failure.

        Called after a failed download so that the next attempt does not
        mistake a partial file for a completed one.
        """
        try:
            if self.destination.exists(filename):
                self.destination.remove(filename)
        except Exception:
            # Best effort only: the caller is about to re-raise the
            # original download error, which must not be masked.
            pass

    def _download_image(self, media):
        """Download the picture at ``media['display_url']``.

        Nothing is done if the destination file already exists. If the
        download fails, any partially written file is removed before the
        error is re-raised.
        """
        url = media['display_url']
        filename = self.namegen.file(media)
        if self.destination.exists(filename):
            return
        try:
            # Perform the request first so that a failing request does not
            # leave an empty file behind.
            with self.session.get(url) as res:
                with self.destination.open(filename, "wb") as f:
                    f.write(res.content)
        except Exception:
            self._discard_partial(filename)
            raise
        self._set_time(media, filename)

    def _download_video(self, media):
        """Download the video at ``media['video_url']``.

        Nothing is done if the destination file already exists. The
        response is streamed to the destination in chunks; if the download
        fails, any partially written file is removed before the error is
        re-raised.
        """
        url = media['video_url']
        filename = self.namegen.file(media)
        if self.destination.exists(filename):
            return
        try:
            # stream=True avoids loading the whole video in memory before
            # `iter_content` is consumed.
            with self.session.get(url, stream=True) as res:
                with self.destination.open(filename, "wb") as f:
                    for chunk in res.iter_content(io.DEFAULT_BUFFER_SIZE):
                        f.write(chunk)
        except Exception:
            self._discard_partial(filename)
            raise
        self._set_time(media, filename)

    def _download_sidecar(self, media):
        """Download every child of a sidecar (multi-item) media.

        The ``edge_sidecar_to_children`` entry is removed from ``media``
        (the dictionary is modified in place), then each child node
        inherits the parent's remaining keys it does not define itself and
        is passed to the handler matching its ``__typename``.
        """
        edges = media.pop('edge_sidecar_to_children')['edges']
        for edge in six.moves.map(operator.itemgetter('node'), edges):
            for key, value in six.iteritems(media):
                edge.setdefault(key, value)
            self._DOWNLOAD_METHODS[edge['__typename']](edge)

    def _set_time(self, media, filename):
        """Set the timestamps of ``filename`` to the media's date.

        The value is ``media['taken_at_timestamp']`` when present and
        truthy, ``media['date']`` otherwise.
        """
        details = {}
        details["modified"] = details["accessed"] = details["created"] = \
            media.get('taken_at_timestamp') or media['date']
        self.destination.setinfo(filename, {"details": details})

    def _dump(self, media):
        """Write ``media`` as an indented, key-sorted JSON file.

        The file is named after ``namegen.base(media)`` with a ``.json``
        extension, and receives the same timestamps as a downloaded file.
        """
        basename = self.namegen.base(media)
        filename = "{}.json".format(basename)
        # Text mode on Python 3, binary mode on Python 2.
        mode = "w" if six.PY3 else "wb"
        with self.destination.open(filename, mode) as dest:
            json.dump(media, dest, indent=4, sort_keys=True)
        self._set_time(media, filename)

    def run(self):
        """Process the queue until poisoned or terminated.

        Every item taken from the queue is acknowledged with ``task_done``
        even when processing it raises, so that a failure in this worker
        does not leave the queue's unfinished-task count stuck. The
        exception itself is not swallowed.
        """
        while not self._killed:
            try:
                media = self.queue.get_nowait()
            except six.moves.queue.Empty:
                # Nothing to do yet: wait before polling again.
                time.sleep(1)
                continue

            try:
                # Received a poison pill: break the loop
                if media is None:
                    self._killed = True
                else:
                    # Download media
                    if not self.dump_only:
                        self._DOWNLOAD_METHODS[media["__typename"]](media)
                    # Dump JSON metadata if needed
                    if self.dump_json:
                        self._dump(media)
                    # Update progress bar if any
                    if self.pbar is not None and not self._killed:
                        with self.pbar.get_lock():
                            self.pbar.update()
            finally:
                self.queue.task_done()

    def terminate(self):
        """Ask the worker to stop once its current iteration is over.
        """
        self._killed = True