Skip already downloaded files
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a1b4f35
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,107 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.static_storage/
+.media/
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
diff --git a/main.py b/main.py
deleted file mode 100644
index a1ae328..0000000
--- a/main.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import requests
-import shutil
-import pathlib
-import os.path
-# import pprint
-# import argparse
-
-API_ENDPOINT = 'https://cloud-api.yandex.net/v1/disk/public/resources/{}?public_key={}'
-
-
-def save_file(url, save_path):
- r = requests.get(url, stream=True)
- # TODO: verify size, checksum and re-download in necessary
- with open(save_path, 'wb') as f:
- shutil.copyfileobj(r.raw, f)
-
-
-def recurse(url, save_path):
- pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
- items = requests.get(API_ENDPOINT.format('', url)).json()['_embedded']['items']
- for i in items:
- # pprint.pprint(i)
- new_path = os.path.join(save_path, i['name'])
- if 'file' in i:
- print('file {}'.format(new_path))
- save_file(i['file'], new_path)
- else:
- print('folder {}'.format(new_path))
- recurse(i['public_url'], new_path)
-
-
-target_root = 'https://yadi.sk/d/AhgSCPivmcMff'
-recurse(target_root, 'dl')
diff --git a/yadiredo.py b/yadiredo.py
new file mode 100755
index 0000000..62074cb
--- /dev/null
+++ b/yadiredo.py
@@ -0,0 +1,52 @@
+import shutil
+import pathlib
+import os
+import argparse
+import hashlib
+import pprint
+import requests
+
+API_ENDPOINT = 'https://cloud-api.yandex.net/v1/disk/public/resources/{}?public_key={}'
+
+
def md5sum(filename):
    """Return the MD5 hex digest of *filename*, reading it in chunks.

    Reading in fixed-size chunks keeps memory use constant even for
    very large files.
    """
    digest = hashlib.md5()
    chunk_size = 128 * digest.block_size
    with open(filename, 'rb') as fh:
        while True:
            chunk = fh.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
+
+
def check_and_download_file(url, path, size, checksum, dry):
    """Download *url* to *path* unless an identical copy already exists.

    A local file counts as already downloaded when it exists and both
    its size and MD5 checksum match the metadata reported by the API.

    Args:
        url: direct download link.
        path: local destination path.
        size: expected file size in bytes.
        checksum: expected MD5 hex digest.
        dry: when True, only report what would be downloaded.
    """
    if os.path.isfile(path):
        # Cheap size comparison first; only hash the file when sizes agree.
        if size == os.path.getsize(path) and checksum == md5sum(path):
            print('skipping correctly downloaded file {}'.format(path))
            return
    if dry:
        # Report the pending download instead of doing nothing silently.
        print('would download {}'.format(path))
        return
    print('downloading {}'.format(path))
    # Context manager ensures the connection is released; raise_for_status
    # prevents an HTTP error page from being written to disk as the file.
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
+
+
def download_directory(url, save_path, dry):
    """Recursively mirror a public Yandex.Disk resource into *save_path*.

    Creates *save_path* if needed, lists the resource via the public API,
    downloads each file entry and recurses into each folder entry.
    """
    pathlib.Path(save_path).mkdir(parents=True, exist_ok=True)
    listing = requests.get(API_ENDPOINT.format('', url)).json()
    for entry in listing['_embedded']['items']:
        entry_path = os.path.join(save_path, entry['name'])
        if 'file' in entry:
            check_and_download_file(entry['file'], entry_path,
                                    entry['size'], entry['md5'], dry)
        else:
            print('entering folder {}'.format(entry_path))
            download_directory(entry['public_url'], entry_path, dry)
+
+
def build_parser():
    """Create the command-line argument parser for the downloader."""
    parser = argparse.ArgumentParser(description='Yandex.Disk downloader.')
    parser.add_argument('url', help='public Yandex.Disk URL to download')
    parser.add_argument('-o', dest='output_path', default='output',
                        help='local directory to save files into')
    parser.add_argument('--dry', action='store_true', default=False,
                        help='list actions without downloading anything')
    return parser


def main():
    """Entry point: parse arguments and start the recursive download."""
    args = build_parser().parse_args()
    download_directory(args.url, args.output_path, args.dry)


# Guard so importing this module does not trigger a download.
if __name__ == '__main__':
    main()