差分
このページの2つのバージョン間の差分を表示します。
両方とも前のリビジョン 前のリビジョン 次のリビジョン | 前のリビジョン 次のリビジョン両方とも次のリビジョン | ||
python:pip [2019/08/08 08:40] – [pip のダウンロードキャッシュ] ともやん | python:pip [2019/08/12 18:59] – [pip パッケージを使った HTTP URL ダウンローダーの作り方] ともやん | ||
---|---|---|---|
行 73: | 行 73: | ||
%LocalAppData%\pip\Cache | %LocalAppData%\pip\Cache | ||
</ | </ | ||
+ | |||
+ | ===== pip パッケージを使った HTTP URL ダウンローダーの作り方 ===== | ||
+ | pip 19.2.2 で動作を確認した。\\ | ||
+ | 以下は pip パッケージを利用した HTTP URL のダウンローダーのコードである。\\ | ||
+ | この **download_http_url()** 関数を利用すれば、パッケージではない様々なファイルを pip と同様に進行状況を表示しながらダウンロードする事ができる。\\ | ||
+ | \\ | ||
+ | <WRAP prewrap 100%> | ||
+ | <file python pip_downloader.py> | ||
+ | # | ||
+ | ## -*- coding: utf-8 -*- | ||
+ | import cgi | ||
+ | import mimetypes | ||
+ | import os | ||
+ | |||
+ | from pip._internal.download import PipSession | ||
+ | from pip._internal.models.link import Link | ||
+ | from pip._vendor.requests.models import CONTENT_CHUNK_SIZE | ||
+ | from pip._internal.utils.misc import splitext, consume, format_size | ||
+ | from pip._internal.utils.ui import DownloadProgressProvider | ||
+ | from pip._vendor import requests | ||
+ | |||
+ | import logging | ||
+ | |||
+ | logger = logging.getLogger(__name__) | ||
+ | |||
+ | def download_http_url( | ||
+ | url, # type: str | ||
+ | download_dir, | ||
+ | hashes=None, | ||
+ | progress_bar=' | ||
+ | ): | ||
+ | logger.debug(' | ||
+ | link = Link(url) | ||
+ | session = PipSession() | ||
+ | | ||
+ | def _download_url( | ||
+ | resp, # type: Response | ||
+ | link, # type: Link | ||
+ | content_file, | ||
+ | hashes, | ||
+ | progress_bar | ||
+ | ): | ||
+ | # type: (...) -> None | ||
+ | try: | ||
+ | total_length = int(resp.headers[' | ||
+ | except (ValueError, | ||
+ | total_length = 0 | ||
+ | |||
+ | cached_resp = getattr(resp, | ||
+ | if cached_resp: | ||
+ | show_progress = False | ||
+ | elif total_length > (40 * 1000): | ||
+ | show_progress = True | ||
+ | elif not total_length: | ||
+ | show_progress = True | ||
+ | else: | ||
+ | show_progress = False | ||
+ | |||
+ | def resp_read(chunk_size): | ||
+ | try: | ||
+ | # Special case for urllib3. | ||
+ | for chunk in resp.raw.stream( | ||
+ | chunk_size, | ||
+ | decode_content=False): | ||
+ | yield chunk | ||
+ | except AttributeError: | ||
+ | # Standard file-like object. | ||
+ | while True: | ||
+ | chunk = resp.raw.read(chunk_size) | ||
+ | if not chunk: | ||
+ | break | ||
+ | yield chunk | ||
+ | |||
+ | def written_chunks(chunks): | ||
+ | for chunk in chunks: | ||
+ | content_file.write(chunk) | ||
+ | yield chunk | ||
+ | |||
+ | def _progress_indicator(iterable, | ||
+ | return iterable | ||
+ | |||
+ | progress_indicator = _progress_indicator | ||
+ | |||
+ | if show_progress: | ||
+ | progress_indicator = DownloadProgressProvider(progress_bar, | ||
+ | max=total_length) | ||
+ | if total_length: | ||
+ | print(' | ||
+ | else: | ||
+ | print(f' | ||
+ | elif cached_resp: | ||
+ | print(f' | ||
+ | else: | ||
+ | print(f' | ||
+ | |||
+ | print(f' | ||
+ | |||
+ | downloaded_chunks = written_chunks( | ||
+ | progress_indicator( | ||
+ | resp_read(CONTENT_CHUNK_SIZE), | ||
+ | CONTENT_CHUNK_SIZE | ||
+ | ) | ||
+ | ) | ||
+ | if hashes: | ||
+ | hashes.check_against_chunks(downloaded_chunks) | ||
+ | else: | ||
+ | consume(downloaded_chunks) | ||
+ | | ||
+ | # type: (...) -> Tuple[str, str] | ||
+ | """ | ||
+ | target_url = link.url.split('#', | ||
+ | try: | ||
+ | resp = session.get( | ||
+ | target_url, | ||
+ | headers={" | ||
+ | stream=True, | ||
+ | ) | ||
+ | resp.raise_for_status() | ||
+ | except requests.HTTPError as exc: | ||
+ | print(' | ||
+ | raise | ||
+ | |||
+ | content_type = resp.headers.get(' | ||
+ | filename = link.filename | ||
+ | # Have a look at the Content-Disposition header for a better guess | ||
+ | content_disposition = resp.headers.get(' | ||
+ | if content_disposition: | ||
+ | # type: (str, str) -> str | ||
+ | def sanitize_content_filename(filename): | ||
+ | # type: (str) -> str | ||
+ | """ | ||
+ | Sanitize the " | ||
+ | """ | ||
+ | return os.path.basename(filename) | ||
+ | | ||
+ | """ | ||
+ | Parse the " | ||
+ | return the default filename if the result is empty. | ||
+ | """ | ||
+ | _type, params = cgi.parse_header(content_disposition) | ||
+ | filename = params.get(' | ||
+ | if filename: | ||
+ | # We need to sanitize the filename to prevent directory traversal | ||
+ | # in case the filename contains " | ||
+ | filename = sanitize_content_filename(filename) | ||
+ | |||
+ | ext = splitext(filename)[1] | ||
+ | if not ext: | ||
+ | ext = mimetypes.guess_extension(content_type) | ||
+ | if ext: | ||
+ | filename += ext | ||
+ | if not ext and link.url != resp.url: | ||
+ | ext = os.path.splitext(resp.url)[1] | ||
+ | if ext: | ||
+ | filename += ext | ||
+ | file_path = os.path.join(download_dir, | ||
+ | with open(file_path, | ||
+ | _download_url(resp, | ||
+ | return file_path, content_type | ||
+ | </ | ||
+ | </ | ||
+ | pip_downloader.py の使い方。\\ | ||
+ | <WRAP prewrap 100%> | ||
+ | <file python main.py> | ||
+ | # | ||
+ | ## -*- coding: utf-8 -*- | ||
+ | import os | ||
+ | # pip_downloader.py をインポートする | ||
+ | import pip_downloader | ||
+ | |||
+ | # メイン処理 | ||
+ | def main(): | ||
+ | pip_downloader.download_http_url( | ||
+ | ' | ||
+ | os.environ[' | ||
+ | ) | ||
+ | |||
+ | # スクリプト起動時に main() を実行 | ||
+ | if __name__ == " | ||
+ | main() | ||
+ | </ | ||
+ | </ | ||
+ | main.py を実行する。\\ | ||
+ | <WRAP prewrap 100%> | ||
+ | < | ||
+ | > python main.py | ||
+ | Downloading https:// | ||
+ | Downloading from URL https:// | ||
+ | | ||
+ | </ | ||
+ | </ | ||
===== 参考文献 ===== | ===== 参考文献 ===== | ||
[[http:// | [[http:// |