[+] rewrite download command: async, pydantic entry model, terminal-aware progress
1. download_entry_t pydantic model (frozen=True) replaces tuples; 2. download_t class with async run(), _download_one(), _run_parallel(), _run_aria2c_batch(); 3. asyncio.Semaphore for -j concurrency, run_in_executor for blocking I/O; 4. .part file pattern: download to dest.pkg.part, rename on success; 5. curl -C - and aria2c --continue=true for resume support; 6. curl/aria2c stdout/stderr redirected to devnull; 7. --dry-run, --verify-checksum BooleanOptionalAction flags; 8. --progress-mode plain|interactive using terminal.py renderer; 9. progress_t with byte-based ETA (when --size= available) and pkg-rate fallback; 10. parse --size=BYTES and --hash=sha256: from compiled requirements; 11. update test_cli.py downloader tests for .part rename and **kwargs; 12. update test_download_cli.py for pydantic model and new progress_t constructor;
This commit is contained in:
parent
b98173511e
commit
b7f7d3d291
@ -1,10 +1,10 @@
|
||||
"""Download compiled packages."""
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import asyncio
|
||||
import enum
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import subprocess
|
||||
@ -12,13 +12,28 @@ import time
|
||||
import urllib.request
|
||||
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
ClassVar,
|
||||
Optional,
|
||||
)
|
||||
|
||||
import pydantic
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from online.fxreader.pr34.commands_typed.terminal import field_t
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class download_entry_t(pydantic.BaseModel):
    """Immutable description of one compiled package to fetch.

    Parsed out of a compiled requirements file; replaces the earlier
    ``(url, filename)`` tuples.
    """

    # instances are hashable/read-only once constructed
    model_config = pydantic.ConfigDict(frozen=True)

    # absolute URL the package is downloaded from
    url: str
    # basename the package is stored under in the destination directory
    filename: str
    # expected sha256 hex digest from --hash=sha256:...; '' when absent
    sha256: str = ''
    # compressed size in bytes from --size=...; 0 when unknown
    csize: int = 0
|
||||
|
||||
|
||||
class parse_rate_t:
|
||||
class constants_t:
|
||||
rate_re: ClassVar[re.Pattern[str]] = re.compile(
|
||||
@ -63,122 +78,246 @@ class downloader_t:
|
||||
limit_rate: int,
|
||||
) -> None:
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
part = dest.with_suffix(dest.suffix + '.part')
|
||||
devnull = subprocess.DEVNULL
|
||||
|
||||
if backend is downloader_t.constants_t.backend_t.urllib:
|
||||
urllib.request.urlretrieve(url, str(dest))
|
||||
urllib.request.urlretrieve(url, str(part))
|
||||
elif backend is downloader_t.constants_t.backend_t.curl:
|
||||
cmd = [
|
||||
'curl', '-fSL',
|
||||
'--limit-rate', '%d' % limit_rate,
|
||||
'-o', str(dest),
|
||||
url,
|
||||
]
|
||||
subprocess.check_call(cmd)
|
||||
subprocess.check_call(
|
||||
[
|
||||
'curl', '-fSL',
|
||||
'-C', '-',
|
||||
'--limit-rate', '%d' % limit_rate,
|
||||
'-o', str(part),
|
||||
url,
|
||||
],
|
||||
stdout=devnull,
|
||||
stderr=devnull,
|
||||
)
|
||||
elif backend is downloader_t.constants_t.backend_t.aria2c:
|
||||
cmd = [
|
||||
'aria2c',
|
||||
'--max-download-limit=%d' % limit_rate,
|
||||
'-d', str(dest.parent),
|
||||
'-o', dest.name,
|
||||
url,
|
||||
]
|
||||
subprocess.check_call(cmd)
|
||||
subprocess.check_call(
|
||||
[
|
||||
'aria2c',
|
||||
'--continue=true',
|
||||
'--max-download-limit=%d' % limit_rate,
|
||||
'-d', str(part.parent),
|
||||
'-o', part.name,
|
||||
url,
|
||||
],
|
||||
stdout=devnull,
|
||||
stderr=devnull,
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
part.rename(dest)
|
||||
|
||||
@staticmethod
|
||||
def download_batch_aria2c(
|
||||
entries: list[tuple[str, pathlib.Path]],
|
||||
limit_rate: int,
|
||||
jobs: int,
|
||||
) -> None:
|
||||
"""Download multiple files using a single aria2c process with -j."""
|
||||
if len(entries) == 0:
|
||||
return
|
||||
|
||||
dest_dir = entries[0][1].parent
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# write input file for aria2c
|
||||
input_lines: list[str] = []
|
||||
for url, dest in entries:
|
||||
input_lines.append(url)
|
||||
input_lines.append(' dir=%s' % str(dest.parent))
|
||||
input_lines.append(' out=%s' % dest.name)
|
||||
|
||||
input_txt = '\n'.join(input_lines) + '\n'
|
||||
input_path = dest_dir / '.aria2c-input.txt'
|
||||
input_path.write_text(input_txt)
|
||||
|
||||
cmd = [
|
||||
'aria2c',
|
||||
'--max-download-limit=%d' % limit_rate,
|
||||
'-j', '%d' % jobs,
|
||||
'-i', str(input_path),
|
||||
]
|
||||
input_path.write_text('\n'.join(input_lines) + '\n')
|
||||
|
||||
try:
|
||||
subprocess.check_call(cmd)
|
||||
subprocess.check_call(
|
||||
[
|
||||
'aria2c',
|
||||
'--continue=true',
|
||||
'--max-download-limit=%d' % limit_rate,
|
||||
'-j', '%d' % jobs,
|
||||
'-i', str(input_path),
|
||||
],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
finally:
|
||||
input_path.unlink(missing_ok=True)
|
||||
|
||||
@staticmethod
def verify_sha256(path: pathlib.Path, expected: str) -> bool:
    """Return True when the sha256 hex digest of ``path`` equals ``expected``."""
    digest = hashlib.sha256()
    with open(path, 'rb') as handle:
        # hash in 64 KiB slices so large packages stay cheap on memory
        for chunk in iter(lambda: handle.read(64 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected
|
||||
|
||||
|
||||
class progress_t:
|
||||
class constants_t:
|
||||
class mode_t(enum.Enum):
|
||||
plain = 'plain'
|
||||
interactive = 'interactive'
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
total: int,
|
||||
total_bytes: int,
|
||||
already_done: int,
|
||||
total_pkgs: int,
|
||||
already_pkgs: int,
|
||||
to_download_pkgs: int,
|
||||
already_bytes: int,
|
||||
total_expected_bytes: int,
|
||||
to_download_bytes: int,
|
||||
has_sizes: bool,
|
||||
mode: 'progress_t.constants_t.mode_t' = constants_t.mode_t.plain,
|
||||
) -> None:
|
||||
self.total = total
|
||||
self.total_bytes = total_bytes
|
||||
self.already_done = already_done
|
||||
self.total_pkgs = total_pkgs
|
||||
self.already_pkgs = already_pkgs
|
||||
self.to_download_pkgs = to_download_pkgs
|
||||
self.already_bytes = already_bytes
|
||||
self.downloaded_this_run = 0
|
||||
self.downloaded_bytes_this_run = 0
|
||||
self.total_expected_bytes = total_expected_bytes
|
||||
self.to_download_bytes = to_download_bytes
|
||||
self.has_sizes = has_sizes
|
||||
self.downloaded_pkgs = 0
|
||||
self.downloaded_bytes = 0
|
||||
self.start_time = time.monotonic()
|
||||
|
||||
def update(self, file_bytes: int) -> None:
|
||||
self.downloaded_this_run += 1
|
||||
self.downloaded_bytes_this_run += file_bytes
|
||||
from online.fxreader.pr34.commands_typed.terminal import (
|
||||
render_mode_t,
|
||||
renderer_t,
|
||||
)
|
||||
|
||||
def format_plain(self) -> str:
|
||||
done = self.already_done + self.downloaded_this_run
|
||||
done_mb = (self.already_bytes + self.downloaded_bytes_this_run) / (1024 * 1024)
|
||||
total_mb = self.total_bytes / (1024 * 1024)
|
||||
render_mode = (
|
||||
render_mode_t.interactive
|
||||
if mode is progress_t.constants_t.mode_t.interactive
|
||||
else render_mode_t.plain
|
||||
)
|
||||
self._renderer = renderer_t(mode=render_mode)
|
||||
|
||||
def update(self, file_bytes: int) -> None:
|
||||
self.downloaded_pkgs += 1
|
||||
self.downloaded_bytes += file_bytes
|
||||
|
||||
@staticmethod
|
||||
def _fmt_bytes(b: int) -> str:
|
||||
if b >= 1024 * 1024 * 1024:
|
||||
return '%.1fG' % (b / (1024 * 1024 * 1024))
|
||||
return '%.1fM' % (b / (1024 * 1024))
|
||||
|
||||
def _build_fields(self) -> 'list[field_t]':
|
||||
from online.fxreader.pr34.commands_typed.terminal import (
|
||||
field_t,
|
||||
priority_t,
|
||||
)
|
||||
|
||||
done_pkgs = self.already_pkgs + self.downloaded_pkgs
|
||||
elapsed = time.monotonic() - self.start_time
|
||||
|
||||
if self.downloaded_this_run > 0 and elapsed > 0:
|
||||
rate = self.downloaded_this_run / elapsed
|
||||
remaining = self.total - done
|
||||
if rate > 0:
|
||||
eta_s = remaining / rate
|
||||
eta = '%dm%02ds' % (int(eta_s) // 60, int(eta_s) % 60)
|
||||
else:
|
||||
eta = '?'
|
||||
if rate >= 1:
|
||||
rate_str = '%.1f pkg/s' % rate
|
||||
else:
|
||||
rate_str = '%.1f s/pkg' % (1.0 / rate) if rate > 0 else '?'
|
||||
# speed
|
||||
if elapsed > 0 and self.downloaded_bytes > 0:
|
||||
speed = '%s/s' % self._fmt_bytes(int(self.downloaded_bytes / elapsed))
|
||||
else:
|
||||
eta = '?'
|
||||
rate_str = '?'
|
||||
speed = '-'
|
||||
|
||||
return (
|
||||
'[%d/%d] this_run=%d %.1f/%.1f MiB ETA=%s %s'
|
||||
% (done, self.total, self.downloaded_this_run, done_mb, total_mb, eta, rate_str)
|
||||
)
|
||||
# pkg rate
|
||||
if self.downloaded_pkgs > 0 and elapsed > 0:
|
||||
pkg_rate = self.downloaded_pkgs / elapsed
|
||||
if pkg_rate >= 1:
|
||||
rate_str = '%.1f pkg/s' % pkg_rate
|
||||
else:
|
||||
rate_str = '%.1f s/pkg' % (1.0 / pkg_rate) if pkg_rate > 0 else '-'
|
||||
else:
|
||||
rate_str = '-'
|
||||
|
||||
# ETA
|
||||
if self.has_sizes and elapsed > 0 and self.downloaded_bytes > 0:
|
||||
remaining_bytes = self.to_download_bytes - self.downloaded_bytes
|
||||
byte_rate = self.downloaded_bytes / elapsed
|
||||
eta_s = max(0, remaining_bytes / byte_rate) if byte_rate > 0 else 0
|
||||
eta = '%dm%02ds' % (int(eta_s) // 60, int(eta_s) % 60)
|
||||
elif self.downloaded_pkgs > 0 and elapsed > 0:
|
||||
remaining_pkgs = self.to_download_pkgs - self.downloaded_pkgs
|
||||
pkg_rate_v = self.downloaded_pkgs / elapsed
|
||||
eta_s = remaining_pkgs / pkg_rate_v if pkg_rate_v > 0 else 0
|
||||
eta = '~%dm%02ds' % (int(eta_s) // 60, int(eta_s) % 60)
|
||||
else:
|
||||
eta = '-'
|
||||
|
||||
# total bytes
|
||||
total_str = self._fmt_bytes(self.total_expected_bytes) if self.has_sizes else '?'
|
||||
|
||||
fields = [
|
||||
field_t(
|
||||
name='',
|
||||
value='[%d/%d]' % (done_pkgs, self.total_pkgs),
|
||||
priority=priority_t.critical,
|
||||
),
|
||||
field_t(
|
||||
name='new',
|
||||
value='%d/%d' % (self.downloaded_pkgs, self.to_download_pkgs),
|
||||
priority=priority_t.critical,
|
||||
),
|
||||
field_t(
|
||||
name='cached',
|
||||
value='%d/%s' % (self.already_pkgs, self._fmt_bytes(self.already_bytes)),
|
||||
priority=priority_t.normal,
|
||||
),
|
||||
field_t(
|
||||
name='dl',
|
||||
value='%s/%s' % (
|
||||
self._fmt_bytes(self.downloaded_bytes),
|
||||
self._fmt_bytes(self.to_download_bytes) if self.has_sizes else '?',
|
||||
),
|
||||
priority=priority_t.high,
|
||||
),
|
||||
field_t(
|
||||
name='total',
|
||||
value=total_str,
|
||||
priority=priority_t.low,
|
||||
),
|
||||
field_t(
|
||||
name='',
|
||||
value=speed,
|
||||
priority=priority_t.high,
|
||||
),
|
||||
field_t(
|
||||
name='',
|
||||
value=rate_str,
|
||||
priority=priority_t.normal,
|
||||
),
|
||||
field_t(
|
||||
name='ETA',
|
||||
value=eta,
|
||||
priority=priority_t.critical,
|
||||
),
|
||||
]
|
||||
return fields
|
||||
|
||||
def emit(self) -> None:
|
||||
self._renderer.emit(self._build_fields())
|
||||
|
||||
def finish(self) -> None:
|
||||
self._renderer.finish()
|
||||
|
||||
def format_plain(self) -> str:
|
||||
"""For tests and non-renderer usage."""
|
||||
from online.fxreader.pr34.commands_typed.terminal import line_formatter_t
|
||||
|
||||
return line_formatter_t.format(self._build_fields(), 200)
|
||||
|
||||
|
||||
class download_requirements_t:
|
||||
@staticmethod
|
||||
def parse_requirements(txt: str) -> list[tuple[str, str]]:
|
||||
entries: list[tuple[str, str]] = []
|
||||
def parse_requirements(txt: str) -> list[download_entry_t]:
|
||||
"""Parse compiled requirements into download entries."""
|
||||
entries: list[download_entry_t] = []
|
||||
url: Optional[str] = None
|
||||
|
||||
for line in txt.splitlines():
|
||||
@ -187,14 +326,12 @@ class download_requirements_t:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
candidate = line[1:].strip()
|
||||
# strip trailing annotation like "URL # pinned"
|
||||
if ' #' in candidate:
|
||||
candidate = candidate.split(' #', 1)[0].strip()
|
||||
if '/' in candidate and '://' in candidate:
|
||||
url = candidate
|
||||
continue
|
||||
|
||||
# strip trailing inline comment (e.g. "pkg==1.0 # pinned")
|
||||
if ' #' in line:
|
||||
line = line.split(' #', 1)[0].strip()
|
||||
|
||||
@ -202,16 +339,219 @@ class download_requirements_t:
|
||||
if len(parts) == 0:
|
||||
continue
|
||||
|
||||
pkg_spec = parts[0]
|
||||
sha256 = ''
|
||||
csize = 0
|
||||
for p in parts[1:]:
|
||||
if p.startswith('--hash=sha256:'):
|
||||
sha256 = p[len('--hash=sha256:'):]
|
||||
elif p.startswith('--size='):
|
||||
try:
|
||||
csize = int(p[len('--size='):])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if url is not None:
|
||||
filename = url.rsplit('/', 1)[-1] if '/' in url else pkg_spec
|
||||
entries.append((url, filename))
|
||||
filename = url.rsplit('/', 1)[-1] if '/' in url else parts[0]
|
||||
entries.append(download_entry_t(url=url, filename=filename, sha256=sha256, csize=csize))
|
||||
url = None
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
class download_t:
    """Downloads a set of compiled packages into ``dest_dir``.

    Entries are first split into already-cached files and files still to
    fetch; the latter are downloaded either by one batched aria2c process
    (aria2c backend with jobs > 1) or by per-file backend invocations run
    concurrently under an asyncio.Semaphore.
    """

    def __init__(
        self,
        dest_dir: pathlib.Path,
        backend: downloader_t.constants_t.backend_t,
        limit_rate: int,
        jobs: int,
        verify: bool,
        show_progress: bool,
        progress_mode: progress_t.constants_t.mode_t = progress_t.constants_t.mode_t.plain,
    ) -> None:
        # directory package files end up in
        self.dest_dir = dest_dir
        # urllib / curl / aria2c selector passed through to downloader_t.download
        self.backend = backend
        # bytes/second cap handed to curl/aria2c
        self.limit_rate = limit_rate
        # concurrency level: semaphore width, or aria2c -j
        self.jobs = jobs
        # when True, sha256-check cached files and fresh downloads
        self.verify = verify
        self.show_progress = show_progress
        self.progress_mode = progress_mode

    def _classify(
        self, entries: list[download_entry_t],
    ) -> tuple[list[download_entry_t], int, int]:
        """Returns (to_download, already_count, already_bytes)."""
        to_download: list[download_entry_t] = []
        already_count = 0
        already_bytes = 0

        for e in entries:
            dest_path = self.dest_dir / e.filename
            if not dest_path.exists():
                to_download.append(e)
                continue
            # file is present; optionally distrust it until the checksum matches
            if self.verify and e.sha256 != '':
                if not downloader_t.verify_sha256(dest_path, e.sha256):
                    logger.warning(dict(msg='checksum mismatch, re-downloading', file=e.filename))
                    to_download.append(e)
                    continue
            already_count += 1
            already_bytes += dest_path.stat().st_size

        return to_download, already_count, already_bytes

    async def _download_one(self, e: download_entry_t) -> int:
        """Download one entry via the blocking backend in an executor thread.

        Returns the on-disk size in bytes (0 when the file is missing
        afterwards). A checksum mismatch is logged as an error, not raised.
        """
        dest_path = self.dest_dir / e.filename
        logger.info(dict(msg='downloading', file=e.filename))

        loop = asyncio.get_running_loop()
        # downloader_t.download blocks on subprocess / urllib I/O
        await loop.run_in_executor(
            None,
            downloader_t.download,
            e.url,
            dest_path,
            self.backend,
            self.limit_rate,
        )

        sz = dest_path.stat().st_size if dest_path.exists() else 0

        if self.verify and e.sha256 != '' and dest_path.exists():
            ok = await loop.run_in_executor(None, downloader_t.verify_sha256, dest_path, e.sha256)
            if not ok:
                logger.error(dict(msg='checksum mismatch after download', file=e.filename))

        logger.info(dict(msg='downloaded', file=e.filename, size=sz))
        return sz

    def _make_progress(
        self,
        entries_total: int,
        already_count: int,
        to_download: list[download_entry_t],
        already_bytes: int,
        mode: progress_t.constants_t.mode_t = progress_t.constants_t.mode_t.plain,
    ) -> Optional[progress_t]:
        """Build a progress_t for this run, or None when progress is disabled.

        Byte-based ETA is only enabled (has_sizes) when every pending entry
        carries a known compressed size.
        """
        if not self.show_progress:
            return None
        to_download_bytes = sum(e.csize for e in to_download)
        has_sizes = all(e.csize > 0 for e in to_download) and len(to_download) > 0
        total_expected_bytes = already_bytes + to_download_bytes
        return progress_t(
            total_pkgs=entries_total,
            already_pkgs=already_count,
            to_download_pkgs=len(to_download),
            already_bytes=already_bytes,
            total_expected_bytes=total_expected_bytes,
            to_download_bytes=to_download_bytes,
            has_sizes=has_sizes,
            mode=mode,
        )

    async def run(self, entries: list[download_entry_t], dry_run: bool = False) -> int:
        """Entry point: classify, plan, and download ``entries``.

        With ``dry_run`` the plan is printed and nothing is fetched.
        Returns a process exit code (0 on success).
        """
        to_download, already_count, already_bytes = self._classify(entries)

        logger.info(dict(
            msg='download plan',
            total=len(entries),
            already=already_count,
            to_download=len(to_download),
        ))

        if dry_run:
            for e in to_download:
                print('%s -> %s' % (e.url, e.filename))
            print('total: %d to download, %d already present' % (len(to_download), already_count))
            return 0

        if len(to_download) == 0:
            logger.info(dict(msg='nothing to download'))
            if self.show_progress:
                print('[%d/%d] nothing to download' % (already_count, len(entries)))
            return 0

        progress = self._make_progress(
            len(entries), already_count, to_download, already_bytes,
            mode=self.progress_mode,
        )

        # print initial status before first download
        if progress is not None:
            progress.emit()

        if self.backend is downloader_t.constants_t.backend_t.aria2c and self.jobs > 1:
            return await self._run_aria2c_batch(to_download, progress)

        return await self._run_parallel(to_download, progress)

    async def _run_aria2c_batch(
        self,
        to_download: list[download_entry_t],
        progress: Optional[progress_t],
    ) -> int:
        """Download everything through one aria2c process with -j concurrency."""
        batch = [(e.url, self.dest_dir / e.filename) for e in to_download]

        loop = asyncio.get_running_loop()
        # single blocking aria2c invocation handles the whole batch
        await loop.run_in_executor(
            None,
            downloader_t.download_batch_aria2c,
            batch,
            self.limit_rate,
            self.jobs,
        )

        # post-pass: sizes, optional verification, progress accounting
        for e in to_download:
            dest_path = self.dest_dir / e.filename
            sz = dest_path.stat().st_size if dest_path.exists() else 0
            if self.verify and e.sha256 != '' and dest_path.exists():
                ok = await loop.run_in_executor(None, downloader_t.verify_sha256, dest_path, e.sha256)
                if not ok:
                    logger.error(dict(msg='checksum mismatch after download', file=e.filename))
            if progress is not None:
                progress.update(sz)
            logger.info(dict(msg='downloaded', file=e.filename, size=sz))

        if progress is not None:
            progress.emit()
            progress.finish()

        return 0

    async def _run_parallel(
        self,
        to_download: list[download_entry_t],
        progress: Optional[progress_t],
    ) -> int:
        """Download entries concurrently, at most ``self.jobs`` at a time."""
        sem = asyncio.Semaphore(self.jobs)

        async def _bounded(e: download_entry_t) -> int:
            async with sem:
                return await self._download_one(e)

        tasks = [asyncio.create_task(_bounded(e)) for e in to_download]

        try:
            for coro in asyncio.as_completed(tasks):
                try:
                    sz = await coro
                except asyncio.CancelledError:
                    break
                if progress is not None:
                    progress.update(sz)
                    progress.emit()
        finally:
            # fix: leaving the loop early (cancellation, or an exception from a
            # download) previously abandoned still-running tasks un-awaited,
            # producing "Task was destroyed but it is pending" warnings.
            pending = [t for t in tasks if not t.done()]
            for t in pending:
                t.cancel()
            if pending:
                await asyncio.gather(*pending, return_exceptions=True)

        if progress is not None:
            progress.finish()

        downloaded = progress.downloaded_pkgs if progress else len(to_download)
        logger.info(dict(
            msg='download complete',
            downloaded=downloaded,
            total=progress.total_pkgs if progress else len(to_download),
        ))

        return 0
|
||||
|
||||
|
||||
def main(args: list[str]) -> int:
|
||||
download_parser = argparse.ArgumentParser(
|
||||
prog='online-fxreader-pr34-archlinux download',
|
||||
@ -259,94 +599,33 @@ def main(args: list[str]) -> int:
|
||||
default=1,
|
||||
help='parallel downloads (default: 1). For aria2c, passed as -j to aria2c directly.',
|
||||
)
|
||||
download_parser.add_argument(
|
||||
'--dry-run',
|
||||
default=False,
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help='print what would be downloaded without downloading',
|
||||
)
|
||||
download_parser.add_argument(
|
||||
'--verify-checksum',
|
||||
default=False,
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help='verify sha256 checksum of existing and downloaded files',
|
||||
)
|
||||
|
||||
download_options = download_parser.parse_args(args)
|
||||
opts = download_parser.parse_args(args)
|
||||
|
||||
dest_dir = pathlib.Path(download_options.dest_dir)
|
||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||
dl = download_t(
|
||||
dest_dir=pathlib.Path(opts.dest_dir),
|
||||
backend=downloader_t.constants_t.backend_t(opts.downloader),
|
||||
limit_rate=parse_rate_t.parse(opts.limit_rate),
|
||||
jobs=opts.jobs,
|
||||
verify=opts.verify_checksum,
|
||||
show_progress=opts.progress,
|
||||
progress_mode=progress_t.constants_t.mode_t(opts.progress_mode),
|
||||
)
|
||||
|
||||
backend = downloader_t.constants_t.backend_t(download_options.downloader)
|
||||
limit_rate = parse_rate_t.parse(download_options.limit_rate)
|
||||
jobs: int = download_options.jobs
|
||||
entries = download_requirements_t.parse_requirements(
|
||||
pathlib.Path(opts.requirements).read_text()
|
||||
)
|
||||
|
||||
requirements_txt = pathlib.Path(download_options.requirements).read_text()
|
||||
entries = download_requirements_t.parse_requirements(requirements_txt)
|
||||
|
||||
# split into already-done vs to-download
|
||||
to_download: list[tuple[str, str]] = []
|
||||
already_count = 0
|
||||
already_bytes = 0
|
||||
total_bytes = 0
|
||||
|
||||
for url, filename in entries:
|
||||
dest_path = dest_dir / filename
|
||||
if dest_path.exists():
|
||||
already_count += 1
|
||||
sz = dest_path.stat().st_size
|
||||
already_bytes += sz
|
||||
total_bytes += sz
|
||||
else:
|
||||
to_download.append((url, filename))
|
||||
|
||||
# estimate total bytes (already + to_download as average of already)
|
||||
avg_size = already_bytes // already_count if already_count > 0 else 10 * 1024 * 1024
|
||||
total_bytes += avg_size * len(to_download)
|
||||
|
||||
progress: Optional[progress_t] = None
|
||||
if download_options.progress:
|
||||
progress = progress_t(
|
||||
total=len(entries),
|
||||
total_bytes=total_bytes,
|
||||
already_done=already_count,
|
||||
already_bytes=already_bytes,
|
||||
)
|
||||
if len(to_download) == 0:
|
||||
print(progress.format_plain())
|
||||
|
||||
# aria2c with -j: batch all into single process
|
||||
if backend is downloader_t.constants_t.backend_t.aria2c and jobs > 1 and len(to_download) > 0:
|
||||
batch = [(url, dest_dir / filename) for url, filename in to_download]
|
||||
downloader_t.download_batch_aria2c(batch, limit_rate, jobs)
|
||||
if progress is not None:
|
||||
for url, filename in to_download:
|
||||
dest_path = dest_dir / filename
|
||||
sz = dest_path.stat().st_size if dest_path.exists() else avg_size
|
||||
progress.update(sz)
|
||||
total_bytes = total_bytes - avg_size + sz
|
||||
progress.total_bytes = total_bytes
|
||||
print(progress.format_plain())
|
||||
logger.info(dict(msg='download complete', count=len(entries)))
|
||||
return 0
|
||||
|
||||
def _download_one(url: str, filename: str) -> int:
|
||||
dest_path = dest_dir / filename
|
||||
logger.debug(dict(msg='downloading', url=url, dest=str(dest_path)))
|
||||
downloader_t.download(
|
||||
url=url,
|
||||
dest=dest_path,
|
||||
backend=backend,
|
||||
limit_rate=limit_rate,
|
||||
)
|
||||
return dest_path.stat().st_size if dest_path.exists() else 0
|
||||
|
||||
if jobs > 1 and backend is not downloader_t.constants_t.backend_t.aria2c:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
|
||||
futures = {
|
||||
executor.submit(_download_one, url, filename): (url, filename)
|
||||
for url, filename in to_download
|
||||
}
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
sz = future.result()
|
||||
if progress is not None:
|
||||
progress.update(sz)
|
||||
print(progress.format_plain())
|
||||
else:
|
||||
for url, filename in to_download:
|
||||
sz = _download_one(url, filename)
|
||||
if progress is not None:
|
||||
progress.update(sz)
|
||||
print(progress.format_plain())
|
||||
|
||||
logger.info(dict(msg='download complete', count=len(entries)))
|
||||
|
||||
return 0
|
||||
return asyncio.run(dl.run(entries, dry_run=opts.dry_run))
|
||||
|
||||
@ -5,7 +5,7 @@ import tempfile
|
||||
import unittest
|
||||
import unittest.mock
|
||||
|
||||
from typing import Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..cli.download import (
|
||||
parse_rate_t,
|
||||
@ -85,8 +85,8 @@ class TestDownloadRequirementsParse(unittest.TestCase):
|
||||
txt = '# https://example.com/core/bash-5.2-1-x86_64.pkg.tar.zst\nbash==5.2-1 --hash=sha256:abc123\n'
|
||||
entries = download_requirements_t.parse_requirements(txt)
|
||||
self.assertEqual(len(entries), 1)
|
||||
self.assertEqual(entries[0][0], 'https://example.com/core/bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[0][1], 'bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[0].url, 'https://example.com/core/bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[0].filename, 'bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
|
||||
def test_multiple(self) -> None:
|
||||
txt = (
|
||||
@ -94,8 +94,8 @@ class TestDownloadRequirementsParse(unittest.TestCase):
|
||||
)
|
||||
entries = download_requirements_t.parse_requirements(txt)
|
||||
self.assertEqual(len(entries), 2)
|
||||
self.assertEqual(entries[0][1], 'bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[1][1], 'glibc-2.38-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[0].filename, 'bash-5.2-1-x86_64.pkg.tar.zst')
|
||||
self.assertEqual(entries[1].filename, 'glibc-2.38-1-x86_64.pkg.tar.zst')
|
||||
|
||||
def test_no_url_skipped(self) -> None:
|
||||
txt = 'bash==5.2-1\n'
|
||||
@ -122,18 +122,31 @@ class TestDownloader(unittest.TestCase):
|
||||
def test_urllib_backend(self, mock_urlretrieve: unittest.mock.MagicMock) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dest = pathlib.Path(tmpdir) / 'test.pkg'
|
||||
part = dest.with_suffix(dest.suffix + '.part')
|
||||
|
||||
def fake_retrieve(url: str, path: str) -> None:
|
||||
pathlib.Path(path).write_bytes(b'\x00')
|
||||
|
||||
mock_urlretrieve.side_effect = fake_retrieve
|
||||
downloader_t.download(
|
||||
url='https://example.com/test.pkg',
|
||||
dest=dest,
|
||||
backend=downloader_t.constants_t.backend_t.urllib,
|
||||
limit_rate=128 * 1024,
|
||||
)
|
||||
mock_urlretrieve.assert_called_once_with('https://example.com/test.pkg', str(dest))
|
||||
mock_urlretrieve.assert_called_once_with('https://example.com/test.pkg', str(part))
|
||||
self.assertTrue(dest.exists())
|
||||
|
||||
@unittest.mock.patch('subprocess.check_call')
|
||||
def test_curl_backend(self, mock_check_call: unittest.mock.MagicMock) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dest = pathlib.Path(tmpdir) / 'test.pkg'
|
||||
part = dest.with_suffix(dest.suffix + '.part')
|
||||
|
||||
def fake_call(cmd: list[str], **kwargs: Any) -> None:
|
||||
pathlib.Path(cmd[cmd.index('-o') + 1]).write_bytes(b'\x00')
|
||||
|
||||
mock_check_call.side_effect = fake_call
|
||||
downloader_t.download(
|
||||
url='https://example.com/test.pkg',
|
||||
dest=dest,
|
||||
@ -143,12 +156,19 @@ class TestDownloader(unittest.TestCase):
|
||||
cmd = mock_check_call.call_args[0][0]
|
||||
self.assertEqual(cmd[0], 'curl')
|
||||
self.assertIn('--limit-rate', cmd)
|
||||
self.assertIn(str(128 * 1024), cmd)
|
||||
self.assertTrue(dest.exists())
|
||||
|
||||
@unittest.mock.patch('subprocess.check_call')
|
||||
def test_aria2c_backend(self, mock_check_call: unittest.mock.MagicMock) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
dest = pathlib.Path(tmpdir) / 'test.pkg'
|
||||
|
||||
def fake_call(cmd: list[str], **kwargs: Any) -> None:
|
||||
d = cmd[cmd.index('-d') + 1]
|
||||
o = cmd[cmd.index('-o') + 1]
|
||||
pathlib.Path(d, o).write_bytes(b'\x00')
|
||||
|
||||
mock_check_call.side_effect = fake_call
|
||||
downloader_t.download(
|
||||
url='https://example.com/test.pkg',
|
||||
dest=dest,
|
||||
@ -158,6 +178,7 @@ class TestDownloader(unittest.TestCase):
|
||||
cmd = mock_check_call.call_args[0][0]
|
||||
self.assertEqual(cmd[0], 'aria2c')
|
||||
self.assertIn('--max-download-limit=%d' % (1024 * 1024), cmd)
|
||||
self.assertTrue(dest.exists())
|
||||
|
||||
|
||||
class TestGroupExpansion(unittest.TestCase):
|
||||
|
||||
@ -48,7 +48,12 @@ def _prefill(dest: pathlib.Path, filenames: list[str]) -> None:
|
||||
(dest / f).write_bytes(b'\x00' * 200)
|
||||
|
||||
|
||||
def _fake_download(url: str, dest: pathlib.Path, **kwargs: object) -> None:
|
||||
def _fake_download(
|
||||
url: str,
|
||||
dest: pathlib.Path,
|
||||
backend: object = None,
|
||||
limit_rate: int = 0,
|
||||
) -> None:
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
dest.write_bytes(b'\x00' * 200)
|
||||
|
||||
@ -78,22 +83,22 @@ def _run(extra_args: list[str], tmpdir: str) -> tuple[int, pathlib.Path, pathlib
|
||||
|
||||
class TestProgressFormat(unittest.TestCase):
|
||||
def test_initial(self) -> None:
|
||||
p = progress_t(total=10, total_bytes=100 * 1024 * 1024, already_done=3, already_bytes=30 * 1024 * 1024)
|
||||
p = progress_t(total_pkgs=10, already_pkgs=3, to_download_pkgs=7, already_bytes=30 * 1024 * 1024, total_expected_bytes=100 * 1024 * 1024, to_download_bytes=70 * 1024 * 1024, has_sizes=True)
|
||||
txt = p.format_plain()
|
||||
self.assertIn('[3/10]', txt)
|
||||
self.assertIn('this_run=0', txt)
|
||||
self.assertIn('new=0/7', txt)
|
||||
|
||||
def test_after_updates(self) -> None:
|
||||
p = progress_t(total=10, total_bytes=100 * 1024 * 1024, already_done=0, already_bytes=0)
|
||||
p = progress_t(total_pkgs=10, already_pkgs=0, to_download_pkgs=10, already_bytes=0, total_expected_bytes=100 * 1024 * 1024, to_download_bytes=100 * 1024 * 1024, has_sizes=True)
|
||||
p.update(5 * 1024 * 1024)
|
||||
p.update(5 * 1024 * 1024)
|
||||
txt = p.format_plain()
|
||||
self.assertIn('[2/10]', txt)
|
||||
self.assertIn('this_run=2', txt)
|
||||
self.assertIn('new=2/10', txt)
|
||||
|
||||
def test_eta_and_rate(self) -> None:
|
||||
p = progress_t(total=100, total_bytes=1000 * 1024 * 1024, already_done=0, already_bytes=0)
|
||||
p.start_time -= 5.0 # simulate 5s elapsed for 10 pkgs → 2 pkg/s
|
||||
p = progress_t(total_pkgs=100, already_pkgs=0, to_download_pkgs=100, already_bytes=0, total_expected_bytes=1000 * 1024 * 1024, to_download_bytes=1000 * 1024 * 1024, has_sizes=True)
|
||||
p.start_time -= 5.0
|
||||
for _ in range(10):
|
||||
p.update(10 * 1024 * 1024)
|
||||
txt = p.format_plain()
|
||||
@ -101,8 +106,8 @@ class TestProgressFormat(unittest.TestCase):
|
||||
self.assertIn('pkg/s', txt)
|
||||
|
||||
def test_slow_rate_shows_s_per_pkg(self) -> None:
|
||||
p = progress_t(total=10, total_bytes=100 * 1024 * 1024, already_done=0, already_bytes=0)
|
||||
p.start_time -= 30.0 # 30s for 1 package → 30 s/pkg
|
||||
p = progress_t(total_pkgs=10, already_pkgs=0, to_download_pkgs=10, already_bytes=0, total_expected_bytes=100 * 1024 * 1024, to_download_bytes=100 * 1024 * 1024, has_sizes=True)
|
||||
p.start_time -= 30.0
|
||||
p.update(10 * 1024 * 1024)
|
||||
txt = p.format_plain()
|
||||
self.assertIn('s/pkg', txt)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user