[+] improve cli_bootstrap.py with whl cache, venv_partial, packaging_t

1. add packaging_t: canonicalize_name, parse_whl_name_version, parse_req_spec;
  2. add whl_cache_download: download missing wheels into .venv-whl-cache;
  3. add check_host_prerequisites: verify pip and uv available on host;
  4. add BootstrapSettings fields: whl_cache_path, uv_cache_dir, whl_cache_update;
  5. add BootstrapSettings fields: uv_compile_allow_index, venv_partial;
  6. add check_type helper for typed attribute access;
  7. venv_partial recompiles with existing requirements.txt as constraints;
  8. compile output goes to temp file, moved on success only;
  9. guard typing_extensions import for bootstrap without venv;
This commit is contained in:
LLM 2026-04-03 16:46:08 +00:00
parent b67a40936b
commit 77be19948d

@ -10,6 +10,7 @@ import sys
import subprocess
import os
import logging
import re
import typing
@ -21,16 +22,24 @@ from typing import (
TypeVar,
Callable,
)
from typing_extensions import (
if typing.TYPE_CHECKING:
from typing_extensions import (
Self,
BinaryIO,
overload,
)
)
else:
try:
from typing_extensions import overload
except ModuleNotFoundError:
def overload(f: Any) -> Any:
return f
logger = logging.getLogger(__name__)
def toml_load(f: BinaryIO) -> Any:
def toml_load(f: 'BinaryIO') -> Any:
try:
tomllib = importlib.import_module('tomllib')
@ -61,9 +70,13 @@ class PyProject:
name: str
meson: Optional[pathlib.Path] = None
tool: dict[str, Any] = dataclasses.field(default_factory=lambda: dict())
scripts: dict[str, str] = dataclasses.field(default_factory=lambda: dict())
project: dict[str, Any] = dataclasses.field(default_factory=lambda: dict())
path: pathlib.Path
dependencies: dict[str, list[str]]
name: Optional[str] = None
version: Optional[str] = None
early_features: Optional[list[str]] = None
pip_find_links: Optional[list[pathlib.Path]] = None
runtime_libdirs: Optional[list[pathlib.Path]] = None
@ -172,6 +185,21 @@ def check_list(
)
def check_type(
    value: Any,
    VT: Type[Value],
    attribute_name: Optional[str] = None,
) -> Value:
    """Return ``value``, or one of its attributes, after asserting its type.

    Args:
        value: Object to check, or the owner of the attribute to check.
        VT: Type the checked object must be an instance of.
        attribute_name: When truthy, ``getattr(value, attribute_name)`` is
            checked and returned instead of ``value`` itself. ``None`` and
            the empty string both select ``value``.

    Returns:
        The checked object, unchanged.

    Raises:
        AssertionError: The checked object is not an instance of ``VT``.
        AttributeError: ``value`` has no attribute ``attribute_name``.
    """
    # Guard clause: no attribute requested, so validate the value itself.
    if not attribute_name:
        assert isinstance(value, VT)
        return value

    attribute_value = getattr(value, attribute_name)
    assert isinstance(attribute_value, VT)
    return attribute_value
def pyproject_load(
d: pathlib.Path,
) -> PyProject:
@ -205,9 +233,19 @@ def pyproject_load(
dependencies[k] = v
name: Optional[str] = None
if 'name' in content.get('project', {}):
name = content['project']['name']
version: Optional[str] = None
if 'version' in content.get('project', {}):
version = content['project']['version']
res = PyProject(
path=d,
dependencies=dependencies,
name=name,
version=version,
)
tool_name = 'online.fxreader.pr34'.replace('.', '-')
@ -292,6 +330,23 @@ def pyproject_load(
)
)
if 'scripts' in o:
module.scripts.update(
check_dict(
o['scripts'],
str,
str,
)
)
if 'project' in o:
module.project.update(
check_dict(
o['project'],
str,
)
)
res.modules.append(module)
return res
@ -300,6 +355,7 @@ def pyproject_load(
@dataclasses.dataclass
class BootstrapSettings:
env_path: pathlib.Path
whl_cache_path: pathlib.Path
python_path: pathlib.Path
base_dir: pathlib.Path
python_version: Optional[str] = dataclasses.field(
@ -315,18 +371,33 @@ class BootstrapSettings:
pip_check_conflicts: Optional[bool] = dataclasses.field(
default_factory=lambda: os.environ.get('PIP_CHECK_CONFLICTS', json.dumps(True)) in [json.dumps(True)],
)
uv_cache_dir: str = dataclasses.field(
default_factory=lambda: os.environ.get(
'UV_CACHE_DIR',
str(pathlib.Path.cwd() / '.uv-cache'),
)
)
uv_args: list[str] = dataclasses.field(
default_factory=lambda: os.environ.get(
'UV_ARGS',
'--offline',
'--no-index -U',
).split(),
)
whl_cache_update: Optional[bool] = dataclasses.field(
default_factory=lambda: os.environ.get('WHL_CACHE_UPDATE', json.dumps(False)) in [json.dumps(True)]
)
uv_compile_allow_index: bool = dataclasses.field(
default_factory=lambda: os.environ.get('UV_COMPILE_ALLOW_INDEX', json.dumps(False)) in [json.dumps(True)]
)
venv_partial: bool = dataclasses.field(
default_factory=lambda: os.environ.get('VENV_PARTIAL', json.dumps(False)) in [json.dumps(True)]
)
@classmethod
def get(
cls,
base_dir: Optional[pathlib.Path] = None,
) -> Self:
) -> 'Self':
if base_dir is None:
base_dir = pathlib.Path.cwd()
@ -336,11 +407,14 @@ class BootstrapSettings:
else:
env_path = base_dir / '.venv'
whl_cache_path = env_path.parent / '.venv-whl-cache'
python_path = env_path / 'bin' / 'python3'
return cls(
base_dir=base_dir,
env_path=env_path,
whl_cache_path=whl_cache_path,
python_path=python_path,
)
@ -386,10 +460,113 @@ def requirements_name_get(
)
class packaging_t:
    """Small helpers for comparing wheel file names with pinned requirements.

    Only the standard library is used; names are normalised with
    ``canonicalize_name`` so that both sides can be compared as plain strings.
    """

    class constants_t:
        # Any run of '-', '_' or '.' is collapsed to one '-' when canonicalizing.
        canonicalize_re: typing.ClassVar[re.Pattern[str]] = re.compile(r'[-_.]+')
        # Leading `name==version`; the version ends at whitespace or ';'.
        req_spec_re: typing.ClassVar[re.Pattern[str]] = re.compile(r'^([a-zA-Z0-9._-]+)==([^\s;]+)')

    @dataclasses.dataclass
    class pkg_id_t:
        # Canonicalized package name and the version string exactly as parsed.
        name: str
        version: str

    @staticmethod
    def canonicalize_name(name: str) -> str:
        """Collapse runs of ``-``, ``_`` and ``.`` into ``-`` and lowercase."""
        collapsed = packaging_t.constants_t.canonicalize_re.sub('-', name)
        return collapsed.lower()

    @staticmethod
    def parse_whl_name_version(filename: str) -> Optional['packaging_t.pkg_id_t']:
        """Extract name and version from a ``.whl`` file name.

        The first two ``-``-separated fields are taken as name and version.
        Returns ``None`` when ``filename`` does not end with ``.whl`` or has
        fewer than three ``-``-separated fields.
        """
        if not filename.endswith('.whl'):
            return None

        fields = filename.split('-')
        if len(fields) < 3:
            return None

        distribution, version = fields[0], fields[1]
        return packaging_t.pkg_id_t(
            name=packaging_t.canonicalize_name(distribution),
            version=version,
        )

    @staticmethod
    def parse_req_spec(line: str) -> Optional['packaging_t.pkg_id_t']:
        """Parse a ``name==version`` pin at the start of ``line``.

        Returns ``None`` when ``line`` does not start with such a pin.
        """
        matched = packaging_t.constants_t.req_spec_re.match(line)
        if matched is None:
            return None

        raw_name, version = matched.group(1), matched.group(2)
        return packaging_t.pkg_id_t(
            name=packaging_t.canonicalize_name(raw_name),
            version=version,
        )
def whl_cache_download(
    whl_cache_path: pathlib.Path,
    requirements_path: pathlib.Path,
    uv_python_version: list[str],
    pip_find_links_args: list[str],
) -> None:
    """Download the wheels listed in ``requirements_path`` that are not cached.

    Every ``*.whl`` file in ``whl_cache_path`` is parsed into a
    ``(name, version)`` pair. Each requirement line whose ``name==version``
    pin matches one of those pairs is treated as cached; all remaining
    lines are written to a temporary requirements file and passed to
    ``pip download --only-binary=:all:`` with ``whl_cache_path`` as the
    destination directory.

    Args:
        whl_cache_path: Wheel cache directory; created if it does not exist.
        requirements_path: Requirements file to read.
        uv_python_version: Extra arguments inserted verbatim into the
            ``pip download`` command line.
        pip_find_links_args: Extra arguments inserted verbatim into the
            ``pip download`` command line.

    Raises:
        subprocess.CalledProcessError: ``pip download`` exited with a
            non-zero status.
    """
    whl_cache_path.mkdir(parents=True, exist_ok=True)

    cached_pkgs: set[tuple[str, str]] = set()
    for whl in whl_cache_path.glob('*.whl'):
        parsed = packaging_t.parse_whl_name_version(whl.name)
        if parsed is not None:
            cached_pkgs.add((parsed.name, parsed.version))

    missing_reqs: list[str] = []
    with io.open(requirements_path, 'r') as f:
        for line in f:
            stripped = line.strip()
            # Blank lines, comment lines and hash continuation lines carry
            # no requirement of their own.
            if not stripped or stripped.startswith(('#', '--hash')):
                continue

            # Drop the trailing line-continuation backslash (and spaces).
            spec = stripped.rstrip(' \\')
            if not spec:
                # The line held nothing but a continuation backslash.
                continue

            parsed = packaging_t.parse_req_spec(spec)
            if parsed is not None and (parsed.name, parsed.version) in cached_pkgs:
                logger.info(dict(msg='cached', pkg='%s==%s' % (parsed.name, parsed.version)))
                continue

            # Lines that are not a plain `name==version` pin are always
            # forwarded to pip, since they cannot be matched against the cache.
            missing_reqs.append(spec)

    if not missing_reqs:
        logger.info(dict(msg='all wheels cached, skipping pip download'))
        return

    logger.info(dict(msg='downloading missing wheels', count=len(missing_reqs), pkgs=missing_reqs))

    # delete=False because pip reopens the file by name after it is closed;
    # the try/finally below owns the cleanup, including when the write fails.
    missing_req_file = tempfile.NamedTemporaryFile(
        mode='w',
        prefix='requirements_missing_',
        suffix='.txt',
        delete=False,
    )
    missing_req_path = missing_req_file.name

    try:
        with missing_req_file:
            missing_req_file.write('\n'.join(missing_reqs))

        cmd = [
            sys.executable, '-m', 'pip', 'download', '--only-binary=:all:',
            *uv_python_version, *pip_find_links_args,
            '-r', missing_req_path,
            '-d', str(whl_cache_path),
        ]
        logger.info(dict(cmd=cmd))
        subprocess.check_call(cmd)
    finally:
        os.unlink(missing_req_path)
def check_host_prerequisites() -> None:
    """Verify that ``pip`` and ``uv`` can be run as modules of this interpreter.

    Runs ``<sys.executable> -m <module> --version`` for each module with its
    output discarded. On the first module that fails, an error is logged and
    the process exits with status 1.
    """
    required_modules = ['pip', 'uv']

    for module_name in required_modules:
        probe_cmd = [sys.executable, '-m', module_name, '--version']

        try:
            subprocess.check_call(
                probe_cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, FileNotFoundError):
            message = '[bootstrap] %s -m %s is not available on the host system' % (sys.executable, module_name)
            logger.error(message)
            sys.exit(1)
def env_bootstrap(
bootstrap_settings: BootstrapSettings,
pyproject: PyProject,
) -> None:
check_host_prerequisites()
pip_find_links: list[pathlib.Path] = []
if not pyproject.pip_find_links is None:
@ -464,59 +641,108 @@ def env_bootstrap(
]
)
if not requirements_path.exists():
logger.info('[bootstrap] step 1/5: compile requirements')
needs_compile = not requirements_path.exists()
constraint_args: list[str] = []
if bootstrap_settings.venv_partial and requirements_path.exists():
logger.info('[bootstrap] VENV_PARTIAL: recompiling with existing requirements.txt as constraints')
needs_compile = True
constraint_args = ['-c', str(requirements_path)]
if (not bootstrap_settings.whl_cache_path.exists() or bootstrap_settings.whl_cache_update) and requirements_path.exists():
whl_cache_download(
whl_cache_path=bootstrap_settings.whl_cache_path,
requirements_path=requirements_path,
uv_python_version=uv_python_version,
pip_find_links_args=pip_find_links_args,
)
cache_find_links_args: list[str] = []
if bootstrap_settings.whl_cache_path.exists():
cache_find_links_args = ['-f', str(bootstrap_settings.whl_cache_path)]
if needs_compile:
with tempfile.NamedTemporaryFile(
mode='w',
prefix='requirements',
suffix='.in',
) as f:
f.write('\n'.join(requirements_in))
f.flush()
) as f_in, tempfile.NamedTemporaryFile(
mode='w',
prefix='requirements',
suffix='.txt',
dir=requirements_path.parent,
delete=False,
) as f_out:
f_in.write('\n'.join(requirements_in))
f_in.flush()
subprocess.check_call(
[
uv_compile_args = bootstrap_settings.uv_args
if bootstrap_settings.uv_compile_allow_index:
uv_compile_args = [o for o in uv_compile_args if o not in ('--no-index', '-U', '--upgrade')]
if len(constraint_args) > 0:
uv_compile_args = [o for o in uv_compile_args if o not in ('-U', '--upgrade')]
cmd = [
'uv',
'pip',
'compile',
'--cache-dir', bootstrap_settings.uv_cache_dir,
'pip', 'compile',
*uv_python_version,
'--generate-hashes',
*pip_find_links_args,
# '-p',
# bootstrap_settings.python_path,
*bootstrap_settings.uv_args,
'-o',
str(requirements_path),
f.name,
*cache_find_links_args,
*constraint_args,
*uv_compile_args,
'-o', f_out.name,
f_in.name,
]
)
logger.info(dict(cmd=cmd))
subprocess.check_call(
[
try:
subprocess.check_call(cmd)
os.replace(f_out.name, str(requirements_path))
except subprocess.CalledProcessError:
os.unlink(f_out.name)
raise
if not bootstrap_settings.whl_cache_path.exists() or bootstrap_settings.whl_cache_update:
whl_cache_download(
whl_cache_path=bootstrap_settings.whl_cache_path,
requirements_path=requirements_path,
uv_python_version=uv_python_version,
pip_find_links_args=pip_find_links_args,
)
if bootstrap_settings.whl_cache_path.exists():
cache_find_links_args = ['-f', str(bootstrap_settings.whl_cache_path)]
if bootstrap_settings.venv_partial and bootstrap_settings.env_path.exists():
logger.info('[bootstrap] VENV_PARTIAL: skipping venv creation (already exists)')
else:
subprocess.check_call([
'uv',
'--cache-dir', bootstrap_settings.uv_cache_dir,
*[o for o in bootstrap_settings.uv_args if o not in ['-U', '--upgrade', '--no-index']],
'venv',
*venv_python_version,
*pip_find_links_args,
# '--seed',
*bootstrap_settings.uv_args,
*cache_find_links_args,
str(bootstrap_settings.env_path),
]
)
])
subprocess.check_call(
[
cmd = [
'uv',
'pip',
'install',
'--cache-dir', bootstrap_settings.uv_cache_dir,
'pip', 'install',
*uv_python_version,
*pip_find_links_args,
'-p',
bootstrap_settings.python_path,
*cache_find_links_args,
'-p', str(bootstrap_settings.python_path),
'--require-hashes',
*bootstrap_settings.uv_args,
'-r',
str(requirements_path),
'-r', str(requirements_path),
]
)
logger.info(dict(cmd=cmd))
subprocess.check_call(cmd)
if bootstrap_settings.pip_check_conflicts:
subprocess.check_call(
@ -547,9 +773,12 @@ def run(
pyproject: PyProject = pyproject_load(d)
logging.basicConfig(level=logging.INFO)
logging.basicConfig(
level=logging.INFO,
format='%(levelname)s:%(name)s:%(message)s:%(process)d:%(asctime)s:%(pathname)s:%(funcName)s:%(lineno)s',
)
if not bootstrap_settings.env_path.exists():
if not bootstrap_settings.env_path.exists() or bootstrap_settings.venv_partial:
env_bootstrap(
bootstrap_settings=bootstrap_settings,
pyproject=pyproject,