[+] cross-distro package mapping with fixtures and coverage tests
1. add package_mapping.py with package_map_t class;
2. supports: direct match, known aliases, version-aware (gcc-12, python3.11),
suffix strip (-libs, -utils, -doc), prefix strip (lib32-, lib),
language prefixes (python->bare/py3-, perl->lib*-perl, ruby, haskell),
family packages (libreoffice-, firefox-, gst-, vlc-, qemu-, vim-);
3. lazy-initialized alias tables via _ensure_aliases classmethod;
4. tested_ecosystems set for honest supported count;
5. add distro package list fixtures: debian12, debian13, alpine321, wolfi, arch_latest;
6. add test_package_mapping.py with unit tests and coverage thresholds:
debian12>=50%, debian13>=50%, alpine>=8%, wolfi>=14%;
This commit is contained in:
parent
4c681b6018
commit
61892b6aea
@ -0,0 +1,245 @@
|
||||
"""Cross-distro package name mapping.
|
||||
|
||||
Maps package names between distributions using direct match, known aliases,
|
||||
version-aware patterns, and suffix/prefix heuristics.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import re
|
||||
|
||||
from typing import ClassVar
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class distro_t(enum.StrEnum):
|
||||
arch = 'arch'
|
||||
debian12 = 'debian12'
|
||||
debian13 = 'debian13'
|
||||
alpine = 'alpine'
|
||||
wolfi = 'wolfi'
|
||||
chainguard = 'chainguard'
|
||||
alma9 = 'alma9'
|
||||
ubuntu = 'ubuntu'
|
||||
|
||||
|
||||
class package_map_t:
    """Map package names of a source distro onto names known in a target distro.

    An instance is bound to one (source, target) pair and to the set of
    package names that exist in the target. ``map`` tries these strategies in
    order and returns the result of the first one that yields a name present
    in ``target_names``: known aliases, direct match, version-aware names,
    suffix strip, prefix strip, language-prefix rewrites, package families.
    """

    class constants_t:
        """Static tables that drive the heuristics in ``package_map_t.map``."""

        # OSV ecosystem names we have tested heuristics and fixture coverage for
        tested_ecosystems: ClassVar[set[str]] = {
            'Debian:12',
            'Debian:13',
            'Alpine:v3.21',
            'Wolfi',
            'Chainguard',
        }

        # suffixes removed from the source name before retrying a lookup
        strip_suffixes: ClassVar[list[str]] = [
            '-libs', '-utils', '-data', '-common', '-tools',
            '-dev', '-doc', '-docs', '-lang',
        ]

        # prefixes removed from the source name before retrying a lookup;
        # tried in list order, so 'lib32-' wins over the shorter 'lib'
        strip_prefixes: ClassVar[list[str]] = ['lib32-', 'lib']

        # prefixes tried for python-foo -> "py3.X-foo", in this order
        python_versioned_prefixes: ClassVar[list[str]] = [
            'py3.13', 'py3.12', 'py3.11', 'py3.10',
        ]

        # arch prefix -> [target candidates]: subpackages map to source base
        family_packages: ClassVar[dict[str, list[str]]] = {
            'libreoffice-': ['libreoffice'],
            'firefox-': ['firefox-esr', 'firefox'],
            'thunderbird-': ['thunderbird'],
            'gst-plugins-': ['gstreamer1.0'],
            'gst-plugin-': ['gstreamer1.0'],
            'vlc-': ['vlc'],
            'qemu-': ['qemu'],
            'texlive-': ['texlive-base', 'texlive-bin', 'texlive'],
            'vim-': ['vim'],
        }

        # source name -> target name stem; the "major.minor" or "major" part
        # of the source version is appended to the stem to form a candidate
        versioned_packages: ClassVar[dict[str, str]] = {
            'gcc': 'gcc-',
            'gcc-libs': 'gcc-',
            'python': 'python',
            'ruby': 'ruby',
            'guile': 'guile-',
            'llvm': 'llvm-toolchain-',
            'llvm-libs': 'llvm-toolchain-',
            'clang': 'llvm-toolchain-',
            'openjdk-src': 'openjdk-',
            'go': 'golang-',
            'automake': 'automake-',
            'nodejs': 'nodejs',
        }

    # set once _known_aliases has been filled by _ensure_aliases
    _aliases_initialized: ClassVar[bool] = False
    # (source, target) -> {source name -> ordered target candidates}
    _known_aliases: ClassVar[dict[tuple[distro_t, distro_t], dict[str, list[str]]]] = {}

    @classmethod
    def _ensure_aliases(cls) -> None:
        """Populate ``_known_aliases`` on first use; later calls are no-ops."""
        if cls._aliases_initialized:
            return

        arch_to_debian: dict[str, list[str]] = {
            'glib2': ['glib2.0'],
            'gnupg': ['gnupg2'],
            'gnutls': ['gnutls28'],
            'gpgme': ['gpgme1.0'],
            'libmpc': ['mpclib3'],
            'libsasl': ['cyrus-sasl2'],
            'zstd': ['libzstd'],
            'vim-runtime': ['vim'],
            'linux-api-headers': ['linux'],
            'linux-headers': ['linux'],
            'pambase': ['pam'],
            'jsoncpp': ['libjsoncpp'],
            'gc': ['libgc'],
            'libjpeg-turbo': ['libjpeg62-turbo'],
            'libelf': ['elfutils'],
            'xz': ['xz-utils'],
            'procps-ng': ['procps'],
            'pkgconf': ['pkgconf', 'pkg-config'],
            'libnl': ['libnl3'],
            'device-mapper': ['lvm2'],
            'shadow': ['shadow'],
        }

        for deb in (distro_t.debian12, distro_t.debian13):
            # give each release its own copy so a later per-release tweak
            # cannot leak into the other release's table
            cls._known_aliases[(distro_t.arch, deb)] = {
                src: list(candidates) for src, candidates in arch_to_debian.items()
            }

        cls._known_aliases[(distro_t.arch, distro_t.alpine)] = {
            'linux': ['linux-lts'],
            'linux-headers': ['linux-lts'],
            'gnutls': ['gnutls'],
            'procps-ng': ['procps'],
            'shadow': ['shadow'],
            'util-linux': ['util-linux'],
        }

        cls._known_aliases[(distro_t.arch, distro_t.wolfi)] = {
            'linux': ['linux-headers'],
            'linux-headers': ['linux-headers'],
        }

        # flip the flag last: nobody can observe "initialized" with empty tables
        cls._aliases_initialized = True

    def __init__(self, source: distro_t, target: distro_t, target_names: set[str]) -> None:
        """Bind the mapper to a distro pair and the target's package names.

        Args:
            source: distro the names passed to ``map`` come from.
            target: distro the names are mapped to.
            target_names: names that exist in ``target``; stored by
                reference and only used for membership tests.
        """
        self._ensure_aliases()
        self.source = source
        self.target = target
        self.target_names = target_names

    @staticmethod
    def _extract_major_minor(version: str) -> tuple[str, str]:
        """Return ``(major, major.minor)`` extracted from a version string.

        Everything up to the first ':' and everything from the last '-' on
        is dropped, as is anything from the first '+'. When the remaining
        text has no '.', both tuple items are the same.
        """
        if ':' in version:
            version = version.split(':', 1)[1]
        if '-' in version:
            version = version.rsplit('-', 1)[0]
        version = re.sub(r'\+.*$', '', version)
        # str.split always yields at least one item, so parts[0] is safe
        parts = version.split('.')
        major = parts[0]
        major_minor = '%s.%s' % (parts[0], parts[1]) if len(parts) > 1 else major
        return major, major_minor

    def _versioned_matches(self, key: str, version: str) -> set[str]:
        """Return the versioned target names for ``key`` that exist in the target.

        Empty when ``version`` is empty or ``key`` is not listed in
        ``constants_t.versioned_packages``.
        """
        if version == '' or key not in self.constants_t.versioned_packages:
            return set()
        base = self.constants_t.versioned_packages[key]
        major, major_minor = self._extract_major_minor(version)
        candidates = ['%s%s' % (base, ver) for ver in (major_minor, major)]
        return {c for c in candidates if c in self.target_names}

    def _first_present(self, candidates: list[str]) -> set[str]:
        """Return a one-element set with the first candidate found in the target, else an empty set."""
        for candidate in candidates:
            if candidate in self.target_names:
                return {candidate}
        return set()

    def _language_candidates(self, lower: str) -> list[str]:
        """Return ordered target candidates for a language-prefixed name, or []."""
        # python-foo -> bare "foo" (Debian source) or "py3-foo" or "py3.X-foo" (Alpine/Wolfi)
        if lower.startswith('python-'):
            bare = lower[len('python-'):]
            versioned = [
                pyver + '-' + bare
                for pyver in self.constants_t.python_versioned_prefixes
            ]
            return [bare, 'py3-' + bare] + versioned

        # perl-foo -> "libfoo-perl" (Debian) or bare; the unchanged name was
        # already tried by the direct-match step
        if lower.startswith('perl-'):
            bare = lower[len('perl-'):]
            return ['lib' + bare + '-perl', bare]

        # ruby-foo / haskell-foo -> bare
        for prefix in ('ruby-', 'haskell-'):
            if lower.startswith(prefix):
                return [lower[len(prefix):]]

        return []

    def map(self, name: str, version: str = '') -> set[str]:
        """Map a single package name. Returns set of target matches (may be empty).

        Args:
            name: source package name; matching is done on its lower-cased form.
            version: source package version; only consulted by the
                version-aware strategies and ignored when empty.
        """
        lower = name.lower()

        # 1. known aliases
        aliases = self._known_aliases.get((self.source, self.target), {})
        if lower in aliases:
            matches = {c for c in aliases[lower] if c in self.target_names}
            if len(matches) > 0:
                return matches

        # 2. direct
        if lower in self.target_names:
            return {lower}

        # 3. version-aware
        matches = self._versioned_matches(lower, version)
        if len(matches) > 0:
            return matches

        # 4. suffix strip
        for suffix in self.constants_t.strip_suffixes:
            # require a non-empty remainder, same as the prefix strip below
            if not lower.endswith(suffix) or len(lower) <= len(suffix):
                continue
            stripped = lower[: -len(suffix)]
            if stripped in self.target_names:
                return {stripped}
            matches = self._versioned_matches(stripped, version)
            if len(matches) > 0:
                return matches

        # 5. prefix strip
        for prefix in self.constants_t.strip_prefixes:
            if lower.startswith(prefix) and len(lower) > len(prefix):
                stripped = lower[len(prefix):]
                if stripped in self.target_names:
                    return {stripped}

        # 6. language-prefixed packages
        matches = self._first_present(self._language_candidates(lower))
        if len(matches) > 0:
            return matches

        # 7. compound package families: map subpackages to base source
        for family_prefix, candidates in self.constants_t.family_packages.items():
            if lower.startswith(family_prefix):
                matches = self._first_present(candidates)
                if len(matches) > 0:
                    return matches

        return set()

    def map_batch(self, packages: list[tuple[str, str]]) -> dict[str, set[str]]:
        """Map a list of (name, version). Returns only matches.

        The result is keyed by the name exactly as given; names that map to
        nothing are left out.
        """
        result: dict[str, set[str]] = {}
        for name, version in packages:
            mapped = self.map(name, version)
            if len(mapped) > 0:
                result[name] = mapped
        return result
|
||||
5540
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/alpine321.txt
Normal file
5540
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/alpine321.txt
Normal file
File diff suppressed because it is too large
Load Diff
15311
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/arch_latest.txt
Normal file
15311
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/arch_latest.txt
Normal file
File diff suppressed because it is too large
Load Diff
34292
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/debian12.txt
Normal file
34292
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/debian12.txt
Normal file
File diff suppressed because it is too large
Load Diff
37589
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/debian13.txt
Normal file
37589
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/debian13.txt
Normal file
File diff suppressed because it is too large
Load Diff
16061
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/wolfi.txt
Normal file
16061
python/online/fxreader/pr34/commands_typed/archlinux/tests/res/distro_pkgs/wolfi.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,156 @@
|
||||
"""Tests for package_mapping module.
|
||||
|
||||
Uses fixture files in tests/res/distro_pkgs/ containing real package lists
|
||||
from Debian 12, Debian 13, Alpine 3.21, Wolfi, and Arch Linux.
|
||||
|
||||
Asserts that at least 50% of Arch packages can be mapped to each target distro
|
||||
that has broad coverage (Debian). Lower threshold for distros with narrower
|
||||
package sets (Alpine, Wolfi).
|
||||
"""
|
||||
|
||||
import pathlib
|
||||
import unittest
|
||||
|
||||
from ..apps.cve.package_mapping import distro_t, package_map_t
|
||||
|
||||
# Directory with the distro package-list fixtures, resolved relative to this test file.
RES_DIR = pathlib.Path(__file__).parent / 'res' / 'distro_pkgs'
|
||||
|
||||
|
||||
def _load_names(filename: str) -> set[str]:
    """Return the set of non-blank, whitespace-stripped lines of a fixture file.

    Args:
        filename: file name inside ``RES_DIR``.
    """
    path = RES_DIR / filename
    # decode explicitly so the result does not depend on the locale's default encoding
    text = path.read_text(encoding='utf-8')
    stripped = (line.strip() for line in text.splitlines())
    return {name for name in stripped if name != ''}
|
||||
|
||||
|
||||
def _load_arch_with_versions() -> list[tuple[str, str]]:
    """Return the Arch fixture as name-sorted ``(name, version)`` pairs.

    The fixture carries no versions, so every version is the empty string.
    """
    pairs: list[tuple[str, str]] = []
    for pkg_name in sorted(_load_names('arch_latest.txt')):
        pairs.append((pkg_name, ''))
    return pairs
|
||||
|
||||
|
||||
class TestFixturesExist(unittest.TestCase):
    """Each fixture must load and contain more names than a per-distro floor."""

    def test_debian12(self) -> None:
        count = len(_load_names('debian12.txt'))
        self.assertGreater(count, 30000)

    def test_debian13(self) -> None:
        count = len(_load_names('debian13.txt'))
        self.assertGreater(count, 30000)

    def test_alpine(self) -> None:
        count = len(_load_names('alpine321.txt'))
        self.assertGreater(count, 4000)

    def test_wolfi(self) -> None:
        count = len(_load_names('wolfi.txt'))
        self.assertGreater(count, 10000)

    def test_arch(self) -> None:
        count = len(_load_names('arch_latest.txt'))
        self.assertGreater(count, 10000)
|
||||
|
||||
|
||||
class TestDirectMatch(unittest.TestCase):
    """A name that exists unchanged in the target maps to itself; unknown names map to nothing."""

    def test_bash_to_debian(self) -> None:
        target_names = {'bash', 'vim'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        found = mapper.map('bash')
        self.assertEqual(found, {'bash'})

    def test_unknown_returns_empty(self) -> None:
        target_names = {'bash'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        found = mapper.map('nonexistent-pkg-xyz')
        self.assertEqual(found, set())
|
||||
|
||||
|
||||
class TestKnownAliases(unittest.TestCase):
    """Names listed in the per-distro-pair alias tables resolve to their aliases."""

    def test_glib2_to_debian(self) -> None:
        target_names = {'glib2.0', 'vim'}
        mapper = package_map_t(distro_t.arch, distro_t.debian13, target_names)
        self.assertEqual(mapper.map('glib2'), {'glib2.0'})

    def test_gnutls_to_debian(self) -> None:
        target_names = {'gnutls28'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        self.assertEqual(mapper.map('gnutls'), {'gnutls28'})

    def test_linux_to_alpine(self) -> None:
        target_names = {'linux-lts'}
        mapper = package_map_t(distro_t.arch, distro_t.alpine, target_names)
        self.assertEqual(mapper.map('linux'), {'linux-lts'})
|
||||
|
||||
|
||||
class TestVersionAware(unittest.TestCase):
    """With a version supplied, versioned packages resolve to the matching target name."""

    def test_gcc_versioned(self) -> None:
        target_names = {'gcc-12', 'gcc-11'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        found = mapper.map('gcc', version='12.3.0-1')
        self.assertEqual(found, {'gcc-12'})

    def test_python_versioned(self) -> None:
        target_names = {'python3.11', 'python3.12'}
        mapper = package_map_t(distro_t.arch, distro_t.debian13, target_names)
        found = mapper.map('python', version='3.12.5-1')
        self.assertEqual(found, {'python3.12'})

    def test_guile_versioned(self) -> None:
        target_names = {'guile-3.0', 'guile-2.2'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        found = mapper.map('guile', version='3.0.10-1')
        self.assertEqual(found, {'guile-3.0'})
|
||||
|
||||
|
||||
class TestSuffixStrip(unittest.TestCase):
    """A known suffix is dropped when the remaining name exists in the target."""

    def test_strip_libs(self) -> None:
        target_names = {'systemd'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        self.assertEqual(mapper.map('systemd-libs'), {'systemd'})

    def test_strip_utils(self) -> None:
        target_names = {'ca-certificates'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        self.assertEqual(mapper.map('ca-certificates-utils'), {'ca-certificates'})
|
||||
|
||||
|
||||
class TestPrefixStrip(unittest.TestCase):
    """A known prefix is dropped when the remaining name exists in the target."""

    def test_strip_lib(self) -> None:
        target_names = {'nghttp2'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        found = mapper.map('libnghttp2')
        self.assertEqual(found, {'nghttp2'})
|
||||
|
||||
|
||||
class TestBatch(unittest.TestCase):
    """``map_batch`` keeps the names that mapped and leaves out the ones that did not."""

    def test_batch(self) -> None:
        target_names = {'bash', 'vim', 'glib2.0'}
        mapper = package_map_t(distro_t.arch, distro_t.debian12, target_names)
        queries = [('bash', ''), ('vim', ''), ('glib2', ''), ('nope', '')]
        result = mapper.map_batch(queries)
        # same checks, same order: three expected hits, then the expected miss
        for expected_hit in ('bash', 'vim', 'glib2'):
            self.assertIn(expected_hit, result)
        self.assertNotIn('nope', result)
|
||||
|
||||
|
||||
class TestCoverageThresholds(unittest.TestCase):
    """Assert that a minimum percentage of Arch packages map to each target distro."""

    def _stats(
        self,
        target_distro: distro_t,
        fixture: str,
        arch_pkgs: list[tuple[str, str]] | None = None,
    ) -> tuple[int, int, float]:
        """Return (mapped count, Arch package count, mapped percentage).

        Args:
            target_distro: distro the Arch names are mapped to.
            fixture: file name of the target's package list.
            arch_pkgs: already-loaded Arch packages; loaded from the fixture
                when omitted, so callers looping over targets can load once.
        """
        if arch_pkgs is None:
            arch_pkgs = _load_arch_with_versions()
        # fail with a readable message rather than a ZeroDivisionError below
        self.assertGreater(len(arch_pkgs), 0, 'arch fixture is empty')
        target_names = _load_names(fixture)
        m = package_map_t(distro_t.arch, target_distro, target_names)
        mapped = m.map_batch(arch_pkgs)
        pct = len(mapped) / len(arch_pkgs) * 100
        return len(mapped), len(arch_pkgs), pct

    def _coverage(self, target_distro: distro_t, fixture: str) -> float:
        """Return the percentage of Arch packages that map to ``target_distro``."""
        return self._stats(target_distro, fixture)[2]

    def test_debian12_at_least_50_pct(self) -> None:
        pct = self._coverage(distro_t.debian12, 'debian12.txt')
        self.assertGreaterEqual(pct, 50.0, 'debian12 coverage %.1f%% < 50%%' % pct)

    def test_debian13_at_least_50_pct(self) -> None:
        pct = self._coverage(distro_t.debian13, 'debian13.txt')
        self.assertGreaterEqual(pct, 50.0, 'debian13 coverage %.1f%% < 50%%' % pct)

    def test_alpine_at_least_8_pct(self) -> None:
        pct = self._coverage(distro_t.alpine, 'alpine321.txt')
        self.assertGreaterEqual(pct, 8.0, 'alpine coverage %.1f%% < 8%%' % pct)

    def test_wolfi_at_least_14_pct(self) -> None:
        pct = self._coverage(distro_t.wolfi, 'wolfi.txt')
        self.assertGreaterEqual(pct, 14.0, 'wolfi coverage %.1f%% < 14%%' % pct)

    def test_print_coverage_stats(self) -> None:
        """Not a real assertion — prints coverage for tuning heuristics."""
        # the Arch list is the same for every target, so load it once
        arch_pkgs = _load_arch_with_versions()
        for target, fixture in [
            (distro_t.debian12, 'debian12.txt'),
            (distro_t.debian13, 'debian13.txt'),
            (distro_t.alpine, 'alpine321.txt'),
            (distro_t.wolfi, 'wolfi.txt'),
        ]:
            mapped_count, total, pct = self._stats(target, fixture, arch_pkgs)
            print('%s: %d/%d = %.1f%%' % (target, mapped_count, total, pct))
|
||||
Loading…
Reference in New Issue
Block a user