diff --git a/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py b/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py
index c2d76ed..26c867d 100644
--- a/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py
+++ b/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py
@@ -46,3 +46,15 @@ class manager_t(abc.ABC):
) -> None:
"""Sync a range of dates."""
...
+
+ @abc.abstractmethod
+ def sync_reference(
+ self,
+ reference: dict[str, str],
+ cache_dir: pathlib.Path,
+ cache_db: cache_db_t,
+ repos: Optional[list[str]] = None,
+ arch: str = 'x86_64',
+ ) -> None:
+ """Fetch archive listings for pinned versions not in cache, sync needed dates.
+
+ Abstract stub: the body is empty here and concrete managers supply
+ the behaviour.
+
+ Args:
+ reference: pinned versions to resolve; presumably a mapping of
+ package name to version such as ``{'glibc': '2.39-1'}`` --
+ confirm against the implementation.
+ cache_dir: directory path handed to the implementation.
+ cache_db: cache database object handed to the implementation.
+ repos: optional list of repository names, ``None`` by default;
+ what ``None`` selects is implementation-defined (TODO confirm).
+ arch: architecture string, ``'x86_64'`` by default.
+
+ Returns:
+ None.
+ """
+ ...
diff --git a/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py b/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py
new file mode 100644
index 0000000..9ee9b8a
--- /dev/null
+++ b/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py
@@ -0,0 +1,352 @@
+import datetime
+import pathlib
+import sqlite3
+import tempfile
+import unittest
+import unittest.mock
+
+from ..apps.cache.db import cache_db_t, archive_version_row_t, archive_version_status_t
+from ..apps.pacman.manager import pacman_manager_t, archive_entry_t
+
+
+# Canned directory listing for the glibc package, passed to
+# pacman_manager_t.parse_archive_listing and returned by the mocked
+# _fetch_archive_page in the tests below. It names four package files
+# (2.37-3, 2.38-7, 2.39-1, 2.41-1), each followed by a `.sig` entry that
+# carries the same timestamp.
+# NOTE(review): as shown here the literal contains no HTML tags or anchors;
+# confirm it still matches the markup parse_archive_listing expects.
+SAMPLE_NGINX_HTML = """\
+
+
Index of /packages/g/glibc/
+
+Index of /packages/g/glibc/
../
+glibc-2.37-3-x86_64.pkg.tar.zst 15-Apr-2023 20:55 10M
+glibc-2.37-3-x86_64.pkg.tar.zst.sig 15-Apr-2023 20:55 566
+glibc-2.38-7-x86_64.pkg.tar.zst 03-Dec-2023 18:33 10M
+glibc-2.38-7-x86_64.pkg.tar.zst.sig 03-Dec-2023 18:33 566
+glibc-2.39-1-x86_64.pkg.tar.zst 02-Feb-2024 16:50 10M
+glibc-2.39-1-x86_64.pkg.tar.zst.sig 02-Feb-2024 16:50 566
+glibc-2.41-1-x86_64.pkg.tar.zst 28-Jan-2025 03:11 11M
+glibc-2.41-1-x86_64.pkg.tar.zst.sig 28-Jan-2025 03:11 566
+
+
+"""
+
+# Canned directory listing for the python package, used the same way as
+# SAMPLE_NGINX_HTML. It names two package files (3.11.5-1 and 3.12.1-1),
+# each followed by a `.sig` entry with the same timestamp.
+# NOTE(review): as shown here the literal contains no HTML tags or anchors;
+# confirm it still matches the markup parse_archive_listing expects.
+SAMPLE_NGINX_HTML_PYTHON = """\
+
+Index of /packages/p/python/
+
+Index of /packages/p/python/
../
+python-3.11.5-1-x86_64.pkg.tar.zst 07-Aug-2023 12:00 20M
+python-3.11.5-1-x86_64.pkg.tar.zst.sig 07-Aug-2023 12:00 566
+python-3.12.1-1-x86_64.pkg.tar.zst 15-Dec-2023 09:00 21M
+python-3.12.1-1-x86_64.pkg.tar.zst.sig 15-Dec-2023 09:00 566
+
+
+"""
+
+
+class TestParseArchiveHtml(unittest.TestCase):
+    """Exercise pacman_manager_t.parse_archive_listing on the canned listings."""
+
+    def test_parse_glibc_entries(self) -> None:
+        # The glibc sample names four package files plus their signatures.
+        parsed = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML)
+        self.assertEqual(len(parsed), 4)
+
+    def test_parse_excludes_sig_files(self) -> None:
+        parsed = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML)
+        # Collect offenders so a failure names the signature files that leaked.
+        leaked = [item.filename for item in parsed if item.filename.endswith('.sig')]
+        self.assertEqual(leaked, [])
+
+    def test_parse_extracts_version(self) -> None:
+        parsed = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML)
+        found = [item.version for item in parsed]
+        for expected in ('2.37-3', '2.38-7', '2.39-1', '2.41-1'):
+            self.assertIn(expected, found)
+
+    def test_parse_extracts_date(self) -> None:
+        parsed = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML)
+        date_of = {item.version: item.date for item in parsed}
+        self.assertEqual(date_of['2.39-1'], datetime.date(2024, 2, 2))
+        self.assertEqual(date_of['2.41-1'], datetime.date(2025, 1, 28))
+
+    def test_parse_extracts_filename(self) -> None:
+        parsed = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML)
+        filename_of = {item.version: item.filename for item in parsed}
+        self.assertEqual(
+            filename_of['2.39-1'],
+            'glibc-2.39-1-x86_64.pkg.tar.zst',
+        )
+
+    def test_parse_empty_html(self) -> None:
+        # An empty page must produce an empty list.
+        self.assertEqual(pacman_manager_t.parse_archive_listing('glibc', ''), [])
+
+    def test_parse_python_package(self) -> None:
+        parsed = pacman_manager_t.parse_archive_listing('python', SAMPLE_NGINX_HTML_PYTHON)
+        self.assertEqual(len(parsed), 2)
+        found = [item.version for item in parsed]
+        for expected in ('3.11.5-1', '3.12.1-1'):
+            self.assertIn(expected, found)
+
+
+class TestArchiveVersionTable(unittest.TestCase):
+    """Exercise the archive-version methods of cache_db_t on an in-memory database."""
+
+    def setUp(self) -> None:
+        self.conn = sqlite3.connect(':memory:')
+        cache_db_t.migrate(self.conn, 0, cache_db_t.schema_version())
+        self.db = cache_db_t(self.conn)
+
+    def tearDown(self) -> None:
+        self.conn.close()
+
+    def _add(self, name: str, version: str, when: datetime.date) -> None:
+        """Upsert one row; the filename is built from *name* and *version*."""
+        self.db.upsert_archive_version(
+            name=name,
+            version=version,
+            archive_date=when,
+            filename=f'{name}-{version}-x86_64.pkg.tar.zst',
+        )
+
+    def test_upsert_archive_version(self) -> None:
+        self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+        stored = list(self.db.list_archive_versions('glibc'))
+        self.assertEqual(len(stored), 1)
+        first = stored[0]
+        self.assertEqual(first.version, '2.39-1')
+        # A freshly upserted row is expected to start out pending.
+        self.assertEqual(first.status, archive_version_status_t.pending)
+
+    def test_upsert_archive_version_idempotent(self) -> None:
+        attempts = 0
+        while attempts < 3:
+            self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+            attempts += 1
+        stored = list(self.db.list_archive_versions('glibc'))
+        self.assertEqual(len(stored), 1)
+
+    def test_mark_synced(self) -> None:
+        self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+        self.db.mark_archive_version_synced('glibc', '2.39-1')
+        stored = list(self.db.list_archive_versions('glibc'))
+        self.assertEqual(stored[0].status, archive_version_status_t.synced)
+
+    def test_list_pending(self) -> None:
+        self._add('glibc', '2.38-7', datetime.date(2023, 12, 3))
+        self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+        self.db.mark_archive_version_synced('glibc', '2.38-7')
+
+        # Only the row that was not marked synced should remain pending.
+        still_pending = list(self.db.list_pending_archive_versions())
+        self.assertEqual(len(still_pending), 1)
+        remaining = still_pending[0]
+        self.assertEqual(remaining.name, 'glibc')
+        self.assertEqual(remaining.version, '2.39-1')
+
+    def test_find_archive_date_for_version(self) -> None:
+        self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+        hit = self.db.find_archive_version('glibc', '2.39-1')
+        self.assertIsNotNone(hit)
+        self.assertEqual(hit.archive_date, datetime.date(2024, 2, 2))
+
+    def test_find_archive_version_not_found(self) -> None:
+        self.assertIsNone(self.db.find_archive_version('glibc', '9.99-1'))
+
+    def test_bulk_upsert(self) -> None:
+        releases = (
+            ('2.38-7', datetime.date(2023, 12, 3)),
+            ('2.39-1', datetime.date(2024, 2, 2)),
+        )
+        batch = [
+            archive_entry_t(
+                name='glibc',
+                version=release,
+                filename=f'glibc-{release}-x86_64.pkg.tar.zst',
+                date=day,
+            )
+            for release, day in releases
+        ]
+        self.db.bulk_upsert_archive_versions(batch)
+        stored = list(self.db.list_archive_versions('glibc'))
+        self.assertEqual(len(stored), 2)
+
+    def test_pending_dates_for_reference(self) -> None:
+        """Pending rows expose the archive dates that still need syncing,
+        and rows already marked synced do not contribute a date."""
+        self._add('glibc', '2.39-1', datetime.date(2024, 2, 2))
+        self._add('python', '3.12.1-1', datetime.date(2023, 12, 15))
+        # glibc is marked synced; python is left pending
+        self.db.mark_archive_version_synced('glibc', '2.39-1')
+
+        outstanding = set()
+        for record in self.db.list_pending_archive_versions():
+            outstanding.add(record.archive_date)
+        self.assertIn(datetime.date(2023, 12, 15), outstanding)
+        self.assertNotIn(datetime.date(2024, 2, 2), outstanding)
+
+
+class TestSyncReference(unittest.TestCase):
+    """Drive pacman_manager_t.sync_reference with the page fetch and
+    sync_date patched out, then inspect the cache database and the mocks."""
+
+    def setUp(self) -> None:
+        self.conn = sqlite3.connect(':memory:')
+        cache_db_t.migrate(self.conn, 0, cache_db_t.schema_version())
+        self.db = cache_db_t(self.conn)
+        self.mgr = pacman_manager_t()
+        self._tmp = tempfile.TemporaryDirectory()
+        self.cache_dir = pathlib.Path(self._tmp.name)
+
+    def tearDown(self) -> None:
+        self.conn.close()
+        self._tmp.cleanup()
+
+    def _mock_fetch_listing(self, responses: dict[str, str]) -> unittest.mock.MagicMock:
+        """Build a patcher for _fetch_archive_page that serves canned text
+        per package name, and an empty string for unknown names."""
+        return unittest.mock.patch.object(
+            self.mgr,
+            '_fetch_archive_page',
+            side_effect=lambda pkg_name: responses.get(pkg_name, ''),
+        )
+
+    def _run(self, reference: dict[str, str]) -> None:
+        """Call sync_reference with this fixture's cache directory and database."""
+        self.mgr.sync_reference(
+            reference=reference,
+            cache_dir=self.cache_dir,
+            cache_db=self.db,
+        )
+
+    def test_sync_reference_discovers_versions(self) -> None:
+        fetch_patch = self._mock_fetch_listing({
+            'glibc': SAMPLE_NGINX_HTML,
+            'python': SAMPLE_NGINX_HTML_PYTHON,
+        })
+        sync_patch = unittest.mock.patch.object(self.mgr, 'sync_date')
+        with fetch_patch, sync_patch:
+            self._run({'glibc': '2.39-1', 'python': '3.12.1-1'})
+
+        glibc_hit = self.db.find_archive_version('glibc', '2.39-1')
+        self.assertIsNotNone(glibc_hit)
+        self.assertEqual(glibc_hit.archive_date, datetime.date(2024, 2, 2))
+
+        python_hit = self.db.find_archive_version('python', '3.12.1-1')
+        self.assertIsNotNone(python_hit)
+        self.assertEqual(python_hit.archive_date, datetime.date(2023, 12, 15))
+
+    def test_sync_reference_calls_sync_date(self) -> None:
+        fetch_patch = self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML})
+        sync_patch = unittest.mock.patch.object(self.mgr, 'sync_date')
+        with fetch_patch, sync_patch as mock_sync:
+            self._run({'glibc': '2.39-1'})
+
+        # glibc 2.39-1 is dated 02-Feb-2024 in the listing, so that day must be synced
+        self.assertGreater(mock_sync.call_count, 0)
+        synced_dates = set()
+        for recorded in mock_sync.call_args_list:
+            # Accept the date as a keyword or as the first positional argument.
+            synced_dates.add(recorded.kwargs.get('date') or recorded.args[0])
+        self.assertIn('2024/02/02', synced_dates)
+
+    def test_sync_reference_skips_already_cached(self) -> None:
+        """A (name, version) already stored through store_index must not
+        trigger a fetch of its archive page."""
+        from ..apps.pacman.types import repo_index_t, package_desc_t
+
+        # seed the cache: glibc 2.39-1 already belongs to a stored snapshot
+        core_index = repo_index_t(name='core')
+        core_index.packages['glibc'] = package_desc_t(
+            name='glibc',
+            version='2.39-1',
+            filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+        )
+        seeded_id = self.db.upsert_snapshot(
+            date='2024/02/02',
+            repo='core',
+            arch='x86_64',
+            db_sha256='abc123',
+        )
+        self.db.store_index(snapshot_id=seeded_id, index=core_index)
+
+        fetch_patch = self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML})
+        sync_patch = unittest.mock.patch.object(self.mgr, 'sync_date')
+        with fetch_patch as mock_fetch, sync_patch:
+            self._run({'glibc': '2.39-1'})
+
+        # the version was already cached, so no archive page should be requested
+        mock_fetch.assert_not_called()
+
+    def test_sync_reference_marks_synced(self) -> None:
+        from ..apps.pacman.types import repo_index_t, package_desc_t
+
+        def fake_sync_date(**kwargs: object) -> None:
+            # stand-in for sync_date: record glibc 2.39-1 in a snapshot
+            core_index = repo_index_t(name='core')
+            core_index.packages['glibc'] = package_desc_t(
+                name='glibc',
+                version='2.39-1',
+                filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+            )
+            new_id = self.db.upsert_snapshot(
+                date='2024/02/02',
+                repo='core',
+                arch='x86_64',
+                db_sha256='fake',
+            )
+            self.db.store_index(snapshot_id=new_id, index=core_index)
+
+        fetch_patch = self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML})
+        sync_patch = unittest.mock.patch.object(
+            self.mgr, 'sync_date', side_effect=fake_sync_date,
+        )
+        with fetch_patch, sync_patch:
+            self._run({'glibc': '2.39-1'})
+
+        hit = self.db.find_archive_version('glibc', '2.39-1')
+        self.assertIsNotNone(hit)
+        self.assertEqual(hit.status, archive_version_status_t.synced)
+
+    def test_sync_reference_version_not_in_archive(self) -> None:
+        """A version absent from the listing must not raise and must not
+        lead to any sync_date call."""
+        fetch_patch = self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML})
+        sync_patch = unittest.mock.patch.object(self.mgr, 'sync_date')
+        with fetch_patch, sync_patch as mock_sync:
+            self._run({'glibc': '9.99.99-1'})
+
+        mock_sync.assert_not_called()
+
+    def test_sync_reference_empty(self) -> None:
+        # An empty reference gives sync_reference nothing to sync.
+        with unittest.mock.patch.object(self.mgr, 'sync_date') as mock_sync:
+            self._run({})
+        mock_sync.assert_not_called()