diff --git a/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py b/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py index c2d76ed..26c867d 100644 --- a/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py +++ b/python/online/fxreader/pr34/commands_typed/archlinux/cli/archive_types.py @@ -46,3 +46,15 @@ class manager_t(abc.ABC): ) -> None: """Sync a range of dates.""" ... + + @abc.abstractmethod + def sync_reference( + self, + reference: dict[str, str], + cache_dir: pathlib.Path, + cache_db: cache_db_t, + repos: Optional[list[str]] = None, + arch: str = 'x86_64', + ) -> None: + """Fetch archive listings for pinned versions not in cache, sync needed dates.""" + ... diff --git a/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py b/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py new file mode 100644 index 0000000..9ee9b8a --- /dev/null +++ b/python/online/fxreader/pr34/commands_typed/archlinux/tests/test_archive_versions.py @@ -0,0 +1,352 @@ +import datetime +import pathlib +import sqlite3 +import tempfile +import unittest +import unittest.mock + +from ..apps.cache.db import cache_db_t, archive_version_row_t, archive_version_status_t +from ..apps.pacman.manager import pacman_manager_t, archive_entry_t + + +SAMPLE_NGINX_HTML = """\ + +Index of /packages/g/glibc/ + +

Index of /packages/g/glibc/


../
+glibc-2.37-3-x86_64.pkg.tar.zst                    15-Apr-2023 20:55              10M
+glibc-2.37-3-x86_64.pkg.tar.zst.sig                15-Apr-2023 20:55              566
+glibc-2.38-7-x86_64.pkg.tar.zst                    03-Dec-2023 18:33              10M
+glibc-2.38-7-x86_64.pkg.tar.zst.sig                03-Dec-2023 18:33              566
+glibc-2.39-1-x86_64.pkg.tar.zst                    02-Feb-2024 16:50              10M
+glibc-2.39-1-x86_64.pkg.tar.zst.sig                02-Feb-2024 16:50              566
+glibc-2.41-1-x86_64.pkg.tar.zst                    28-Jan-2025 03:11              11M
+glibc-2.41-1-x86_64.pkg.tar.zst.sig                28-Jan-2025 03:11              566
+

+ +""" + +SAMPLE_NGINX_HTML_PYTHON = """\ + +Index of /packages/p/python/ + +

Index of /packages/p/python/


../
+python-3.11.5-1-x86_64.pkg.tar.zst                 07-Aug-2023 12:00              20M
+python-3.11.5-1-x86_64.pkg.tar.zst.sig             07-Aug-2023 12:00              566
+python-3.12.1-1-x86_64.pkg.tar.zst                 15-Dec-2023 09:00              21M
+python-3.12.1-1-x86_64.pkg.tar.zst.sig             15-Dec-2023 09:00              566
+

+ +""" + + +class TestParseArchiveHtml(unittest.TestCase): + def test_parse_glibc_entries(self) -> None: + entries = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML) + self.assertEqual(len(entries), 4) + + def test_parse_excludes_sig_files(self) -> None: + entries = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML) + for e in entries: + self.assertFalse(e.filename.endswith('.sig')) + + def test_parse_extracts_version(self) -> None: + entries = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML) + versions = [e.version for e in entries] + self.assertIn('2.37-3', versions) + self.assertIn('2.38-7', versions) + self.assertIn('2.39-1', versions) + self.assertIn('2.41-1', versions) + + def test_parse_extracts_date(self) -> None: + entries = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML) + by_version = {e.version: e for e in entries} + self.assertEqual(by_version['2.39-1'].date, datetime.date(2024, 2, 2)) + self.assertEqual(by_version['2.41-1'].date, datetime.date(2025, 1, 28)) + + def test_parse_extracts_filename(self) -> None: + entries = pacman_manager_t.parse_archive_listing('glibc', SAMPLE_NGINX_HTML) + by_version = {e.version: e for e in entries} + self.assertEqual( + by_version['2.39-1'].filename, + 'glibc-2.39-1-x86_64.pkg.tar.zst', + ) + + def test_parse_empty_html(self) -> None: + html = '
'
+		entries = pacman_manager_t.parse_archive_listing('glibc', html)
+		self.assertEqual(entries, [])
+
+	def test_parse_python_package(self) -> None:
+		entries = pacman_manager_t.parse_archive_listing('python', SAMPLE_NGINX_HTML_PYTHON)
+		self.assertEqual(len(entries), 2)
+		versions = [e.version for e in entries]
+		self.assertIn('3.11.5-1', versions)
+		self.assertIn('3.12.1-1', versions)
+
+
+class TestArchiveVersionTable(unittest.TestCase):
+	def setUp(self) -> None:
+		self.conn = sqlite3.connect(':memory:')
+		cache_db_t.migrate(self.conn, 0, cache_db_t.schema_version())
+		self.db = cache_db_t(self.conn)
+
+	def tearDown(self) -> None:
+		self.conn.close()
+
+	def test_upsert_archive_version(self) -> None:
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.39-1',
+			archive_date=datetime.date(2024, 2, 2),
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		rows = list(self.db.list_archive_versions('glibc'))
+		self.assertEqual(len(rows), 1)
+		self.assertEqual(rows[0].version, '2.39-1')
+		self.assertEqual(rows[0].status, archive_version_status_t.pending)
+
+	def test_upsert_archive_version_idempotent(self) -> None:
+		for _ in range(3):
+			self.db.upsert_archive_version(
+				name='glibc',
+				version='2.39-1',
+				archive_date=datetime.date(2024, 2, 2),
+				filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+			)
+		rows = list(self.db.list_archive_versions('glibc'))
+		self.assertEqual(len(rows), 1)
+
+	def test_mark_synced(self) -> None:
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.39-1',
+			archive_date=datetime.date(2024, 2, 2),
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		self.db.mark_archive_version_synced('glibc', '2.39-1')
+		rows = list(self.db.list_archive_versions('glibc'))
+		self.assertEqual(rows[0].status, archive_version_status_t.synced)
+
+	def test_list_pending(self) -> None:
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.38-7',
+			archive_date=datetime.date(2023, 12, 3),
+			filename='glibc-2.38-7-x86_64.pkg.tar.zst',
+		)
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.39-1',
+			archive_date=datetime.date(2024, 2, 2),
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		self.db.mark_archive_version_synced('glibc', '2.38-7')
+
+		pending = list(self.db.list_pending_archive_versions())
+		self.assertEqual(len(pending), 1)
+		self.assertEqual(pending[0].name, 'glibc')
+		self.assertEqual(pending[0].version, '2.39-1')
+
+	def test_find_archive_date_for_version(self) -> None:
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.39-1',
+			archive_date=datetime.date(2024, 2, 2),
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		row = self.db.find_archive_version('glibc', '2.39-1')
+		self.assertIsNotNone(row)
+		self.assertEqual(row.archive_date, datetime.date(2024, 2, 2))
+
+	def test_find_archive_version_not_found(self) -> None:
+		row = self.db.find_archive_version('glibc', '9.99-1')
+		self.assertIsNone(row)
+
+	def test_bulk_upsert(self) -> None:
+		entries = [
+			archive_entry_t(
+				name='glibc',
+				version='2.38-7',
+				filename='glibc-2.38-7-x86_64.pkg.tar.zst',
+				date=datetime.date(2023, 12, 3),
+			),
+			archive_entry_t(
+				name='glibc',
+				version='2.39-1',
+				filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+				date=datetime.date(2024, 2, 2),
+			),
+		]
+		self.db.bulk_upsert_archive_versions(entries)
+		rows = list(self.db.list_archive_versions('glibc'))
+		self.assertEqual(len(rows), 2)
+
+	def test_pending_dates_for_reference(self) -> None:
+		"""Given a reference dict, find which versions are missing from cache
+		and return dates that need syncing."""
+		self.db.upsert_archive_version(
+			name='glibc',
+			version='2.39-1',
+			archive_date=datetime.date(2024, 2, 2),
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		self.db.upsert_archive_version(
+			name='python',
+			version='3.12.1-1',
+			archive_date=datetime.date(2023, 12, 15),
+			filename='python-3.12.1-1-x86_64.pkg.tar.zst',
+		)
+		# glibc synced, python not
+		self.db.mark_archive_version_synced('glibc', '2.39-1')
+
+		pending = list(self.db.list_pending_archive_versions())
+		dates = {r.archive_date for r in pending}
+		self.assertIn(datetime.date(2023, 12, 15), dates)
+		self.assertNotIn(datetime.date(2024, 2, 2), dates)
+
+
+class TestSyncReference(unittest.TestCase):
+	"""Tests for sync_reference: fetches archive listings, populates
+	archive_versions, determines dates to sync, syncs them."""
+
+	def setUp(self) -> None:
+		self.conn = sqlite3.connect(':memory:')
+		cache_db_t.migrate(self.conn, 0, cache_db_t.schema_version())
+		self.db = cache_db_t(self.conn)
+		self.mgr = pacman_manager_t()
+		self._tmp = tempfile.TemporaryDirectory()
+		self.cache_dir = pathlib.Path(self._tmp.name)
+
+	def tearDown(self) -> None:
+		self.conn.close()
+		self._tmp.cleanup()
+
+	def _mock_fetch_listing(self, responses: dict[str, str]) -> unittest.mock.MagicMock:
+		"""Mock _fetch_archive_page to return canned HTML per package name."""
+		def side_effect(pkg_name: str) -> str:
+			return responses.get(pkg_name, '
')
+		return unittest.mock.patch.object(
+			self.mgr, '_fetch_archive_page', side_effect=side_effect,
+		)
+
+	def test_sync_reference_discovers_versions(self) -> None:
+		reference = {'glibc': '2.39-1', 'python': '3.12.1-1'}
+
+		with self._mock_fetch_listing({
+			'glibc': SAMPLE_NGINX_HTML,
+			'python': SAMPLE_NGINX_HTML_PYTHON,
+		}):
+			with unittest.mock.patch.object(self.mgr, 'sync_date'):
+				self.mgr.sync_reference(
+					reference=reference,
+					cache_dir=self.cache_dir,
+					cache_db=self.db,
+				)
+
+		glibc_row = self.db.find_archive_version('glibc', '2.39-1')
+		self.assertIsNotNone(glibc_row)
+		self.assertEqual(glibc_row.archive_date, datetime.date(2024, 2, 2))
+
+		python_row = self.db.find_archive_version('python', '3.12.1-1')
+		self.assertIsNotNone(python_row)
+		self.assertEqual(python_row.archive_date, datetime.date(2023, 12, 15))
+
+	def test_sync_reference_calls_sync_date(self) -> None:
+		reference = {'glibc': '2.39-1'}
+
+		with self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML}):
+			with unittest.mock.patch.object(self.mgr, 'sync_date') as mock_sync:
+				self.mgr.sync_reference(
+					reference=reference,
+					cache_dir=self.cache_dir,
+					cache_db=self.db,
+				)
+
+		# should sync the date corresponding to glibc 2.39-1 (2024/02/02)
+		self.assertGreater(mock_sync.call_count, 0)
+		synced_dates = {call.kwargs.get('date') or call.args[0] for call in mock_sync.call_args_list}
+		self.assertIn('2024/02/02', synced_dates)
+
+	def test_sync_reference_skips_already_cached(self) -> None:
+		"""If a (name, version) already exists in packages table, skip it."""
+		from ..apps.pacman.types import repo_index_t, package_desc_t
+
+		# pre-populate: glibc 2.39-1 is already in a cached snapshot
+		snapshot_id = self.db.upsert_snapshot(
+			date='2024/02/02',
+			repo='core',
+			arch='x86_64',
+			db_sha256='abc123',
+		)
+		idx = repo_index_t(name='core')
+		idx.packages['glibc'] = package_desc_t(
+			name='glibc', version='2.39-1',
+			filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+		)
+		self.db.store_index(snapshot_id=snapshot_id, index=idx)
+
+		reference = {'glibc': '2.39-1'}
+
+		with self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML}) as mock_fetch:
+			with unittest.mock.patch.object(self.mgr, 'sync_date'):
+				self.mgr.sync_reference(
+					reference=reference,
+					cache_dir=self.cache_dir,
+					cache_db=self.db,
+				)
+
+		# should not have fetched archive page since version is already in packages
+		mock_fetch.assert_not_called()
+
+	def test_sync_reference_marks_synced(self) -> None:
+		from ..apps.pacman.types import repo_index_t, package_desc_t
+
+		reference = {'glibc': '2.39-1'}
+
+		def fake_sync_date(**kwargs: object) -> None:
+			# simulate what sync_date does: insert a package into the db
+			snapshot_id = self.db.upsert_snapshot(
+				date='2024/02/02', repo='core', arch='x86_64', db_sha256='fake',
+			)
+			idx = repo_index_t(name='core')
+			idx.packages['glibc'] = package_desc_t(
+				name='glibc', version='2.39-1',
+				filename='glibc-2.39-1-x86_64.pkg.tar.zst',
+			)
+			self.db.store_index(snapshot_id=snapshot_id, index=idx)
+
+		with self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML}):
+			with unittest.mock.patch.object(self.mgr, 'sync_date', side_effect=fake_sync_date):
+				self.mgr.sync_reference(
+					reference=reference,
+					cache_dir=self.cache_dir,
+					cache_db=self.db,
+				)
+
+		row = self.db.find_archive_version('glibc', '2.39-1')
+		self.assertIsNotNone(row)
+		self.assertEqual(row.status, archive_version_status_t.synced)
+
+	def test_sync_reference_version_not_in_archive(self) -> None:
+		"""If the version isn't found in archive listing, log warning, no crash."""
+		reference = {'glibc': '9.99.99-1'}
+
+		with self._mock_fetch_listing({'glibc': SAMPLE_NGINX_HTML}):
+			with unittest.mock.patch.object(self.mgr, 'sync_date') as mock_sync:
+				self.mgr.sync_reference(
+					reference=reference,
+					cache_dir=self.cache_dir,
+					cache_db=self.db,
+				)
+
+		mock_sync.assert_not_called()
+
+	def test_sync_reference_empty(self) -> None:
+		with unittest.mock.patch.object(self.mgr, 'sync_date') as mock_sync:
+			self.mgr.sync_reference(
+				reference={},
+				cache_dir=self.cache_dir,
+				cache_db=self.db,
+			)
+		mock_sync.assert_not_called()