CUSP-1256 (#1)

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

---------

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
Stephen Simpson
2025-11-20 12:16:33 -05:00
committed by GitHub
parent 5248edad62
commit ec32c72363
44 changed files with 4083 additions and 1540 deletions
+237
View File
@@ -0,0 +1,237 @@
"""Repository manager for querying and downloading RPM packages."""
import logging
from pathlib import Path
from typing import List, Set, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
import dnf
import requests
from ..models import Package
from .contents import ContentsParser
logger = logging.getLogger(__name__)
class RepoManager:
"""Manages Rocky Linux repository operations.
Handles:
- Repository configuration with DNF
- Package discovery and filtering
- Package downloads with progress tracking
"""
def __init__(
self,
repo_url: str,
version: str,
repo_type: str,
arch: str,
cache_dir: Path,
download_dir: Path,
):
"""Initialize repository manager.
Args:
repo_url: Full repository URL
version: Rocky Linux version (e.g., '9.5')
repo_type: Repository type ('BaseOS' or 'AppStream')
arch: Architecture (e.g., 'x86_64')
cache_dir: Directory for caching metadata
download_dir: Directory for downloading packages
"""
self.repo_url = repo_url
self.version = version
self.repo_type = repo_type
self.arch = arch
self.cache_dir = Path(cache_dir)
self.download_dir = Path(download_dir)
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.download_dir.mkdir(parents=True, exist_ok=True)
# Initialize DNF
self.base = dnf.Base()
self.base.conf.debuglevel = 0
self.base.conf.errorlevel = 0
self.base.conf.cachedir = str(self.cache_dir / "dnf")
self._configure_repo()
self.packages_with_manpages: Optional[Set[str]] = None
def _configure_repo(self):
"""Configure DNF repository."""
repo_id = f"rocky-{self.repo_type.lower()}-{self.version}-{self.arch}"
repo = dnf.repo.Repo(repo_id, self.base.conf)
repo.baseurl = [self.repo_url]
repo.enabled = True
repo.gpgcheck = False # We verify checksums separately
self.base.repos.add(repo)
logger.info(f"Configured repository: {repo_id} at {self.repo_url}")
# Fill the sack (package database)
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
logger.info("Repository metadata loaded")
def discover_packages_with_manpages(self) -> Set[str]:
"""Discover which packages contain man pages using filelists.
This is the key optimization - we parse repository metadata
to identify packages with man pages before downloading anything.
Returns:
Set of package names that contain man pages
"""
if self.packages_with_manpages is not None:
return self.packages_with_manpages
parser = ContentsParser(self.repo_url, self.cache_dir)
self.packages_with_manpages = parser.get_packages_with_manpages()
return self.packages_with_manpages
def list_packages(self, with_manpages_only: bool = True) -> List[Package]:
"""List all packages in the repository.
Args:
with_manpages_only: If True, only return packages with man pages
Returns:
List of Package objects
"""
logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})")
# Get packages with man pages if filtering
manpage_packages = None
if with_manpages_only:
manpage_packages = self.discover_packages_with_manpages()
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
packages = []
# Query all available packages
query = self.base.sack.query().available()
# For each package name, get only one arch (prefer noarch, then our target arch)
seen_names = set()
for pkg in query:
pkg_name = pkg.name
# Skip if we've already added this package
if pkg_name in seen_names:
continue
# Skip if filtering and package doesn't have man pages
if manpage_packages and pkg_name not in manpage_packages:
continue
# Get repo information
repo = pkg.repo
baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url
# Create Package object
package = Package(
name=pkg_name,
version=pkg.version,
release=pkg.release,
arch=pkg.arch,
repo_type=self.repo_type,
location=pkg.location,
baseurl=baseurl,
checksum=pkg.chksum[1] if pkg.chksum else "", # chksum is (type, value)
checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
has_manpages=True if manpage_packages else False,
)
packages.append(package)
seen_names.add(pkg_name)
logger.info(f"Found {len(packages)} packages to process")
return sorted(packages) # Sort by name for consistent ordering
def download_package(self, package: Package) -> bool:
"""Download a single package.
Args:
package: Package to download
Returns:
True if download successful, False otherwise
"""
download_path = self.download_dir / package.filename
package.download_path = download_path
# Skip if already downloaded
if download_path.exists():
logger.debug(f"Package already downloaded: {package.filename}")
return True
try:
logger.info(f"Downloading {package.filename}")
response = requests.get(package.download_url, timeout=300, stream=True)
response.raise_for_status()
# Download with progress (optional: could add progress bar here)
with open(download_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
logger.debug(f"Downloaded: {package.filename}")
return True
except Exception as e:
logger.error(f"Error downloading {package.filename}: {e}")
# Clean up partial download
if download_path.exists():
download_path.unlink()
return False
def download_packages(
self,
packages: List[Package],
max_workers: int = 5
) -> List[Package]:
"""Download multiple packages in parallel.
Args:
packages: List of packages to download
max_workers: Maximum number of parallel downloads
Returns:
List of successfully downloaded packages
"""
downloaded = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all download tasks
future_to_pkg = {
executor.submit(self.download_package, pkg): pkg
for pkg in packages
}
# Process completed downloads
for future in as_completed(future_to_pkg):
pkg = future_to_pkg[future]
try:
if future.result():
downloaded.append(pkg)
except Exception as e:
logger.error(f"Error processing {pkg.name}: {e}")
logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages")
return downloaded
def cleanup_package(self, package: Package):
"""Delete a downloaded package file.
Args:
package: Package to clean up
"""
if package.download_path and package.download_path.exists():
package.download_path.unlink()
logger.debug(f"Deleted: {package.filename}")