ec32c72363
* Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> * Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> --------- Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
238 lines
7.8 KiB
Python
238 lines
7.8 KiB
Python
"""Repository manager for querying and downloading RPM packages."""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import List, Set, Optional
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
import dnf
|
|
import requests
|
|
|
|
from ..models import Package
|
|
from .contents import ContentsParser
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RepoManager:
|
|
"""Manages Rocky Linux repository operations.
|
|
|
|
Handles:
|
|
- Repository configuration with DNF
|
|
- Package discovery and filtering
|
|
- Package downloads with progress tracking
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
repo_url: str,
|
|
version: str,
|
|
repo_type: str,
|
|
arch: str,
|
|
cache_dir: Path,
|
|
download_dir: Path,
|
|
):
|
|
"""Initialize repository manager.
|
|
|
|
Args:
|
|
repo_url: Full repository URL
|
|
version: Rocky Linux version (e.g., '9.5')
|
|
repo_type: Repository type ('BaseOS' or 'AppStream')
|
|
arch: Architecture (e.g., 'x86_64')
|
|
cache_dir: Directory for caching metadata
|
|
download_dir: Directory for downloading packages
|
|
"""
|
|
self.repo_url = repo_url
|
|
self.version = version
|
|
self.repo_type = repo_type
|
|
self.arch = arch
|
|
self.cache_dir = Path(cache_dir)
|
|
self.download_dir = Path(download_dir)
|
|
|
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
self.download_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Initialize DNF
|
|
self.base = dnf.Base()
|
|
self.base.conf.debuglevel = 0
|
|
self.base.conf.errorlevel = 0
|
|
self.base.conf.cachedir = str(self.cache_dir / "dnf")
|
|
|
|
self._configure_repo()
|
|
self.packages_with_manpages: Optional[Set[str]] = None
|
|
|
|
def _configure_repo(self):
|
|
"""Configure DNF repository."""
|
|
repo_id = f"rocky-{self.repo_type.lower()}-{self.version}-{self.arch}"
|
|
repo = dnf.repo.Repo(repo_id, self.base.conf)
|
|
repo.baseurl = [self.repo_url]
|
|
repo.enabled = True
|
|
repo.gpgcheck = False # We verify checksums separately
|
|
|
|
self.base.repos.add(repo)
|
|
logger.info(f"Configured repository: {repo_id} at {self.repo_url}")
|
|
|
|
# Fill the sack (package database)
|
|
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
|
logger.info("Repository metadata loaded")
|
|
|
|
def discover_packages_with_manpages(self) -> Set[str]:
|
|
"""Discover which packages contain man pages using filelists.
|
|
|
|
This is the key optimization - we parse repository metadata
|
|
to identify packages with man pages before downloading anything.
|
|
|
|
Returns:
|
|
Set of package names that contain man pages
|
|
"""
|
|
if self.packages_with_manpages is not None:
|
|
return self.packages_with_manpages
|
|
|
|
parser = ContentsParser(self.repo_url, self.cache_dir)
|
|
self.packages_with_manpages = parser.get_packages_with_manpages()
|
|
|
|
return self.packages_with_manpages
|
|
|
|
def list_packages(self, with_manpages_only: bool = True) -> List[Package]:
|
|
"""List all packages in the repository.
|
|
|
|
Args:
|
|
with_manpages_only: If True, only return packages with man pages
|
|
|
|
Returns:
|
|
List of Package objects
|
|
"""
|
|
logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})")
|
|
|
|
# Get packages with man pages if filtering
|
|
manpage_packages = None
|
|
if with_manpages_only:
|
|
manpage_packages = self.discover_packages_with_manpages()
|
|
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
|
|
|
|
packages = []
|
|
|
|
# Query all available packages
|
|
query = self.base.sack.query().available()
|
|
|
|
# For each package name, get only one arch (prefer noarch, then our target arch)
|
|
seen_names = set()
|
|
|
|
for pkg in query:
|
|
pkg_name = pkg.name
|
|
|
|
# Skip if we've already added this package
|
|
if pkg_name in seen_names:
|
|
continue
|
|
|
|
# Skip if filtering and package doesn't have man pages
|
|
if manpage_packages and pkg_name not in manpage_packages:
|
|
continue
|
|
|
|
# Get repo information
|
|
repo = pkg.repo
|
|
baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url
|
|
|
|
# Create Package object
|
|
package = Package(
|
|
name=pkg_name,
|
|
version=pkg.version,
|
|
release=pkg.release,
|
|
arch=pkg.arch,
|
|
repo_type=self.repo_type,
|
|
location=pkg.location,
|
|
baseurl=baseurl,
|
|
checksum=pkg.chksum[1] if pkg.chksum else "", # chksum is (type, value)
|
|
checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
|
|
has_manpages=True if manpage_packages else False,
|
|
)
|
|
|
|
packages.append(package)
|
|
seen_names.add(pkg_name)
|
|
|
|
logger.info(f"Found {len(packages)} packages to process")
|
|
return sorted(packages) # Sort by name for consistent ordering
|
|
|
|
def download_package(self, package: Package) -> bool:
|
|
"""Download a single package.
|
|
|
|
Args:
|
|
package: Package to download
|
|
|
|
Returns:
|
|
True if download successful, False otherwise
|
|
"""
|
|
download_path = self.download_dir / package.filename
|
|
package.download_path = download_path
|
|
|
|
# Skip if already downloaded
|
|
if download_path.exists():
|
|
logger.debug(f"Package already downloaded: {package.filename}")
|
|
return True
|
|
|
|
try:
|
|
logger.info(f"Downloading {package.filename}")
|
|
response = requests.get(package.download_url, timeout=300, stream=True)
|
|
response.raise_for_status()
|
|
|
|
# Download with progress (optional: could add progress bar here)
|
|
with open(download_path, 'wb') as f:
|
|
for chunk in response.iter_content(chunk_size=8192):
|
|
if chunk:
|
|
f.write(chunk)
|
|
|
|
logger.debug(f"Downloaded: {package.filename}")
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error downloading {package.filename}: {e}")
|
|
# Clean up partial download
|
|
if download_path.exists():
|
|
download_path.unlink()
|
|
return False
|
|
|
|
def download_packages(
|
|
self,
|
|
packages: List[Package],
|
|
max_workers: int = 5
|
|
) -> List[Package]:
|
|
"""Download multiple packages in parallel.
|
|
|
|
Args:
|
|
packages: List of packages to download
|
|
max_workers: Maximum number of parallel downloads
|
|
|
|
Returns:
|
|
List of successfully downloaded packages
|
|
"""
|
|
downloaded = []
|
|
|
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
# Submit all download tasks
|
|
future_to_pkg = {
|
|
executor.submit(self.download_package, pkg): pkg
|
|
for pkg in packages
|
|
}
|
|
|
|
# Process completed downloads
|
|
for future in as_completed(future_to_pkg):
|
|
pkg = future_to_pkg[future]
|
|
try:
|
|
if future.result():
|
|
downloaded.append(pkg)
|
|
except Exception as e:
|
|
logger.error(f"Error processing {pkg.name}: {e}")
|
|
|
|
logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages")
|
|
return downloaded
|
|
|
|
def cleanup_package(self, package: Package):
|
|
"""Delete a downloaded package file.
|
|
|
|
Args:
|
|
package: Package to clean up
|
|
"""
|
|
if package.download_path and package.download_path.exists():
|
|
package.download_path.unlink()
|
|
logger.debug(f"Deleted: {package.filename}")
|