CUSP-1256 (#1)
* Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> * Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> --------- Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,237 @@
|
||||
"""Repository manager for querying and downloading RPM packages."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Set, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import dnf
|
||||
import requests
|
||||
|
||||
from ..models import Package
|
||||
from .contents import ContentsParser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RepoManager:
|
||||
"""Manages Rocky Linux repository operations.
|
||||
|
||||
Handles:
|
||||
- Repository configuration with DNF
|
||||
- Package discovery and filtering
|
||||
- Package downloads with progress tracking
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo_url: str,
|
||||
version: str,
|
||||
repo_type: str,
|
||||
arch: str,
|
||||
cache_dir: Path,
|
||||
download_dir: Path,
|
||||
):
|
||||
"""Initialize repository manager.
|
||||
|
||||
Args:
|
||||
repo_url: Full repository URL
|
||||
version: Rocky Linux version (e.g., '9.5')
|
||||
repo_type: Repository type ('BaseOS' or 'AppStream')
|
||||
arch: Architecture (e.g., 'x86_64')
|
||||
cache_dir: Directory for caching metadata
|
||||
download_dir: Directory for downloading packages
|
||||
"""
|
||||
self.repo_url = repo_url
|
||||
self.version = version
|
||||
self.repo_type = repo_type
|
||||
self.arch = arch
|
||||
self.cache_dir = Path(cache_dir)
|
||||
self.download_dir = Path(download_dir)
|
||||
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.download_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize DNF
|
||||
self.base = dnf.Base()
|
||||
self.base.conf.debuglevel = 0
|
||||
self.base.conf.errorlevel = 0
|
||||
self.base.conf.cachedir = str(self.cache_dir / "dnf")
|
||||
|
||||
self._configure_repo()
|
||||
self.packages_with_manpages: Optional[Set[str]] = None
|
||||
|
||||
def _configure_repo(self):
|
||||
"""Configure DNF repository."""
|
||||
repo_id = f"rocky-{self.repo_type.lower()}-{self.version}-{self.arch}"
|
||||
repo = dnf.repo.Repo(repo_id, self.base.conf)
|
||||
repo.baseurl = [self.repo_url]
|
||||
repo.enabled = True
|
||||
repo.gpgcheck = False # We verify checksums separately
|
||||
|
||||
self.base.repos.add(repo)
|
||||
logger.info(f"Configured repository: {repo_id} at {self.repo_url}")
|
||||
|
||||
# Fill the sack (package database)
|
||||
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
||||
logger.info("Repository metadata loaded")
|
||||
|
||||
def discover_packages_with_manpages(self) -> Set[str]:
|
||||
"""Discover which packages contain man pages using filelists.
|
||||
|
||||
This is the key optimization - we parse repository metadata
|
||||
to identify packages with man pages before downloading anything.
|
||||
|
||||
Returns:
|
||||
Set of package names that contain man pages
|
||||
"""
|
||||
if self.packages_with_manpages is not None:
|
||||
return self.packages_with_manpages
|
||||
|
||||
parser = ContentsParser(self.repo_url, self.cache_dir)
|
||||
self.packages_with_manpages = parser.get_packages_with_manpages()
|
||||
|
||||
return self.packages_with_manpages
|
||||
|
||||
def list_packages(self, with_manpages_only: bool = True) -> List[Package]:
|
||||
"""List all packages in the repository.
|
||||
|
||||
Args:
|
||||
with_manpages_only: If True, only return packages with man pages
|
||||
|
||||
Returns:
|
||||
List of Package objects
|
||||
"""
|
||||
logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})")
|
||||
|
||||
# Get packages with man pages if filtering
|
||||
manpage_packages = None
|
||||
if with_manpages_only:
|
||||
manpage_packages = self.discover_packages_with_manpages()
|
||||
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
|
||||
|
||||
packages = []
|
||||
|
||||
# Query all available packages
|
||||
query = self.base.sack.query().available()
|
||||
|
||||
# For each package name, get only one arch (prefer noarch, then our target arch)
|
||||
seen_names = set()
|
||||
|
||||
for pkg in query:
|
||||
pkg_name = pkg.name
|
||||
|
||||
# Skip if we've already added this package
|
||||
if pkg_name in seen_names:
|
||||
continue
|
||||
|
||||
# Skip if filtering and package doesn't have man pages
|
||||
if manpage_packages and pkg_name not in manpage_packages:
|
||||
continue
|
||||
|
||||
# Get repo information
|
||||
repo = pkg.repo
|
||||
baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url
|
||||
|
||||
# Create Package object
|
||||
package = Package(
|
||||
name=pkg_name,
|
||||
version=pkg.version,
|
||||
release=pkg.release,
|
||||
arch=pkg.arch,
|
||||
repo_type=self.repo_type,
|
||||
location=pkg.location,
|
||||
baseurl=baseurl,
|
||||
checksum=pkg.chksum[1] if pkg.chksum else "", # chksum is (type, value)
|
||||
checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
|
||||
has_manpages=True if manpage_packages else False,
|
||||
)
|
||||
|
||||
packages.append(package)
|
||||
seen_names.add(pkg_name)
|
||||
|
||||
logger.info(f"Found {len(packages)} packages to process")
|
||||
return sorted(packages) # Sort by name for consistent ordering
|
||||
|
||||
def download_package(self, package: Package) -> bool:
|
||||
"""Download a single package.
|
||||
|
||||
Args:
|
||||
package: Package to download
|
||||
|
||||
Returns:
|
||||
True if download successful, False otherwise
|
||||
"""
|
||||
download_path = self.download_dir / package.filename
|
||||
package.download_path = download_path
|
||||
|
||||
# Skip if already downloaded
|
||||
if download_path.exists():
|
||||
logger.debug(f"Package already downloaded: {package.filename}")
|
||||
return True
|
||||
|
||||
try:
|
||||
logger.info(f"Downloading {package.filename}")
|
||||
response = requests.get(package.download_url, timeout=300, stream=True)
|
||||
response.raise_for_status()
|
||||
|
||||
# Download with progress (optional: could add progress bar here)
|
||||
with open(download_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
|
||||
logger.debug(f"Downloaded: {package.filename}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading {package.filename}: {e}")
|
||||
# Clean up partial download
|
||||
if download_path.exists():
|
||||
download_path.unlink()
|
||||
return False
|
||||
|
||||
def download_packages(
|
||||
self,
|
||||
packages: List[Package],
|
||||
max_workers: int = 5
|
||||
) -> List[Package]:
|
||||
"""Download multiple packages in parallel.
|
||||
|
||||
Args:
|
||||
packages: List of packages to download
|
||||
max_workers: Maximum number of parallel downloads
|
||||
|
||||
Returns:
|
||||
List of successfully downloaded packages
|
||||
"""
|
||||
downloaded = []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all download tasks
|
||||
future_to_pkg = {
|
||||
executor.submit(self.download_package, pkg): pkg
|
||||
for pkg in packages
|
||||
}
|
||||
|
||||
# Process completed downloads
|
||||
for future in as_completed(future_to_pkg):
|
||||
pkg = future_to_pkg[future]
|
||||
try:
|
||||
if future.result():
|
||||
downloaded.append(pkg)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {pkg.name}: {e}")
|
||||
|
||||
logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages")
|
||||
return downloaded
|
||||
|
||||
def cleanup_package(self, package: Package):
|
||||
"""Delete a downloaded package file.
|
||||
|
||||
Args:
|
||||
package: Package to clean up
|
||||
"""
|
||||
if package.download_path and package.download_path.exists():
|
||||
package.download_path.unlink()
|
||||
logger.debug(f"Deleted: {package.filename}")
|
||||
Reference in New Issue
Block a user