"""Configuration management for Rocky Man.""" from dataclasses import dataclass from pathlib import Path from typing import List @dataclass class Config: """Configuration for Rocky Man page generation. Attributes: base_url: Base URL for Rocky Linux mirror content_dir: Content directory path (usually 'pub/rocky') versions: List of Rocky Linux versions to process (e.g., ['8.10', '9.5']) architectures: List of architectures to consider (we'll pick one) repo_types: Repository types to process (e.g., ['BaseOS', 'AppStream']) download_dir: Directory for downloading RPM packages extract_dir: Directory for extracting man pages output_dir: Directory for generated HTML files keep_rpms: Whether to keep downloaded RPM files after processing keep_extracts: Whether to keep extracted man files after processing parallel_downloads: Number of parallel downloads parallel_conversions: Number of parallel HTML conversions """ # Repository configuration base_url: str = "http://dl.rockylinux.org/" content_dir: str = "pub/rocky" versions: List[str] = None architectures: List[str] = None repo_types: List[str] = None # Directory configuration download_dir: Path = Path("/data/tmp/downloads") extract_dir: Path = Path("/data/tmp/extracts") output_dir: Path = Path("/data/html") # Cleanup options keep_rpms: bool = False keep_extracts: bool = False # Performance options parallel_downloads: int = 5 parallel_conversions: int = 10 # Filtering options skip_sections: List[str] = None skip_packages: List[str] = None skip_languages: bool = True # Skip non-English languages by default allow_all_sections: bool = False # Override skip_sections if True def __post_init__(self): """Set defaults and ensure directories exist.""" if self.versions is None: self.versions = ["8.10", "9.6", "10.0"] if self.architectures is None: # Man pages are arch-independent, so we just need one # We prefer x86_64 as it's most common, fallback to others self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"] if self.repo_types is None: self.repo_types = ["BaseOS", "AppStream"] # Set default skip sections (man3 library APIs) if self.skip_sections is None and not self.allow_all_sections: self.skip_sections = ["3", "3p", "3pm"] elif self.allow_all_sections: self.skip_sections = [] # Set default skip packages (high-volume API docs) if self.skip_packages is None: self.skip_packages = [ "lapack", "dpdk-devel", "gl-manpages", ] # Ensure all paths are Path objects self.download_dir = Path(self.download_dir) self.extract_dir = Path(self.extract_dir) self.output_dir = Path(self.output_dir) def get_repo_url(self, version: str, repo_type: str, arch: str) -> str: """Construct repository URL for given parameters. Args: version: Rocky Linux version (e.g., '9.5') repo_type: Repository type ('BaseOS' or 'AppStream') arch: Architecture (e.g., 'x86_64') Returns: Full repository URL """ url = self.base_url.rstrip('/') path = f"{self.content_dir}/{version}/{repo_type}/{arch}/os" return f"{url}/{path}/" def get_version_output_dir(self, version: str) -> Path: """Get output directory for a specific version.""" return self.output_dir / version def get_version_download_dir(self, version: str) -> Path: """Get download directory for a specific version.""" return self.download_dir / version def get_version_extract_dir(self, version: str) -> Path: """Get extract directory for a specific version.""" return self.extract_dir / version