CUSP-1256 (#1)

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

---------

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
Stephen Simpson
2025-11-20 12:16:33 -05:00
committed by GitHub
parent 5248edad62
commit ec32c72363
44 changed files with 4083 additions and 1540 deletions

View File

@@ -0,0 +1,110 @@
"""Configuration management for Rocky Man."""
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional
@dataclass
class Config:
    """Configuration for Rocky Man page generation.

    List-valued options default to ``None`` and are replaced with concrete
    defaults in ``__post_init__`` so that instances never share a list.

    Attributes:
        base_url: Base URL for Rocky Linux mirror
        content_dir: Content directory path (usually 'pub/rocky')
        versions: List of Rocky Linux versions to process (e.g., ['8.10', '9.5'])
        architectures: List of architectures to consider (we'll pick one)
        repo_types: Repository types to process (e.g., ['BaseOS', 'AppStream'])
        download_dir: Directory for downloading RPM packages
        extract_dir: Directory for extracting man pages
        output_dir: Directory for generated HTML files
        keep_rpms: Whether to keep downloaded RPM files after processing
        keep_extracts: Whether to keep extracted man files after processing
        parallel_downloads: Number of parallel downloads
        parallel_conversions: Number of parallel HTML conversions
        skip_sections: Man sections to skip; defaults to ['3', '3p', '3pm']
        skip_packages: Package names to skip; defaults to a short list of
            high-volume API documentation packages
        skip_languages: Whether to skip non-English languages
        allow_all_sections: If True, skip_sections is forced to an empty list
    """

    # Repository configuration
    # NOTE(review): the default mirror URL is plain HTTP - confirm whether
    # HTTPS should be preferred for package downloads.
    base_url: str = "http://dl.rockylinux.org/"
    content_dir: str = "pub/rocky"
    versions: Optional[List[str]] = None
    architectures: Optional[List[str]] = None
    repo_types: Optional[List[str]] = None

    # Directory configuration
    download_dir: Path = Path("/data/tmp/downloads")
    extract_dir: Path = Path("/data/tmp/extracts")
    output_dir: Path = Path("/data/html")

    # Cleanup options
    keep_rpms: bool = False
    keep_extracts: bool = False

    # Performance options
    parallel_downloads: int = 5
    parallel_conversions: int = 10

    # Filtering options
    skip_sections: Optional[List[str]] = None
    skip_packages: Optional[List[str]] = None
    skip_languages: bool = True  # Skip non-English languages by default
    allow_all_sections: bool = False  # Override skip_sections if True

    def __post_init__(self):
        """Fill in list defaults and coerce directory settings to Path.

        No directories are created here; only the attribute values are
        normalized.
        """
        if self.versions is None:
            self.versions = ["8.10", "9.6", "10.0"]

        if self.architectures is None:
            # Man pages are arch-independent, so we just need one
            # We prefer x86_64 as it's most common, fallback to others
            self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"]

        if self.repo_types is None:
            self.repo_types = ["BaseOS", "AppStream"]

        # allow_all_sections wins over any explicitly supplied skip_sections;
        # otherwise fall back to the man3 library API sections.
        if self.allow_all_sections:
            self.skip_sections = []
        elif self.skip_sections is None:
            self.skip_sections = ["3", "3p", "3pm"]

        # Set default skip packages (high-volume API docs)
        if self.skip_packages is None:
            self.skip_packages = [
                "lapack",
                "dpdk-devel",
                "gl-manpages",
            ]

        # Callers may pass plain strings; ensure all paths are Path objects
        self.download_dir = Path(self.download_dir)
        self.extract_dir = Path(self.extract_dir)
        self.output_dir = Path(self.output_dir)

    def get_repo_url(self, version: str, repo_type: str, arch: str) -> str:
        """Construct repository URL for given parameters.

        Trailing slashes on ``base_url`` and leading/trailing slashes on
        ``content_dir`` are ignored, so the result never contains an
        accidental ``//`` in its path. An empty ``content_dir`` is omitted.

        Args:
            version: Rocky Linux version (e.g., '9.5')
            repo_type: Repository type ('BaseOS' or 'AppStream')
            arch: Architecture (e.g., 'x86_64')

        Returns:
            Full repository URL, ending with a trailing slash
        """
        root = self.base_url.rstrip("/")
        content = self.content_dir.strip("/")
        prefix = f"{root}/{content}" if content else root
        return f"{prefix}/{version}/{repo_type}/{arch}/os/"

    def get_version_output_dir(self, version: str) -> Path:
        """Get output directory for a specific version."""
        return self.output_dir / version

    def get_version_download_dir(self, version: str) -> Path:
        """Get download directory for a specific version."""
        return self.download_dir / version

    def get_version_extract_dir(self, version: str) -> Path:
        """Get extract directory for a specific version."""
        return self.extract_dir / version