CUSP-1256 (#1)
* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

---------

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
src/__init__.py (new file, 0 lines)
src/rocky_man/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
from .utils.config import Config

__version__ = "0.1.0"

__all__ = ["Config"]
src/rocky_man/main.py (new file, 377 lines)
@@ -0,0 +1,377 @@
"""Main entry point for Rocky Man."""

import argparse
import logging
import sys
from pathlib import Path

from .utils.config import Config
from .repo import RepoManager
from .processor import ManPageExtractor, ManPageConverter
from .web import WebGenerator


def setup_logging(verbose: bool = False):
    """Configure logging."""
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )


def process_version(
    config: Config,
    version: str,
    template_dir: Path
) -> bool:
    """Process a single Rocky Linux version.

    Args:
        config: Configuration object
        version: Rocky Linux version to process
        template_dir: Path to templates directory

    Returns:
        True if successful
    """
    logger = logging.getLogger(__name__)
    logger.info(f"Processing Rocky Linux {version}")

    # Setup directories for this version
    version_download_dir = config.get_version_download_dir(version)
    version_extract_dir = config.get_version_extract_dir(version)
    version_output_dir = config.get_version_output_dir(version)

    all_man_files = []

    # Process each repository type
    for repo_type in config.repo_types:
        logger.info(f"Processing {repo_type} repository")

        # Use first available architecture (man pages are arch-independent)
        arch = config.architectures[0]

        # Get repository URL
        repo_url = config.get_repo_url(version, repo_type, arch)

        # Create cache dir for this repo
        cache_dir = config.download_dir / f".cache/{version}/{repo_type}"

        try:
            # Initialize repository manager
            repo_manager = RepoManager(
                repo_url=repo_url,
                version=version,
                repo_type=repo_type,
                arch=arch,
                cache_dir=cache_dir,
                download_dir=version_download_dir
            )

            # List packages (with man pages only)
            packages = repo_manager.list_packages(with_manpages_only=True)

            if not packages:
                logger.warning(f"No packages found in {repo_type}")
                continue

            logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")

            # Filter out packages that should be skipped
            if config.skip_packages:
                original_count = len(packages)
                packages = [
                    pkg for pkg in packages
                    if pkg.name not in config.skip_packages
                ]
                filtered_count = original_count - len(packages)
                if filtered_count > 0:
                    logger.info(f"Filtered out {filtered_count} packages based on skip list")
                    logger.info(f"Processing {len(packages)} packages")

            # Download packages
            logger.info("Downloading packages...")
            downloaded = repo_manager.download_packages(
                packages,
                max_workers=config.parallel_downloads
            )

            # Extract man pages
            logger.info("Extracting man pages...")
            extractor = ManPageExtractor(
                version_extract_dir,
                skip_sections=config.skip_sections,
                skip_languages=config.skip_languages
            )
            man_files = extractor.extract_from_packages(
                downloaded,
                max_workers=config.parallel_downloads
            )

            logger.info(f"Extracted {len(man_files)} man pages")

            # Read content for each man file
            logger.info("Reading man page content...")
            man_files_with_content = []
            for man_file in man_files:
                content = extractor.read_manpage_content(man_file)
                if content:
                    man_files_with_content.append((man_file, content))

            # Convert to HTML
            logger.info("Converting man pages to HTML...")
            converter = ManPageConverter(version_output_dir)
            converted = converter.convert_many(
                man_files_with_content,
                max_workers=config.parallel_conversions
            )

            all_man_files.extend(converted)

            # Cleanup if requested
            if not config.keep_rpms:
                logger.info("Cleaning up downloaded packages...")
                for package in downloaded:
                    repo_manager.cleanup_package(package)

            if not config.keep_extracts:
                logger.info("Cleaning up extracted files...")
                for package in downloaded:
                    extractor.cleanup_extracts(package)

        except Exception as e:
            logger.error(f"Error processing {repo_type}: {e}", exc_info=True)
            continue

    if not all_man_files:
        logger.error(f"No man pages were successfully processed for version {version}")
        return False

    # Link cross-references between man pages
    logger.info("Linking cross-references...")
    converter = ManPageConverter(version_output_dir)
    converter.link_cross_references(all_man_files)

    # Generate web pages
    logger.info("Generating web pages...")
    web_gen = WebGenerator(template_dir, config.output_dir)

    # Generate search index
    search_index = web_gen.generate_search_index(all_man_files, version)
    web_gen.save_search_index(search_index, version)

    # Generate index page
    web_gen.generate_index(version, search_index)

    # Generate packages index page
    web_gen.generate_packages_index(version, search_index)

    # Wrap man pages in templates
    logger.info("Generating man page HTML...")
    for man_file in all_man_files:
        web_gen.generate_manpage_html(man_file, version)

    logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}")
    return True


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='Generate HTML documentation for Rocky Linux man pages'
    )

    parser.add_argument(
        '--versions',
        nargs='+',
        default=['8.10', '9.6', '10.0'],
        help='Rocky Linux versions to process (default: 8.10 9.6 10.0)'
    )

    parser.add_argument(
        '--repo-types',
        nargs='+',
        default=['BaseOS', 'AppStream'],
        help='Repository types to process (default: BaseOS AppStream)'
    )

    parser.add_argument(
        '--output-dir',
        type=Path,
        default=Path('./html'),
        help='Output directory for HTML files (default: ./html)'
    )

    parser.add_argument(
        '--download-dir',
        type=Path,
        default=Path('./tmp/downloads'),
        help='Directory for downloading packages (default: ./tmp/downloads)'
    )

    parser.add_argument(
        '--extract-dir',
        type=Path,
        default=Path('./tmp/extracts'),
        help='Directory for extracting man pages (default: ./tmp/extracts)'
    )

    parser.add_argument(
        '--keep-rpms',
        action='store_true',
        help='Keep downloaded RPM files after processing'
    )

    parser.add_argument(
        '--keep-extracts',
        action='store_true',
        help='Keep extracted man files after processing'
    )

    parser.add_argument(
        '--parallel-downloads',
        type=int,
        default=5,
        help='Number of parallel downloads (default: 5)'
    )

    parser.add_argument(
        '--parallel-conversions',
        type=int,
        default=10,
        help='Number of parallel HTML conversions (default: 10)'
    )

    parser.add_argument(
        '--mirror',
        default='http://dl.rockylinux.org/',
        help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)'
    )

    parser.add_argument(
        '--template-dir',
        type=Path,
        default=Path(__file__).parent.parent.parent / 'templates',
        help='Template directory (default: ./templates)'
    )

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose logging'
    )

    parser.add_argument(
        '--skip-sections',
        nargs='*',
        default=None,
        help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.'
    )

    parser.add_argument(
        '--skip-packages',
        nargs='*',
        default=None,
        help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.'
    )

    parser.add_argument(
        '--skip-languages',
        action='store_true',
        default=None,
        help='Skip non-English man pages (default: enabled)'
    )

    parser.add_argument(
        '--keep-languages',
        action='store_true',
        help='Keep all languages (disables --skip-languages)'
    )

    parser.add_argument(
        '--allow-all-sections',
        action='store_true',
        help='Include all man sections (overrides --skip-sections)'
    )

    args = parser.parse_args()

    # Setup logging
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    # Handle filtering options
    skip_languages = True  # default
    if args.keep_languages:
        skip_languages = False
    elif args.skip_languages is not None:
        skip_languages = args.skip_languages

    # Create configuration
    config = Config(
        base_url=args.mirror,
        versions=args.versions,
        repo_types=args.repo_types,
        download_dir=args.download_dir,
        extract_dir=args.extract_dir,
        output_dir=args.output_dir,
        keep_rpms=args.keep_rpms,
        keep_extracts=args.keep_extracts,
        parallel_downloads=args.parallel_downloads,
        parallel_conversions=args.parallel_conversions,
        skip_sections=args.skip_sections,
        skip_packages=args.skip_packages,
        skip_languages=skip_languages,
        allow_all_sections=args.allow_all_sections
    )

    logger.info("Rocky Man - Rocky Linux Man Page Generator")
    logger.info(f"Versions: {', '.join(config.versions)}")
    logger.info(f"Repositories: {', '.join(config.repo_types)}")
    logger.info(f"Output directory: {config.output_dir}")

    # Log filtering configuration
    if config.skip_sections:
        logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
    else:
        logger.info("Including all man sections")

    if config.skip_packages:
        logger.info(f"Skipping packages: {', '.join(config.skip_packages)}")

    if config.skip_languages:
        logger.info("Skipping non-English languages")
    else:
        logger.info("Including all languages")

    # Process each version
    processed_versions = []
    for version in config.versions:
        try:
            if process_version(config, version, args.template_dir):
                processed_versions.append(version)
        except Exception as e:
            logger.error(f"Failed to process version {version}: {e}", exc_info=True)

    if not processed_versions:
        logger.error("No versions were successfully processed")
        return 1

    # Generate root index
    logger.info("Generating root index page...")
    web_gen = WebGenerator(args.template_dir, config.output_dir)
    web_gen.generate_root_index(processed_versions)

    logger.info("=" * 60)
    logger.info("Processing complete!")
    logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
    logger.info(f"Output directory: {config.output_dir.absolute()}")
    logger.info("=" * 60)

    return 0


if __name__ == '__main__':
    sys.exit(main())
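For reference, the entry point above can also be driven programmatically instead of via the CLI. A minimal sketch, assuming the package is importable as rocky_man; the paths below are illustrative, not project defaults:

# Hypothetical programmatic driver for the pipeline above.
from pathlib import Path

from rocky_man.main import process_version, setup_logging
from rocky_man.utils.config import Config

setup_logging(verbose=True)
config = Config(versions=["9.6"], output_dir=Path("./html"))
# Returns True when at least one man page was generated for the version.
process_version(config, "9.6", Path("./templates"))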
src/rocky_man/models/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
"""Data models for Rocky Man."""

from .package import Package
from .manfile import ManFile

__all__ = ["Package", "ManFile"]
src/rocky_man/models/manfile.py (new file, 130 lines)
@@ -0,0 +1,130 @@
"""ManFile model representing a man page file."""

from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import re


@dataclass
class ManFile:
    """Represents a man page file extracted from an RPM package.

    Attributes:
        file_path: Path to the extracted man page file
        package_name: Name of the package this man page belongs to
        section: Man page section (1-9)
        name: Man page name without extension
        language: Language code (e.g., 'en', 'es', None for default)
        content: Raw man page content (gzipped or plain text)
        html_content: Converted HTML content
        html_path: Path where HTML file is saved
    """

    file_path: Path
    package_name: str
    section: Optional[str] = None
    name: Optional[str] = None
    language: Optional[str] = None
    content: Optional[bytes] = None
    html_content: Optional[str] = None
    html_path: Optional[Path] = None

    def __post_init__(self):
        """Parse file information from the path."""
        self._parse_path()

    def _parse_path(self):
        """Extract section, name, and language from the file path.

        Example paths:
            /usr/share/man/man1/bash.1.gz
            /usr/share/man/es/man1/bash.1.gz
            /usr/share/man/man3/printf.3.gz
        """
        parts = self.file_path.parts
        filename = self.file_path.name

        # Remove .gz extension if present
        if filename.endswith('.gz'):
            filename = filename[:-3]

        # Extract section from parent directory (e.g., 'man1', 'man3p', 'man3pm')
        for part in reversed(parts):
            if part.startswith('man') and len(part) > 3:
                # Check if it starts with 'man' followed by a digit
                if part[3].isdigit():
                    self.section = part[3:]
                    break

        # Extract section from filename if not found yet (e.g., 'foo.3pm' -> section '3pm')
        # and extract name
        name_parts = filename.split('.')
        if len(name_parts) >= 2:
            # Try to identify section from last part
            potential_section = name_parts[-1]
            # Section is typically a digit optionally followed by letters (1, 3p, 3pm, etc.)
            if potential_section and potential_section[0].isdigit():
                if not self.section:
                    self.section = potential_section
                self.name = '.'.join(name_parts[:-1])
            else:
                self.name = name_parts[0]
        else:
            self.name = name_parts[0]

        # Check for language subdirectory
        # Pattern: /usr/share/man/<lang>/man<section>/
        for i, part in enumerate(parts):
            if part == 'man' and i + 1 < len(parts):
                next_part = parts[i + 1]
                # If the next part is not a 'man<digit>...' section directory,
                # treat it as a language code.  Checking only the first character
                # after 'man' keeps 'man3p'/'man3pm' recognized as section
                # directories rather than language codes.
                is_section_dir = (
                    next_part.startswith('man')
                    and len(next_part) > 3
                    and next_part[3].isdigit()
                )
                if not is_section_dir:
                    # Common language codes are 2-5 chars (en, es, pt_BR, etc.)
                    if len(next_part) <= 5:
                        self.language = next_part
                        break

    @property
    def display_name(self) -> str:
        """Get display name for the man page (e.g., 'bash(1)')."""
        return f"{self.name}({self.section})" if self.section else self.name

    @property
    def html_filename(self) -> str:
        """Get the HTML filename for this man page."""
        # Clean name for filesystem safety
        safe_name = self._clean_filename(self.name)
        suffix = f".{self.language}" if self.language else ""
        return f"{safe_name}.{self.section}{suffix}.html"

    def _clean_filename(self, name: str) -> str:
        """Clean filename for filesystem safety."""
        # Replace problematic characters
        name = name.replace('/', '_')
        name = name.replace(':', '_')
        name = re.sub(r'\.\.', '__', name)
        return name

    @property
    def uri_path(self) -> str:
        """Get the URI path for this man page (relative to version root).

        Returns path like: 'bash/man1/bash.1.html'
        """
        if not self.html_path:
            return ""
        # Get path relative to the version directory
        # Assuming structure: html/<version>/<package>/<section>/<file>.html
        parts = self.html_path.parts
        try:
            # Find the version part (e.g., '9.5') and return everything after it
            for i, part in enumerate(parts):
                if re.match(r'\d+\.\d+', part):  # Version pattern
                    return '/'.join(parts[i+1:])
        except (ValueError, IndexError):
            pass
        return str(self.html_path)

    def __str__(self):
        return f"{self.package_name}: {self.display_name}"
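A quick check of the path parsing above. A minimal sketch with hypothetical values; the expected outputs follow directly from _parse_path and the properties:

from pathlib import Path

from rocky_man.models.manfile import ManFile

mf = ManFile(file_path=Path("/usr/share/man/es/man1/bash.1.gz"),
             package_name="bash")
print(mf.section)        # "1"   (from the man1 directory)
print(mf.language)       # "es"  (from the language subdirectory)
print(mf.display_name)   # "bash(1)"
print(mf.html_filename)  # "bash.1.es.html"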
src/rocky_man/models/package.py (new file, 58 lines)
@@ -0,0 +1,58 @@
"""Package model representing an RPM package."""

from dataclasses import dataclass
from pathlib import Path
from typing import Optional


@dataclass
class Package:
    """Represents an RPM package from a Rocky Linux repository.

    Attributes:
        name: Package name (e.g., 'bash')
        version: Package version
        release: Package release
        arch: Architecture (e.g., 'x86_64', 'noarch')
        repo_type: Repository type ('BaseOS' or 'AppStream')
        location: Relative path in repo (e.g., 'Packages/b/bash-5.1.8-6.el9.x86_64.rpm')
        baseurl: Base URL of the repository
        checksum: Package checksum for verification
        checksum_type: Type of checksum (e.g., 'sha256')
        download_path: Local path where package is downloaded
        has_manpages: Whether this package contains man pages
    """

    name: str
    version: str
    release: str
    arch: str
    repo_type: str
    location: str
    baseurl: str
    checksum: str
    checksum_type: str
    has_manpages: bool = False
    download_path: Optional[Path] = None

    @property
    def filename(self) -> str:
        """Get the RPM filename from the location."""
        return self.location.split("/")[-1]

    @property
    def download_url(self) -> str:
        """Get the full download URL for this package."""
        return f"{self.baseurl.rstrip('/')}/{self.location.lstrip('/')}"

    @property
    def nvra(self) -> str:
        """Get the Name-Version-Release-Arch identifier."""
        return f"{self.name}-{self.version}-{self.release}.{self.arch}"

    def __lt__(self, other):
        """Enable sorting packages by name."""
        return self.name < other.name

    def __str__(self):
        return f"{self.nvra} ({self.repo_type})"
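The derived properties above compose the download URL from baseurl and location. A minimal sketch with made-up package metadata (checksum is a placeholder):

from rocky_man.models.package import Package

pkg = Package(
    name="bash", version="5.1.8", release="9.el9", arch="x86_64",
    repo_type="BaseOS",
    location="Packages/b/bash-5.1.8-9.el9.x86_64.rpm",
    baseurl="http://dl.rockylinux.org/pub/rocky/9.6/BaseOS/x86_64/os/",
    checksum="0000", checksum_type="sha256",  # placeholder values
)
print(pkg.nvra)          # bash-5.1.8-9.el9.x86_64
print(pkg.download_url)  # .../os/Packages/b/bash-5.1.8-9.el9.x86_64.rpm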
src/rocky_man/processor/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from .extractor import ManPageExtractor
from .converter import ManPageConverter

__all__ = ["ManPageExtractor", "ManPageConverter"]
src/rocky_man/processor/converter.py (new file, 292 lines)
@@ -0,0 +1,292 @@
"""Convert man pages to HTML using mandoc."""

import logging
import re
import subprocess
from pathlib import Path
from typing import List, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed

from ..models import ManFile

logger = logging.getLogger(__name__)


class ManPageConverter:
    """Converts man pages to HTML using mandoc.

    Handles:
    - Converting troff to HTML using mandoc
    - Cleaning up HTML output
    - Parallel conversion of multiple man pages
    """

    def __init__(self, output_dir: Path):
        """Initialize converter.

        Args:
            output_dir: Base directory for HTML output
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Check if mandoc is available
        if not self._check_mandoc():
            raise RuntimeError("mandoc is not installed or not in PATH")

    @staticmethod
    def _check_mandoc() -> bool:
        """Check if mandoc is available."""
        try:
            # Run mandoc with no arguments - it will show usage and exit.
            # We just want to verify the command exists, not that it succeeds.
            subprocess.run(
                ['mandoc'],
                capture_output=True,
                timeout=5
            )
            return True
        except FileNotFoundError:
            # mandoc command not found
            return False
        except Exception:
            # Other errors (timeout, etc.) - but mandoc exists
            return True

    def convert(self, man_file: ManFile, content: str) -> bool:
        """Convert a single man page to HTML.

        Args:
            man_file: ManFile object to convert
            content: Raw man page content (troff format)

        Returns:
            True if conversion successful
        """
        try:
            # Run mandoc to convert to HTML
            html = self._run_mandoc(content)
            if not html:
                logger.warning(f"mandoc produced no output for {man_file.display_name}")
                return False

            # Clean up HTML
            html = self._clean_html(html)

            # Store in ManFile object
            man_file.html_content = html

            # Determine output path
            output_path = self._get_output_path(man_file)
            man_file.html_path = output_path

            # Save HTML file
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(html)

            logger.debug(f"Converted {man_file.display_name} -> {output_path}")
            return True

        except Exception as e:
            logger.error(f"Error converting {man_file.display_name}: {e}")
            return False

    def convert_many(
        self,
        man_files: List[tuple],
        max_workers: int = 10
    ) -> List[ManFile]:
        """Convert multiple man pages in parallel.

        Args:
            man_files: List of (ManFile, content) tuples
            max_workers: Maximum number of parallel conversions

        Returns:
            List of successfully converted ManFile objects
        """
        converted = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all conversion tasks
            future_to_manfile = {
                executor.submit(self.convert, man_file, content): man_file
                for man_file, content in man_files
            }

            # Collect results
            for future in as_completed(future_to_manfile):
                man_file = future_to_manfile[future]
                try:
                    if future.result():
                        converted.append(man_file)
                except Exception as e:
                    logger.error(f"Error converting {man_file.display_name}: {e}")

        logger.info(f"Converted {len(converted)}/{len(man_files)} man pages to HTML")
        return converted

    def _run_mandoc(self, content: str) -> Optional[str]:
        """Run mandoc to convert man page to HTML.

        Args:
            content: Raw man page content

        Returns:
            HTML output from mandoc, or None on error
        """
        try:
            result = subprocess.run(
                ['mandoc', '-T', 'html', '-O', 'fragment,toc'],
                input=content.encode('utf-8'),
                capture_output=True,
                timeout=30
            )

            if result.returncode != 0:
                stderr = result.stderr.decode('utf-8', errors='replace')
                logger.warning(f"mandoc returned error: {stderr}")
                # Sometimes mandoc returns non-zero but still produces output
                if result.stdout:
                    return result.stdout.decode('utf-8', errors='replace')
                return None

            return result.stdout.decode('utf-8', errors='replace')

        except subprocess.TimeoutExpired:
            logger.error("mandoc conversion timed out")
            return None
        except Exception as e:
            logger.error(f"Error running mandoc: {e}")
            return None

    def _clean_html(self, html: str) -> str:
        """Clean up mandoc HTML output.

        Args:
            html: Raw HTML from mandoc

        Returns:
            Cleaned HTML
        """
        # Remove empty parentheses in header cells
        html = re.sub(
            r'<td class="head-ltitle">\(\)</td>',
            '<td class="head-ltitle"></td>',
            html
        )
        html = re.sub(
            r'<td class="head-rtitle">\(\)</td>',
            '<td class="head-rtitle"></td>',
            html
        )

        # Strip leading/trailing whitespace
        html = html.strip()

        return html

    def link_cross_references(self, man_files: List[ManFile]) -> None:
        """Add hyperlinks to cross-references in SEE ALSO sections.

        Goes through all converted HTML files and converts man page references
        like pty(4) into working hyperlinks.

        Args:
            man_files: List of all converted ManFile objects
        """
        # Build lookup index: (name, section) -> relative_path
        lookup = {}
        for mf in man_files:
            key = (mf.name.lower(), str(mf.section))
            if key not in lookup:
                # Store the relative path from the version root
                lookup[key] = f"{mf.package_name}/man{mf.section}/{mf.html_filename}"

        logger.info(f"Linking cross-references across {len(man_files)} man pages...")

        # Process each man page HTML file
        for man_file in man_files:
            if not man_file.html_path or not man_file.html_path.exists():
                continue

            try:
                # Read the HTML
                with open(man_file.html_path, 'r', encoding='utf-8') as f:
                    html = f.read()

                # Find and replace man page references.
                # Mandoc outputs references as: <b>name</b>(section)
                # Pattern matches both <b>name</b>(section) and plain name(section)
                pattern = r'<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)'

                def replace_reference(match):
                    full_match = match.group(0)

                    # Check if this match is already inside an <a> tag.
                    # Look back up to 500 chars for context.
                    before_text = html[max(0, match.start()-500):match.start()]

                    # Find the last <a and last </a> before this match
                    last_open = before_text.rfind('<a ')
                    last_close = before_text.rfind('</a>')

                    # If the last <a> is after the last </a>, we're inside a link
                    if last_open > last_close:
                        return full_match

                    if match.group(1):  # <b>name</b>(section) format
                        name = match.group(1).lower()
                        section = match.group(2)
                    else:  # plain name(section) format
                        name = match.group(3).lower()
                        section = match.group(4)

                    # Look up the referenced man page
                    key = (name, section)
                    if key in lookup:
                        # Calculate relative path from current file to target.
                        target_path = lookup[key]
                        # File structure: output_dir/version/package_name/manN/file.html
                        # Need to go up 3 levels to reach version root.
                        # Current: package_name/manN/file.html
                        # Target: other_package/manM/file.html
                        rel_path = f"../../../{target_path}"
                        return f'<a href="{rel_path}">{full_match}</a>'

                    return full_match

                updated_html = re.sub(pattern, replace_reference, html)

                # Only write if something changed
                if updated_html != html:
                    with open(man_file.html_path, 'w', encoding='utf-8') as f:
                        f.write(updated_html)

            except Exception as e:
                logger.warning(f"Error linking references in {man_file.display_name}: {e}")

        logger.info("Cross-reference linking complete")

    def _get_output_path(self, man_file: ManFile) -> Path:
        """Determine output path for HTML file.

        Structure: output_dir/<package>/<section>/<name>.<section>[.<lang>].html

        Args:
            man_file: ManFile object

        Returns:
            Path for HTML output
        """
        # Package directory
        pkg_dir = self.output_dir / man_file.package_name

        # Section directory (man1, man2, etc.)
        section_dir = pkg_dir / f"man{man_file.section}"

        # HTML filename
        filename = man_file.html_filename

        return section_dir / filename
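Outside the class, the mandoc invocation above boils down to a single subprocess call. A standalone sketch; the input path is hypothetical and assumed to be uncompressed troff:

import subprocess

with open("/usr/share/man/man1/ls.1", "rb") as f:  # hypothetical path
    troff = f.read()

# Same flags as _run_mandoc: HTML fragment output plus a table of contents.
result = subprocess.run(
    ["mandoc", "-T", "html", "-O", "fragment,toc"],
    input=troff, capture_output=True, timeout=30,
)
html = result.stdout.decode("utf-8", errors="replace")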
src/rocky_man/processor/extractor.py (new file, 222 lines)
@@ -0,0 +1,222 @@
"""Extract man pages from RPM packages."""

import gzip
import logging
import shutil
from pathlib import Path
from typing import List
from concurrent.futures import ThreadPoolExecutor, as_completed

import rpmfile

from ..models import Package, ManFile

logger = logging.getLogger(__name__)


class ManPageExtractor:
    """Extracts man pages from RPM packages.

    Handles:
    - Extracting man pages from RPMs
    - Reading gzipped man page content
    - Organizing extracted files by package
    """

    def __init__(self, extract_dir: Path, skip_sections: List[str] = None, skip_languages: bool = True):
        """Initialize extractor.

        Args:
            extract_dir: Base directory for extracting man pages
            skip_sections: List of man sections to skip (e.g., ['3', '3p', '3pm'])
            skip_languages: If True, skip non-English man pages
        """
        self.extract_dir = Path(extract_dir)
        self.extract_dir.mkdir(parents=True, exist_ok=True)
        self.skip_sections = skip_sections or []
        self.skip_languages = skip_languages

    def extract_from_package(self, package: Package) -> List[ManFile]:
        """Extract all man pages from a package.

        Args:
            package: Package to extract from

        Returns:
            List of ManFile objects for extracted man pages
        """
        if not package.download_path or not package.download_path.exists():
            logger.warning(f"Package file not found: {package.name}")
            return []

        # Create extraction directory for this package
        pkg_extract_dir = self.extract_dir / package.name
        pkg_extract_dir.mkdir(parents=True, exist_ok=True)

        man_files = []

        try:
            logger.info(f"Extracting man pages from {package.filename}")

            with rpmfile.open(package.download_path) as rpm:
                for member in rpm.getmembers():
                    # Check if this is a man page file
                    if not self._is_manpage(member.name):
                        continue

                    # Create ManFile object
                    extract_path = pkg_extract_dir / member.name.lstrip('/')
                    man_file = ManFile(
                        file_path=extract_path,
                        package_name=package.name
                    )

                    # Apply section filtering
                    if self.skip_sections and man_file.section in self.skip_sections:
                        logger.debug(f"Skipping {man_file.display_name} (section {man_file.section})")
                        continue

                    # Apply language filtering
                    if self.skip_languages and man_file.language and man_file.language != 'en':
                        logger.debug(f"Skipping {man_file.display_name} (language {man_file.language})")
                        continue

                    # Extract the file
                    extract_path.parent.mkdir(parents=True, exist_ok=True)

                    try:
                        content = rpm.extractfile(member).read()
                        with open(extract_path, 'wb') as f:
                            f.write(content)

                        man_file.content = content
                        man_files.append(man_file)

                    except Exception as e:
                        logger.warning(f"Failed to extract {member.name}: {e}")

            logger.info(f"Extracted {len(man_files)} man pages from {package.name}")

        except Exception as e:
            logger.error(f"Error extracting from {package.filename}: {e}")

        return man_files

    def extract_from_packages(
        self,
        packages: List[Package],
        max_workers: int = 5
    ) -> List[ManFile]:
        """Extract man pages from multiple packages in parallel.

        Args:
            packages: List of packages to process
            max_workers: Maximum number of parallel extractions

        Returns:
            List of all extracted ManFile objects
        """
        all_man_files = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all extraction tasks
            future_to_pkg = {
                executor.submit(self.extract_from_package, pkg): pkg
                for pkg in packages
            }

            # Collect results
            for future in as_completed(future_to_pkg):
                pkg = future_to_pkg[future]
                try:
                    man_files = future.result()
                    all_man_files.extend(man_files)
                except Exception as e:
                    logger.error(f"Error processing {pkg.name}: {e}")

        logger.info(f"Extracted total of {len(all_man_files)} man pages from {len(packages)} packages")
        return all_man_files

    def read_manpage_content(self, man_file: ManFile) -> str:
        """Read and decompress man page content.

        Args:
            man_file: ManFile to read

        Returns:
            Decompressed man page content as string
        """
        if not man_file.file_path.exists():
            logger.warning(f"Man page file not found: {man_file.file_path}")
            return ""

        try:
            # Try reading as gzipped file first
            if man_file.file_path.suffix == '.gz':
                with gzip.open(man_file.file_path, 'rb') as f:
                    content = f.read()
            else:
                # Read as plain text
                with open(man_file.file_path, 'rb') as f:
                    content = f.read()

            # Decode with error handling
            return content.decode('utf-8', errors='replace')

        except gzip.BadGzipFile:
            # Not a gzip file, try reading as plain text
            try:
                with open(man_file.file_path, 'rb') as f:
                    content = f.read()
                return content.decode('utf-8', errors='replace')
            except Exception as e:
                logger.error(f"Error reading {man_file.file_path}: {e}")
                return ""

        except Exception as e:
            logger.error(f"Error reading {man_file.file_path}: {e}")
            return ""

    @staticmethod
    def _is_manpage(path: str) -> bool:
        """Check if a file path is a man page.

        Args:
            path: File path to check

        Returns:
            True if this looks like a man page file
        """
        # Must contain /man/ in path
        if '/man/' not in path:
            return False

        # Should be in /usr/share/man/ or /usr/man/
        if not ('/share/man/' in path or path.startswith('/usr/man/')):
            return False

        # Common man page patterns:
        # - /usr/share/man/man1/foo.1.gz
        # - /usr/share/man/es/man1/foo.1.gz
        # - /usr/share/man/man3/printf.3.gz

        parts = path.split('/')

        # Check for man<digit> directory
        has_man_section = any(
            part.startswith('man') and len(part) > 3 and part[3].isdigit()
            for part in parts
        )

        return has_man_section

    def cleanup_extracts(self, package: Package):
        """Clean up extracted files for a package.

        Args:
            package: Package whose extracts to clean up
        """
        pkg_extract_dir = self.extract_dir / package.name
        if pkg_extract_dir.exists():
            shutil.rmtree(pkg_extract_dir)
            logger.debug(f"Cleaned up extracts for {package.name}")
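The extraction loop above relies on three rpmfile calls: open(), getmembers(), and extractfile(). A minimal standalone sketch of that pattern; the RPM path is hypothetical:

import rpmfile

with rpmfile.open("./tmp/downloads/9.6/bash-5.1.8-9.el9.x86_64.rpm") as rpm:
    for member in rpm.getmembers():
        if "/share/man/" in member.name:
            # Raw payload of one man page (often still gzipped).
            data = rpm.extractfile(member).read()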
src/rocky_man/repo/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
from .manager import RepoManager
from .contents import ContentsParser

__all__ = ["RepoManager", "ContentsParser"]
src/rocky_man/repo/contents.py (new file, 221 lines)
@@ -0,0 +1,221 @@
"""Contents file parser for identifying packages with man pages."""

import gzip
import logging
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Set, Dict, Optional
from urllib.parse import urljoin

import requests

logger = logging.getLogger(__name__)


class ContentsParser:
    """Parse repository metadata to identify packages containing man pages.

    This is a key optimization - instead of downloading all packages,
    we parse the filelists.xml to find only packages with man pages.
    """

    def __init__(self, repo_url: str, cache_dir: Path):
        """Initialize the contents parser.

        Args:
            repo_url: Base URL of the repository (e.g., .../BaseOS/x86_64/os/)
            cache_dir: Directory to cache downloaded metadata
        """
        self.repo_url = repo_url.rstrip('/') + '/'
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def get_packages_with_manpages(self) -> Set[str]:
        """Get set of package names that contain man pages.

        Returns:
            Set of package names (e.g., {'bash', 'coreutils', ...})
        """
        logger.info(f"Fetching filelists for {self.repo_url}")

        # Download and parse repomd.xml to find filelists location
        filelists_path = self._get_filelists_path()
        if not filelists_path:
            logger.warning("Could not find filelists in repository metadata")
            return set()

        # Download filelists.xml
        filelists_file = self._download_filelists(filelists_path)
        if not filelists_file:
            logger.warning("Could not download filelists")
            return set()

        # Parse filelists to find packages with man pages
        packages = self._parse_filelists(filelists_file)
        logger.info(f"Found {len(packages)} packages with man pages")

        return packages

    def _get_filelists_path(self) -> Optional[str]:
        """Parse repomd.xml to get the filelists.xml location.

        Returns:
            Relative path to filelists.xml.gz, or None if it cannot be found
        """
        repomd_url = urljoin(self.repo_url, 'repodata/repomd.xml')

        try:
            response = requests.get(repomd_url, timeout=30)
            response.raise_for_status()

            # Parse XML
            root = ET.fromstring(response.content)

            # Find filelists entry
            # XML structure: <repomd><data type="filelists"><location href="..."/></data></repomd>
            ns = {'repo': 'http://linux.duke.edu/metadata/repo'}

            for data in root.findall('repo:data', ns):
                if data.get('type') == 'filelists':
                    location = data.find('repo:location', ns)
                    if location is not None:
                        return location.get('href')

            # Fallback: try without namespace
            for data in root.findall('data'):
                if data.get('type') == 'filelists':
                    location = data.find('location')
                    if location is not None:
                        return location.get('href')

        except Exception as e:
            logger.error(f"Error parsing repomd.xml: {e}")

        return None

    def _download_filelists(self, relative_path: str) -> Optional[Path]:
        """Download filelists.xml.gz file.

        Args:
            relative_path: Relative path from repo root (e.g., 'repodata/...-filelists.xml.gz')

        Returns:
            Path to downloaded file, or None on error
        """
        url = urljoin(self.repo_url, relative_path)
        cache_file = self.cache_dir / relative_path.split('/')[-1]

        # Return cached file if it exists
        if cache_file.exists():
            logger.debug(f"Using cached filelists: {cache_file}")
            return cache_file

        try:
            logger.info(f"Downloading {url}")
            response = requests.get(url, timeout=60, stream=True)
            response.raise_for_status()

            cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(cache_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            return cache_file

        except Exception as e:
            logger.error(f"Error downloading filelists: {e}")
            return None

    def _parse_filelists(self, filelists_path: Path) -> Set[str]:
        """Parse filelists.xml.gz to find packages with man pages.

        Args:
            filelists_path: Path to filelists.xml.gz file

        Returns:
            Set of package names containing man pages
        """
        packages = set()

        try:
            # Open gzipped XML file
            with gzip.open(filelists_path, 'rb') as f:
                # Use iterparse for memory efficiency (files can be large)
                context = ET.iterparse(f, events=('start', 'end'))

                current_package = None
                has_manpage = False

                for event, elem in context:
                    if event == 'start':
                        if elem.tag.endswith('package'):
                            # Get package name from 'name' attribute
                            current_package = elem.get('name')
                            has_manpage = False

                    elif event == 'end':
                        if elem.tag.endswith('file'):
                            # Check if file path contains /man/
                            file_path = elem.text
                            if file_path and '/man/' in file_path:
                                # Could be /usr/share/man/ or /usr/man/
                                if '/share/man/' in file_path or file_path.startswith('/usr/man/'):
                                    has_manpage = True

                        elif elem.tag.endswith('package'):
                            # End of package entry
                            if has_manpage and current_package:
                                packages.add(current_package)

                            # Clear element to free memory
                            elem.clear()
                            current_package = None
                            has_manpage = False

        except Exception as e:
            logger.error(f"Error parsing filelists: {e}")

        return packages

    def get_package_man_files(self, filelists_path: Path) -> Dict[str, list]:
        """Get detailed list of man files for each package.

        Args:
            filelists_path: Path to filelists.xml.gz file

        Returns:
            Dict mapping package name to list of man page paths
        """
        packages = {}

        try:
            with gzip.open(filelists_path, 'rb') as f:
                context = ET.iterparse(f, events=('start', 'end'))

                current_package = None
                current_files = []

                for event, elem in context:
                    if event == 'start':
                        if elem.tag.endswith('package'):
                            current_package = elem.get('name')
                            current_files = []

                    elif event == 'end':
                        if elem.tag.endswith('file'):
                            file_path = elem.text
                            if file_path and '/share/man/' in file_path:
                                current_files.append(file_path)

                        elif elem.tag.endswith('package'):
                            if current_files and current_package:
                                packages[current_package] = current_files

                            elem.clear()
                            current_package = None
                            current_files = []

        except Exception as e:
            logger.error(f"Error parsing filelists: {e}")

        return packages
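Used standalone, the parser above needs only a repository URL and a cache directory. A minimal sketch; the URL and cache path are illustrative:

from pathlib import Path

from rocky_man.repo.contents import ContentsParser

parser = ContentsParser(
    "http://dl.rockylinux.org/pub/rocky/9.6/BaseOS/x86_64/os/",
    Path("./tmp/downloads/.cache/9.6/BaseOS"),
)
names = parser.get_packages_with_manpages()  # e.g. {'bash', 'coreutils', ...}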
src/rocky_man/repo/manager.py (new file, 237 lines)
@@ -0,0 +1,237 @@
"""Repository manager for querying and downloading RPM packages."""

import logging
from pathlib import Path
from typing import List, Set, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed

import dnf
import requests

from ..models import Package
from .contents import ContentsParser

logger = logging.getLogger(__name__)


class RepoManager:
    """Manages Rocky Linux repository operations.

    Handles:
    - Repository configuration with DNF
    - Package discovery and filtering
    - Package downloads with progress tracking
    """

    def __init__(
        self,
        repo_url: str,
        version: str,
        repo_type: str,
        arch: str,
        cache_dir: Path,
        download_dir: Path,
    ):
        """Initialize repository manager.

        Args:
            repo_url: Full repository URL
            version: Rocky Linux version (e.g., '9.5')
            repo_type: Repository type ('BaseOS' or 'AppStream')
            arch: Architecture (e.g., 'x86_64')
            cache_dir: Directory for caching metadata
            download_dir: Directory for downloading packages
        """
        self.repo_url = repo_url
        self.version = version
        self.repo_type = repo_type
        self.arch = arch
        self.cache_dir = Path(cache_dir)
        self.download_dir = Path(download_dir)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.download_dir.mkdir(parents=True, exist_ok=True)

        # Initialize DNF
        self.base = dnf.Base()
        self.base.conf.debuglevel = 0
        self.base.conf.errorlevel = 0
        self.base.conf.cachedir = str(self.cache_dir / "dnf")

        self._configure_repo()
        self.packages_with_manpages: Optional[Set[str]] = None

    def _configure_repo(self):
        """Configure DNF repository."""
        repo_id = f"rocky-{self.repo_type.lower()}-{self.version}-{self.arch}"
        repo = dnf.repo.Repo(repo_id, self.base.conf)
        repo.baseurl = [self.repo_url]
        repo.enabled = True
        repo.gpgcheck = False  # We verify checksums separately

        self.base.repos.add(repo)
        logger.info(f"Configured repository: {repo_id} at {self.repo_url}")

        # Fill the sack (package database)
        self.base.fill_sack(load_system_repo=False, load_available_repos=True)
        logger.info("Repository metadata loaded")

    def discover_packages_with_manpages(self) -> Set[str]:
        """Discover which packages contain man pages using filelists.

        This is the key optimization - we parse repository metadata
        to identify packages with man pages before downloading anything.

        Returns:
            Set of package names that contain man pages
        """
        if self.packages_with_manpages is not None:
            return self.packages_with_manpages

        parser = ContentsParser(self.repo_url, self.cache_dir)
        self.packages_with_manpages = parser.get_packages_with_manpages()

        return self.packages_with_manpages

    def list_packages(self, with_manpages_only: bool = True) -> List[Package]:
        """List all packages in the repository.

        Args:
            with_manpages_only: If True, only return packages with man pages

        Returns:
            List of Package objects
        """
        logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})")

        # Get packages with man pages if filtering
        manpage_packages = None
        if with_manpages_only:
            manpage_packages = self.discover_packages_with_manpages()
            logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")

        packages = []

        # Query all available packages
        query = self.base.sack.query().available()

        # For each package name, get only one arch (prefer noarch, then our target arch)
        seen_names = set()

        for pkg in query:
            pkg_name = pkg.name

            # Skip if we've already added this package
            if pkg_name in seen_names:
                continue

            # Skip if filtering and package doesn't have man pages
            if manpage_packages and pkg_name not in manpage_packages:
                continue

            # Get repo information
            repo = pkg.repo
            baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url

            # Create Package object
            package = Package(
                name=pkg_name,
                version=pkg.version,
                release=pkg.release,
                arch=pkg.arch,
                repo_type=self.repo_type,
                location=pkg.location,
                baseurl=baseurl,
                checksum=pkg.chksum[1] if pkg.chksum else "",  # chksum is (type, value)
                checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
                has_manpages=bool(manpage_packages),
            )

            packages.append(package)
            seen_names.add(pkg_name)

        logger.info(f"Found {len(packages)} packages to process")
        return sorted(packages)  # Sort by name for consistent ordering

    def download_package(self, package: Package) -> bool:
        """Download a single package.

        Args:
            package: Package to download

        Returns:
            True if download successful, False otherwise
        """
        download_path = self.download_dir / package.filename
        package.download_path = download_path

        # Skip if already downloaded
        if download_path.exists():
            logger.debug(f"Package already downloaded: {package.filename}")
            return True

        try:
            logger.info(f"Downloading {package.filename}")
            response = requests.get(package.download_url, timeout=300, stream=True)
            response.raise_for_status()

            # Download with progress (optional: could add progress bar here)
            with open(download_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

            logger.debug(f"Downloaded: {package.filename}")
            return True

        except Exception as e:
            logger.error(f"Error downloading {package.filename}: {e}")
            # Clean up partial download
            if download_path.exists():
                download_path.unlink()
            return False

    def download_packages(
        self,
        packages: List[Package],
        max_workers: int = 5
    ) -> List[Package]:
        """Download multiple packages in parallel.

        Args:
            packages: List of packages to download
            max_workers: Maximum number of parallel downloads

        Returns:
            List of successfully downloaded packages
        """
        downloaded = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Submit all download tasks
            future_to_pkg = {
                executor.submit(self.download_package, pkg): pkg
                for pkg in packages
            }

            # Process completed downloads
            for future in as_completed(future_to_pkg):
                pkg = future_to_pkg[future]
                try:
                    if future.result():
                        downloaded.append(pkg)
                except Exception as e:
                    logger.error(f"Error processing {pkg.name}: {e}")

        logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages")
        return downloaded

    def cleanup_package(self, package: Package):
        """Delete a downloaded package file.

        Args:
            package: Package to clean up
        """
        if package.download_path and package.download_path.exists():
            package.download_path.unlink()
            logger.debug(f"Deleted: {package.filename}")
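Putting the manager through its list/download cycle looks like this. A minimal sketch with illustrative paths; it requires the dnf Python bindings and network access:

from pathlib import Path

from rocky_man.repo import RepoManager

manager = RepoManager(
    repo_url="http://dl.rockylinux.org/pub/rocky/9.6/BaseOS/x86_64/os/",
    version="9.6", repo_type="BaseOS", arch="x86_64",
    cache_dir=Path("./tmp/cache"), download_dir=Path("./tmp/downloads/9.6"),
)
packages = manager.list_packages(with_manpages_only=True)
downloaded = manager.download_packages(packages, max_workers=5)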
src/rocky_man/utils/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .config import Config

__all__ = ["Config"]
src/rocky_man/utils/config.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""Configuration management for Rocky Man."""

from dataclasses import dataclass
from pathlib import Path
from typing import List


@dataclass
class Config:
    """Configuration for Rocky Man page generation.

    Attributes:
        base_url: Base URL for Rocky Linux mirror
        content_dir: Content directory path (usually 'pub/rocky')
        versions: List of Rocky Linux versions to process (e.g., ['8.10', '9.5'])
        architectures: List of architectures to consider (we'll pick one)
        repo_types: Repository types to process (e.g., ['BaseOS', 'AppStream'])
        download_dir: Directory for downloading RPM packages
        extract_dir: Directory for extracting man pages
        output_dir: Directory for generated HTML files
        keep_rpms: Whether to keep downloaded RPM files after processing
        keep_extracts: Whether to keep extracted man files after processing
        parallel_downloads: Number of parallel downloads
        parallel_conversions: Number of parallel HTML conversions
        skip_sections: Man page sections to skip (e.g., ['3', '3p', '3pm'])
        skip_packages: Package names to skip entirely
        skip_languages: Whether to skip non-English man pages
        allow_all_sections: If True, include every section (overrides skip_sections)
    """

    # Repository configuration
    base_url: str = "http://dl.rockylinux.org/"
    content_dir: str = "pub/rocky"
    versions: List[str] = None
    architectures: List[str] = None
    repo_types: List[str] = None

    # Directory configuration
    download_dir: Path = Path("/data/tmp/downloads")
    extract_dir: Path = Path("/data/tmp/extracts")
    output_dir: Path = Path("/data/html")

    # Cleanup options
    keep_rpms: bool = False
    keep_extracts: bool = False

    # Performance options
    parallel_downloads: int = 5
    parallel_conversions: int = 10

    # Filtering options
    skip_sections: List[str] = None
    skip_packages: List[str] = None
    skip_languages: bool = True  # Skip non-English languages by default
    allow_all_sections: bool = False  # Override skip_sections if True

    def __post_init__(self):
        """Set defaults and ensure directories exist."""
        if self.versions is None:
            self.versions = ["8.10", "9.6", "10.0"]

        if self.architectures is None:
            # Man pages are arch-independent, so we just need one.
            # We prefer x86_64 as it's most common, fallback to others.
            self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"]

        if self.repo_types is None:
            self.repo_types = ["BaseOS", "AppStream"]

        # Set default skip sections (man3 library APIs)
        if self.skip_sections is None and not self.allow_all_sections:
            self.skip_sections = ["3", "3p", "3pm"]
        elif self.allow_all_sections:
            self.skip_sections = []

        # Set default skip packages (high-volume API docs)
        if self.skip_packages is None:
            self.skip_packages = [
                "lapack",
                "dpdk-devel",
                "gl-manpages",
            ]

        # Ensure all paths are Path objects
        self.download_dir = Path(self.download_dir)
        self.extract_dir = Path(self.extract_dir)
        self.output_dir = Path(self.output_dir)

    def get_repo_url(self, version: str, repo_type: str, arch: str) -> str:
        """Construct repository URL for given parameters.

        Args:
            version: Rocky Linux version (e.g., '9.5')
            repo_type: Repository type ('BaseOS' or 'AppStream')
            arch: Architecture (e.g., 'x86_64')

        Returns:
            Full repository URL
        """
        url = self.base_url.rstrip('/')
        path = f"{self.content_dir}/{version}/{repo_type}/{arch}/os"
        return f"{url}/{path}/"

    def get_version_output_dir(self, version: str) -> Path:
        """Get output directory for a specific version."""
        return self.output_dir / version

    def get_version_download_dir(self, version: str) -> Path:
        """Get download directory for a specific version."""
        return self.download_dir / version

    def get_version_extract_dir(self, version: str) -> Path:
        """Get extract directory for a specific version."""
        return self.extract_dir / version
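With the defaults above, URL construction is deterministic. A minimal check of get_repo_url:

from rocky_man.utils.config import Config

config = Config()
print(config.get_repo_url("9.6", "BaseOS", "x86_64"))
# -> http://dl.rockylinux.org/pub/rocky/9.6/BaseOS/x86_64/os/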
src/rocky_man/web/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .generator import WebGenerator

__all__ = ["WebGenerator"]
src/rocky_man/web/generator.py (new file, 297 lines)
@@ -0,0 +1,297 @@
"""Web page generator for Rocky Man."""

import gzip
import json
import logging
from pathlib import Path
from typing import List, Dict, Any

from jinja2 import Environment, FileSystemLoader, select_autoescape

from ..models import ManFile

logger = logging.getLogger(__name__)


class WebGenerator:
    """Generates web pages and search index for Rocky Man.

    Handles:
    - Generating index/search page
    - Wrapping man page HTML in templates
    - Creating search index JSON
    """

    def __init__(self, template_dir: Path, output_dir: Path):
        """Initialize web generator.

        Args:
            template_dir: Directory containing Jinja2 templates
            output_dir: Directory for HTML output
        """
        self.template_dir = Path(template_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Set up the Jinja2 environment
        self.env = Environment(
            loader=FileSystemLoader(str(self.template_dir)),
            autoescape=select_autoescape(['html', 'xml'])
        )

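    # Note: select_autoescape(['html', 'xml']) enables autoescaping for
    # .html/.xml templates, so pre-rendered man page HTML has to be marked
    # safe inside the templates (e.g. via the |safe filter) to display.
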
    def generate_manpage_html(self, man_file: ManFile, version: str) -> bool:
        """Generate complete HTML page for a man page.

        Args:
            man_file: ManFile with html_content already set
            version: Rocky Linux version

        Returns:
            True if successful
        """
        if not man_file.html_content:
            logger.warning(f"No HTML content for {man_file.display_name}")
            return False

        try:
            template = self.env.get_template('manpage.html')

            html = template.render(
                title=f"{man_file.display_name} - {man_file.package_name} - Rocky Linux {version}",
                header_title=man_file.display_name,
                package_name=man_file.package_name,
                version=version,
                section=man_file.section,
                language=man_file.language or 'en',
                content=man_file.html_content
            )

            # Ensure output path is set
            if not man_file.html_path:
                man_file.html_path = self._get_manpage_path(man_file, version)

            man_file.html_path.parent.mkdir(parents=True, exist_ok=True)

            with open(man_file.html_path, 'w', encoding='utf-8') as f:
                f.write(html)

            return True

        except Exception as e:
            logger.error(f"Error generating HTML for {man_file.display_name}: {e}")
            return False

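    # Illustration (hypothetical call): gen.generate_manpage_html(mf, "9.6")
    # writes <output_dir>/9.6/<package>/man<section>/<html_filename>, with
    # the path derived by _get_manpage_path() below when html_path is unset.
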
    def generate_index(self, version: str, search_data: Dict[str, Any]) -> bool:
        """Generate search/index page for a version.

        Args:
            version: Rocky Linux version
            search_data: Search index data

        Returns:
            True if successful
        """
        try:
            template = self.env.get_template('index.html')

            html = template.render(
                title=f"Rocky Linux {version} Man Pages",
                version=version,
                # search_data maps package name -> pages, so count the
                # individual pages rather than the packages.
                total_pages=sum(len(pages) for pages in search_data.values()),
                packages=sorted(search_data.keys())
            )

            index_path = self.output_dir / version / 'index.html'
            index_path.parent.mkdir(parents=True, exist_ok=True)

            with open(index_path, 'w', encoding='utf-8') as f:
                f.write(html)

            logger.info(f"Generated index for version {version}")
            return True

        except Exception as e:
            logger.error(f"Error generating index for {version}: {e}")
            return False

    def generate_packages_index(self, version: str, search_data: Dict[str, Any]) -> bool:
        """Generate full packages index page.

        Args:
            version: Rocky Linux version
            search_data: Search index data

        Returns:
            True if successful
        """
        try:
            # Group packages by first letter
            packages_by_letter = {}

            for pkg_name, pages in search_data.items():
                first_char = pkg_name[0].upper()
                if not first_char.isalpha():
                    first_char = 'other'

                if first_char not in packages_by_letter:
                    packages_by_letter[first_char] = []

                packages_by_letter[first_char].append({
                    'name': pkg_name,
                    'count': len(pages)
                })

            # Sort packages within each letter
            for letter in packages_by_letter:
                packages_by_letter[letter].sort(key=lambda x: x['name'])

            template = self.env.get_template('packages.html')

            html = template.render(
                title=f"All Packages - Rocky Linux {version}",
                version=version,
                total_packages=len(search_data),
                packages_by_letter=packages_by_letter
            )

            output_path = self.output_dir / version / 'packages.html'
            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(html)

            logger.info(f"Generated packages index for version {version}")
            return True

        except Exception as e:
            logger.error(f"Error generating packages index for {version}: {e}")
            return False

    def generate_search_index(
        self,
        man_files: List[ManFile],
        version: str
    ) -> Dict[str, Any]:
        """Generate search index from man files.

        Args:
            man_files: List of ManFile objects
            version: Rocky Linux version

        Returns:
            Search index dictionary
        """
        index = {}

        for man_file in man_files:
            pkg_name = man_file.package_name

            if pkg_name not in index:
                index[pkg_name] = {}

            # Create entry for this man page
            entry = {
                'name': man_file.name,
                'section': man_file.section,
                'display_name': man_file.display_name,
                'language': man_file.language or 'en',
                'url': man_file.uri_path,
                'full_name': f"{man_file.package_name} - {man_file.display_name}"
            }

            # Use display name as key (handles duplicates with different sections)
            key = man_file.display_name
            if man_file.language:
                key = f"{key}.{man_file.language}"

            index[pkg_name][key] = entry

        return index

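    # Resulting shape, assuming display_name renders as "name(section)":
    #   {"bash": {"bash(1)": {"name": "bash", "section": "1", "url": ...}}}
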
    def save_search_index(self, index: Dict[str, Any], version: str) -> bool:
        """Save search index as JSON (both plain and gzipped).

        Args:
            index: Search index dictionary
            version: Rocky Linux version

        Returns:
            True if successful
        """
        try:
            version_dir = self.output_dir / version
            version_dir.mkdir(parents=True, exist_ok=True)

            json_path = version_dir / 'search.json'
            gz_path = version_dir / 'search.json.gz'

            # Sort for consistency
            sorted_index = {k: index[k] for k in sorted(index)}

            # Save plain JSON
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(sorted_index, f, indent=2)

            # Save gzipped JSON
            with gzip.open(gz_path, 'wt', encoding='utf-8') as f:
                json.dump(sorted_index, f)

            logger.info(f"Saved search index for {version} ({len(index)} packages)")
            return True

        except Exception as e:
            logger.error(f"Error saving search index: {e}")
            return False

    def _get_manpage_path(self, man_file: ManFile, version: str) -> Path:
        """Get output path for a man page HTML file.

        Args:
            man_file: ManFile object
            version: Rocky Linux version

        Returns:
            Path for HTML file
        """
        version_dir = self.output_dir / version
        pkg_dir = version_dir / man_file.package_name
        section_dir = pkg_dir / f"man{man_file.section}"

        return section_dir / man_file.html_filename

    def generate_root_index(self, versions: List[str]) -> bool:
        """Generate root index page linking to all versions.

        Args:
            versions: List of Rocky Linux versions

        Returns:
            True if successful
        """
        try:
            template = self.env.get_template('root.html')

            # Sort versions numerically (e.g., 8.10, 9.6, 10.0)
            def version_key(v):
                try:
                    parts = v.split('.')
                    return tuple(int(p) for p in parts)
                except (ValueError, AttributeError):
                    return (0, 0)

            html = template.render(
                title="Rocky Linux Man Pages",
                versions=sorted(versions, key=version_key)
            )

            index_path = self.output_dir / 'index.html'

            with open(index_path, 'w', encoding='utf-8') as f:
                f.write(html)

            logger.info("Generated root index page")
            return True

        except Exception as e:
            logger.error(f"Error generating root index: {e}")
            return False
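
A hedged end-to-end sketch of the WebGenerator flow (assumes templates named manpage.html, index.html, packages.html and root.html exist in template_dir, and that man_files comes from the converter step with html_content already set):

    from pathlib import Path
    from rocky_man.web import WebGenerator

    gen = WebGenerator(Path("templates"), Path("/data/html"))

    for mf in man_files:                       # ManFile objects
        gen.generate_manpage_html(mf, "9.6")

    search_data = gen.generate_search_index(man_files, "9.6")
    gen.save_search_index(search_data, "9.6")  # search.json + search.json.gz
    gen.generate_index("9.6", search_data)
    gen.generate_packages_index("9.6", search_data)
    gen.generate_root_index(["8.10", "9.6", "10.0"])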