# rocky-man/src/rocky_man/main.py
"""Main entry point for Rocky Man."""
import argparse
import logging
import re
import sys
from pathlib import Path
from .utils.config import Config
from .repo import RepoManager
from .processor import ManPageExtractor, ManPageConverter
from .web import WebGenerator
def setup_logging(verbose: bool = False):
"""Configure logging."""
level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(
level=level,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
def process_version(config: Config, version: str, template_dir: Path) -> bool:
"""Process a single Rocky Linux version.
Args:
config: Configuration object
version: Rocky Linux version to process
template_dir: Path to templates directory
Returns:
True if successful
"""
logger = logging.getLogger(__name__)
logger.info(f"Processing Rocky Linux {version}")
# Setup directories for this version
version_download_dir = config.get_version_download_dir(version)
version_extract_dir = config.get_version_extract_dir(version)
version_output_dir = config.get_version_output_dir(version)
all_man_files = []
# Process each repository type
for repo_type in config.repo_types:
logger.info(f"Processing {repo_type} repository")
# Use first available architecture (man pages are arch-independent)
arch = config.architectures[0]
# Create cache dir for this repo
cache_dir = config.download_dir / f".cache/{version}/{repo_type}"
try:
# Initialize repository manager
repo_manager = RepoManager(
config=config,
version=version,
repo_type=repo_type,
arch=arch,
cache_dir=cache_dir,
download_dir=version_download_dir,
)
# List packages (with man pages only)
packages = repo_manager.list_packages(with_manpages_only=True)
            if not packages:
                logger.warning(f"No packages found in {repo_type}")
                continue

            logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")

            # Filter out packages that should be skipped
            if config.skip_packages:
                original_count = len(packages)
                packages = [
                    pkg for pkg in packages if pkg.name not in config.skip_packages
                ]
                filtered_count = original_count - len(packages)
                if filtered_count > 0:
                    logger.info(
                        f"Filtered out {filtered_count} packages based on skip list"
                    )

            logger.info(f"Processing {len(packages)} packages")

            # Download packages
            logger.info("Downloading packages...")
            downloaded = repo_manager.download_packages(
                packages, max_workers=config.parallel_downloads
            )

            # Extract man pages
            logger.info("Extracting man pages...")
            extractor = ManPageExtractor(
                version_extract_dir,
                skip_sections=config.skip_sections,
                skip_languages=config.skip_languages,
            )
            man_files = extractor.extract_from_packages(
                downloaded, max_workers=config.parallel_downloads
            )
            logger.info(f"Extracted {len(man_files)} man pages")

            # Read content for each man file
            logger.info("Reading man page content...")
            man_files_with_content = []
            for man_file in man_files:
                content = extractor.read_manpage_content(man_file)
                if content:
                    man_files_with_content.append((man_file, content))

            # Convert to HTML
            logger.info("Converting man pages to HTML...")
            converter = ManPageConverter(version_output_dir)
            converted = converter.convert_many(
                man_files_with_content, max_workers=config.parallel_conversions
            )
            all_man_files.extend(converted)

            # Cleanup if requested
            if not config.keep_rpms:
                logger.info("Cleaning up downloaded packages...")
                for package in downloaded:
                    repo_manager.cleanup_package(package)

            if not config.keep_extracts:
                logger.info("Cleaning up extracted files...")
                for package in downloaded:
                    extractor.cleanup_extracts(package)

        except Exception as e:
            logger.error(f"Error processing {repo_type}: {e}", exc_info=True)
            continue

    if not all_man_files:
        logger.error(f"No man pages were successfully processed for version {version}")
        return False

    # Generate web pages
    logger.info("Generating web pages...")
    web_gen = WebGenerator(template_dir, config.output_dir)

    # Generate search index
    search_index = web_gen.generate_search_index(all_man_files, version)
    web_gen.save_search_index(search_index, version)

    # Generate index page
    web_gen.generate_index(version, search_index)

    # Generate packages index page
    web_gen.generate_packages_index(version, search_index)

    # Set HTML paths for all man files
    for man_file in all_man_files:
        if not man_file.html_path:
            man_file.html_path = web_gen._get_manpage_path(man_file, version)

    # Link cross-references between man pages
    logger.info("Linking cross-references...")
    converter.link_cross_references(all_man_files, version)

    # Wrap man pages in templates
    logger.info("Generating man page HTML...")
    for man_file in all_man_files:
        web_gen.generate_manpage_html(man_file, version)

    logger.info(
        f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}"
    )
    return True


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Generate HTML documentation for Rocky Linux man pages"
    )
    parser.add_argument(
        "--versions",
nargs="+",
default=["8.10", "9.6", "10.0"],
help="Rocky Linux versions to process (default: 8.10 9.6 10.0)",
)
parser.add_argument(
"--repo-types",
nargs="+",
default=["BaseOS", "AppStream"],
help="Repository types to process (default: BaseOS AppStream)",
)
parser.add_argument(
"--output-dir",
type=Path,
default=Path("./html"),
help="Output directory for HTML files (default: ./html)",
)
parser.add_argument(
"--download-dir",
type=Path,
default=Path("./tmp/downloads"),
help="Directory for downloading packages (default: ./tmp/downloads)",
)
parser.add_argument(
"--extract-dir",
type=Path,
default=Path("./tmp/extracts"),
help="Directory for extracting man pages (default: ./tmp/extracts)",
)
parser.add_argument(
"--keep-rpms",
action="store_true",
help="Keep downloaded RPM files after processing",
)
parser.add_argument(
"--keep-extracts",
action="store_true",
help="Keep extracted man files after processing",
)
parser.add_argument(
"--parallel-downloads",
type=int,
default=5,
help="Number of parallel downloads (default: 5)",
)
parser.add_argument(
"--parallel-conversions",
type=int,
default=10,
help="Number of parallel HTML conversions (default: 10)",
)
parser.add_argument(
"--mirror",
default="http://dl.rockylinux.org/",
help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)",
)
parser.add_argument(
"--vault",
action="store_true",
help="Use vault directory instead of pub (vault/rocky instead of pub/rocky)",
)
parser.add_argument(
"--existing-versions",
nargs="*",
metavar="VERSION",
help="List of existing versions to include in root index (e.g., 8.10 9.7)",
)
parser.add_argument(
"--template-dir",
type=Path,
default=Path(__file__).parent.parent.parent / "templates",
help="Template directory (default: ./templates)",
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="Enable verbose logging"
)
parser.add_argument(
"--skip-sections",
nargs="*",
default=None,
help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.",
)
parser.add_argument(
"--skip-packages",
nargs="*",
default=None,
help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.",
)
parser.add_argument(
"--skip-languages",
action="store_true",
default=None,
help="Skip non-English man pages (default: enabled)",
)
parser.add_argument(
"--keep-languages",
action="store_true",
help="Keep all languages (disables --skip-languages)",
)
parser.add_argument(
"--allow-all-sections",
action="store_true",
help="Include all man sections (overrides --skip-sections)",
)
args = parser.parse_args()
# Setup logging
setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    # Handle filtering options
    skip_languages = True  # default
    if args.keep_languages:
        skip_languages = False
    elif args.skip_languages is not None:
        skip_languages = args.skip_languages

    # Determine content directory
    content_dir = "vault/rocky" if args.vault else "pub/rocky"

    # Create configuration
    config = Config(
        base_url=args.mirror,
        content_dir=content_dir,
        versions=args.versions,
        repo_types=args.repo_types,
        download_dir=args.download_dir,
        extract_dir=args.extract_dir,
        output_dir=args.output_dir,
        keep_rpms=args.keep_rpms,
        keep_extracts=args.keep_extracts,
        parallel_downloads=args.parallel_downloads,
        parallel_conversions=args.parallel_conversions,
        skip_sections=args.skip_sections,
        skip_packages=args.skip_packages,
        skip_languages=skip_languages,
        allow_all_sections=args.allow_all_sections,
    )

    # Get existing versions from scan and argument
    scanned_versions = [
        d.name
        for d in config.output_dir.iterdir()
        if d.is_dir() and re.match(r"\d+\.\d+", d.name)
    ]
    arg_versions = args.existing_versions or []

    # Sort versions numerically by (major, minor)
    def version_key(v):
        try:
            major, minor = v.split(".")
            return (int(major), int(minor))
        except (ValueError, AttributeError):
            return (0, 0)

    existing_versions = sorted(set(scanned_versions + arg_versions), key=version_key)
    all_versions = sorted(set(existing_versions + config.versions), key=version_key)

    logger.info("Rocky Man - Rocky Linux Man Page Generator")
    logger.info(f"Versions to process: {', '.join(config.versions)}")
    logger.info(f"All known versions: {', '.join(all_versions)}")
    logger.info(f"Repositories: {', '.join(config.repo_types)}")
    logger.info(f"Output directory: {config.output_dir}")

    # Log filtering configuration
    if config.skip_sections:
        logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
    else:
        logger.info("Including all man sections")
    if config.skip_packages:
        logger.info(f"Skipping packages: {', '.join(config.skip_packages)}")
    if config.skip_languages:
        logger.info("Skipping non-English languages")
    else:
        logger.info("Including all languages")

    # Process each version
    processed_versions = []
    for version in config.versions:
        try:
            if process_version(config, version, args.template_dir):
                processed_versions.append(version)
        except Exception as e:
            logger.error(f"Failed to process version {version}: {e}", exc_info=True)

    if not processed_versions:
        logger.error("No versions were successfully processed")
        return 1

    # Generate root index
    logger.info("Generating root index page...")
    web_gen = WebGenerator(args.template_dir, config.output_dir)
    web_gen.generate_root_index(all_versions)

    logger.info("=" * 60)
    logger.info("Processing complete!")
    logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
    logger.info(f"Output directory: {config.output_dir.absolute()}")
    logger.info("=" * 60)
    return 0


if __name__ == "__main__":
    sys.exit(main())
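
# Example invocation (an illustrative sketch, not taken from the project docs):
# the relative imports above require package context, so this assumes rocky_man
# is importable, e.g. after an editable install or with src/ on PYTHONPATH.
#
#   python -m rocky_man.main --versions 9.6 --repo-types BaseOS AppStream \
#       --output-dir ./html --keep-rpms -v
#
# Adding --vault switches the mirror path from pub/rocky to vault/rocky for
# point releases that are only available from the Rocky Linux vault.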