CUSP-1256 (#1)

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

* Complete refactor

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>

---------

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
Stephen Simpson
2025-11-20 12:16:33 -05:00
committed by GitHub
parent 5248edad62
commit ec32c72363
44 changed files with 4083 additions and 1540 deletions

377
src/rocky_man/main.py Normal file
View File

@@ -0,0 +1,377 @@
"""Main entry point for Rocky Man."""
import argparse
import logging
import sys
from pathlib import Path
from .utils.config import Config
from .repo import RepoManager
from .processor import ManPageExtractor, ManPageConverter
from .web import WebGenerator
def setup_logging(verbose: bool = False):
    """Initialise application-wide logging on the root logger.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
def _process_repo_type(
    config: Config,
    version: str,
    repo_type: str,
    version_download_dir: Path,
    version_extract_dir: Path,
    version_output_dir: Path,
) -> list:
    """Download, extract and convert all man pages for one repository type.

    Args:
        config: Configuration object
        version: Rocky Linux version being processed
        repo_type: Repository name (e.g. BaseOS, AppStream)
        version_download_dir: Directory for downloaded RPMs
        version_extract_dir: Directory for extracted man pages
        version_output_dir: Directory for converted HTML output

    Returns:
        List of converted man-page records; empty when the repository
        has no matching packages.

    Raises:
        Exception: Any repository/processing error propagates; the
            caller decides how to handle per-repo failures.
    """
    logger = logging.getLogger(__name__)
    # Use first available architecture (man pages are arch-independent)
    arch = config.architectures[0]
    repo_url = config.get_repo_url(version, repo_type, arch)
    # Per-repo metadata cache so repeated runs avoid re-fetching repodata
    cache_dir = config.download_dir / f".cache/{version}/{repo_type}"
    repo_manager = RepoManager(
        repo_url=repo_url,
        version=version,
        repo_type=repo_type,
        arch=arch,
        cache_dir=cache_dir,
        download_dir=version_download_dir
    )
    # Only packages that actually ship man pages are of interest
    packages = repo_manager.list_packages(with_manpages_only=True)
    if not packages:
        logger.warning(f"No packages found in {repo_type}")
        return []
    logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")
    # Filter out packages that should be skipped
    if config.skip_packages:
        original_count = len(packages)
        packages = [
            pkg for pkg in packages
            if pkg.name not in config.skip_packages
        ]
        filtered_count = original_count - len(packages)
        if filtered_count > 0:
            logger.info(f"Filtered out {filtered_count} packages based on skip list")
    logger.info(f"Processing {len(packages)} packages")
    # Download packages
    logger.info("Downloading packages...")
    downloaded = repo_manager.download_packages(
        packages,
        max_workers=config.parallel_downloads
    )
    # Extract man pages
    logger.info("Extracting man pages...")
    extractor = ManPageExtractor(
        version_extract_dir,
        skip_sections=config.skip_sections,
        skip_languages=config.skip_languages
    )
    man_files = extractor.extract_from_packages(
        downloaded,
        max_workers=config.parallel_downloads
    )
    logger.info(f"Extracted {len(man_files)} man pages")
    # Pair each man file with its raw content; unreadable files are dropped
    logger.info("Reading man page content...")
    man_files_with_content = []
    for man_file in man_files:
        content = extractor.read_manpage_content(man_file)
        if content:
            man_files_with_content.append((man_file, content))
    # Convert to HTML
    logger.info("Converting man pages to HTML...")
    converter = ManPageConverter(version_output_dir)
    converted = converter.convert_many(
        man_files_with_content,
        max_workers=config.parallel_conversions
    )
    # Optional cleanup of intermediate artefacts
    if not config.keep_rpms:
        logger.info("Cleaning up downloaded packages...")
        for package in downloaded:
            repo_manager.cleanup_package(package)
    if not config.keep_extracts:
        logger.info("Cleaning up extracted files...")
        for package in downloaded:
            extractor.cleanup_extracts(package)
    return converted


def process_version(
    config: Config,
    version: str,
    template_dir: Path
) -> bool:
    """Process a single Rocky Linux version.

    Downloads packages from each configured repository, extracts and
    converts their man pages to HTML, then generates the version's
    index pages and search index.

    Args:
        config: Configuration object
        version: Rocky Linux version to process
        template_dir: Path to templates directory

    Returns:
        True if at least one man page was processed successfully
    """
    logger = logging.getLogger(__name__)
    logger.info(f"Processing Rocky Linux {version}")
    # Setup directories for this version
    version_download_dir = config.get_version_download_dir(version)
    version_extract_dir = config.get_version_extract_dir(version)
    version_output_dir = config.get_version_output_dir(version)
    all_man_files = []
    # Each repository type is processed independently: a failure in one
    # repo (bad URL, download error, ...) is logged and does not abort
    # the remaining repos for this version.
    for repo_type in config.repo_types:
        logger.info(f"Processing {repo_type} repository")
        try:
            all_man_files.extend(
                _process_repo_type(
                    config,
                    version,
                    repo_type,
                    version_download_dir,
                    version_extract_dir,
                    version_output_dir,
                )
            )
        except Exception as e:
            logger.error(f"Error processing {repo_type}: {e}", exc_info=True)
            continue
    if not all_man_files:
        logger.error(f"No man pages were successfully processed for version {version}")
        return False
    # Link cross-references between man pages
    logger.info("Linking cross-references...")
    converter = ManPageConverter(version_output_dir)
    converter.link_cross_references(all_man_files)
    # Generate web pages
    logger.info("Generating web pages...")
    web_gen = WebGenerator(template_dir, config.output_dir)
    # The search index feeds both the search box and the index pages
    search_index = web_gen.generate_search_index(all_man_files, version)
    web_gen.save_search_index(search_index, version)
    # Generate index page
    web_gen.generate_index(version, search_index)
    # Generate packages index page
    web_gen.generate_packages_index(version, search_index)
    # Wrap each converted man page in the site template
    logger.info("Generating man page HTML...")
    for man_file in all_man_files:
        web_gen.generate_manpage_html(man_file, version)
    logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}")
    return True
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for rocky-man."""
    parser = argparse.ArgumentParser(
        description='Generate HTML documentation for Rocky Linux man pages'
    )
    parser.add_argument(
        '--versions',
        nargs='+',
        default=['8.10', '9.6', '10.0'],
        help='Rocky Linux versions to process (default: 8.10 9.6 10.0)'
    )
    parser.add_argument(
        '--repo-types',
        nargs='+',
        default=['BaseOS', 'AppStream'],
        help='Repository types to process (default: BaseOS AppStream)'
    )
    parser.add_argument(
        '--output-dir',
        type=Path,
        default=Path('./html'),
        help='Output directory for HTML files (default: ./html)'
    )
    parser.add_argument(
        '--download-dir',
        type=Path,
        default=Path('./tmp/downloads'),
        help='Directory for downloading packages (default: ./tmp/downloads)'
    )
    parser.add_argument(
        '--extract-dir',
        type=Path,
        default=Path('./tmp/extracts'),
        help='Directory for extracting man pages (default: ./tmp/extracts)'
    )
    parser.add_argument(
        '--keep-rpms',
        action='store_true',
        help='Keep downloaded RPM files after processing'
    )
    parser.add_argument(
        '--keep-extracts',
        action='store_true',
        help='Keep extracted man files after processing'
    )
    parser.add_argument(
        '--parallel-downloads',
        type=int,
        default=5,
        help='Number of parallel downloads (default: 5)'
    )
    parser.add_argument(
        '--parallel-conversions',
        type=int,
        default=10,
        help='Number of parallel HTML conversions (default: 10)'
    )
    parser.add_argument(
        '--mirror',
        default='http://dl.rockylinux.org/',
        help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)'
    )
    parser.add_argument(
        '--template-dir',
        type=Path,
        # Default resolves to the 'templates' directory shipped at the
        # repository root, relative to this module's location.
        default=Path(__file__).parent.parent.parent / 'templates',
        help='Template directory (default: the bundled templates directory)'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose logging'
    )
    parser.add_argument(
        '--skip-sections',
        nargs='*',
        default=None,
        help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.'
    )
    parser.add_argument(
        '--skip-packages',
        nargs='*',
        default=None,
        help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.'
    )
    parser.add_argument(
        '--skip-languages',
        action='store_true',
        # default=None lets us distinguish "flag not given" from
        # "explicitly enabled" when resolving against --keep-languages.
        default=None,
        help='Skip non-English man pages (default: enabled)'
    )
    parser.add_argument(
        '--keep-languages',
        action='store_true',
        help='Keep all languages (disables --skip-languages)'
    )
    parser.add_argument(
        '--allow-all-sections',
        action='store_true',
        help='Include all man sections (overrides --skip-sections)'
    )
    return parser


def main():
    """Main entry point.

    Parses CLI arguments, builds the configuration and processes each
    requested Rocky Linux version.

    Returns:
        Process exit code: 0 on success, 1 when no version could be
        processed.
    """
    args = _build_parser().parse_args()
    # Setup logging
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)
    # Resolve language filtering: --keep-languages wins; otherwise an
    # explicit --skip-languages flag; otherwise skip non-English pages.
    skip_languages = True  # default
    if args.keep_languages:
        skip_languages = False
    elif args.skip_languages is not None:
        skip_languages = args.skip_languages
    # Create configuration
    config = Config(
        base_url=args.mirror,
        versions=args.versions,
        repo_types=args.repo_types,
        download_dir=args.download_dir,
        extract_dir=args.extract_dir,
        output_dir=args.output_dir,
        keep_rpms=args.keep_rpms,
        keep_extracts=args.keep_extracts,
        parallel_downloads=args.parallel_downloads,
        parallel_conversions=args.parallel_conversions,
        skip_sections=args.skip_sections,
        skip_packages=args.skip_packages,
        skip_languages=skip_languages,
        allow_all_sections=args.allow_all_sections
    )
    logger.info("Rocky Man - Rocky Linux Man Page Generator")
    logger.info(f"Versions: {', '.join(config.versions)}")
    logger.info(f"Repositories: {', '.join(config.repo_types)}")
    logger.info(f"Output directory: {config.output_dir}")
    # Log filtering configuration
    if config.skip_sections:
        logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
    else:
        logger.info("Including all man sections")
    if config.skip_packages:
        logger.info(f"Skipping packages: {', '.join(config.skip_packages)}")
    if config.skip_languages:
        logger.info("Skipping non-English languages")
    else:
        logger.info("Including all languages")
    # Process each version independently; one failed version does not
    # stop the others.
    processed_versions = []
    for version in config.versions:
        try:
            if process_version(config, version, args.template_dir):
                processed_versions.append(version)
        except Exception as e:
            logger.error(f"Failed to process version {version}: {e}", exc_info=True)
    if not processed_versions:
        logger.error("No versions were successfully processed")
        return 1
    # Generate root index
    logger.info("Generating root index page...")
    web_gen = WebGenerator(args.template_dir, config.output_dir)
    web_gen.generate_root_index(processed_versions)
    logger.info("=" * 60)
    logger.info("Processing complete!")
    logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
    logger.info(f"Output directory: {config.output_dir.absolute()}")
    logger.info("=" * 60)
    return 0
# Script entry point: propagate main()'s integer exit code to the shell.
if __name__ == '__main__':
    sys.exit(main())