"""Main entry point for Rocky Man."""

import argparse
import logging
import sys
from pathlib import Path

from .utils.config import Config
from .repo import RepoManager
from .processor import ManPageExtractor, ManPageConverter
from .web import WebGenerator


def setup_logging(verbose: bool = False) -> None:
    """Configure logging for the command-line run.

    Args:
        verbose: When true, log at DEBUG level; otherwise log at INFO.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )


def _cleanup_repo_artifacts(config, repo_type, repo_manager, extractor, downloaded) -> None:
    """Remove downloaded packages and extracted files unless the config keeps them.

    Cleanup is best-effort: a failure is logged and never raised, so it cannot
    mask the result (or the original error) of the repository run.
    """
    logger = logging.getLogger(__name__)

    if not downloaded:
        # Nothing was downloaded (early exit or download failure).
        return

    try:
        if not config.keep_rpms and repo_manager is not None:
            logger.info("Cleaning up downloaded packages...")
            for package in downloaded:
                repo_manager.cleanup_package(package)

        if not config.keep_extracts and extractor is not None:
            logger.info("Cleaning up extracted files...")
            for package in downloaded:
                extractor.cleanup_extracts(package)
    except Exception as e:
        logger.error(f"Error cleaning up {repo_type}: {e}", exc_info=True)


def _process_repository(
    config: Config,
    version: str,
    repo_type: str,
    version_download_dir,
    version_extract_dir,
    version_output_dir,
) -> list:
    """Download, extract and convert the man pages of one repository.

    Args:
        config: Configuration object
        version: Rocky Linux version being processed
        repo_type: Repository type to process (an entry of config.repo_types)
        version_download_dir: Download directory for this version
        version_extract_dir: Extraction directory for this version
        version_output_dir: HTML output directory for this version

    Returns:
        The converted man files for this repository; an empty list when the
        repository has no packages or when processing failed (the error is
        logged, not raised).
    """
    logger = logging.getLogger(__name__)
    logger.info(f"Processing {repo_type} repository")

    # Use first available architecture (man pages are arch-independent)
    arch = config.architectures[0]

    # Get repository URL
    repo_url = config.get_repo_url(version, repo_type, arch)

    # Create cache dir for this repo
    cache_dir = config.download_dir / f".cache/{version}/{repo_type}"

    # Tracked outside the try so the finally clause can clean up whatever
    # was created before a failure.
    repo_manager = None
    extractor = None
    downloaded = []

    try:
        # Initialize repository manager
        repo_manager = RepoManager(
            repo_url=repo_url,
            version=version,
            repo_type=repo_type,
            arch=arch,
            cache_dir=cache_dir,
            download_dir=version_download_dir
        )

        # List packages (with man pages only)
        packages = repo_manager.list_packages(with_manpages_only=True)

        if not packages:
            logger.warning(f"No packages found in {repo_type}")
            return []

        logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")

        # Filter out packages that should be skipped
        if config.skip_packages:
            original_count = len(packages)
            # Build the lookup set once instead of scanning the skip list
            # for every package.
            skip_names = set(config.skip_packages)
            packages = [pkg for pkg in packages if pkg.name not in skip_names]
            filtered_count = original_count - len(packages)
            if filtered_count > 0:
                logger.info(f"Filtered out {filtered_count} packages based on skip list")
            logger.info(f"Processing {len(packages)} packages")

        # Download packages
        logger.info("Downloading packages...")
        downloaded = repo_manager.download_packages(
            packages,
            max_workers=config.parallel_downloads
        )

        # Extract man pages
        logger.info("Extracting man pages...")
        extractor = ManPageExtractor(
            version_extract_dir,
            skip_sections=config.skip_sections,
            skip_languages=config.skip_languages
        )
        man_files = extractor.extract_from_packages(
            downloaded,
            max_workers=config.parallel_downloads
        )

        logger.info(f"Extracted {len(man_files)} man pages")

        # Read content for each man file; files with no content are dropped
        logger.info("Reading man page content...")
        man_files_with_content = []
        for man_file in man_files:
            content = extractor.read_manpage_content(man_file)
            if content:
                man_files_with_content.append((man_file, content))

        # Convert to HTML
        logger.info("Converting man pages to HTML...")
        converter = ManPageConverter(version_output_dir)
        return list(converter.convert_many(
            man_files_with_content,
            max_workers=config.parallel_conversions
        ))

    except Exception as e:
        # One failing repository must not abort the other repositories.
        logger.error(f"Error processing {repo_type}: {e}", exc_info=True)
        return []

    finally:
        # Runs on success, early return and failure alike, so a mid-pipeline
        # error no longer leaves RPMs or extracted files on disk.
        _cleanup_repo_artifacts(config, repo_type, repo_manager, extractor, downloaded)


def process_version(
    config: Config,
    version: str,
    template_dir: Path
) -> bool:
    """Process a single Rocky Linux version.

    Each configured repository type is processed independently; a failure in
    one repository is logged and the remaining repositories are still tried.

    Args:
        config: Configuration object
        version: Rocky Linux version to process
        template_dir: Path to templates directory

    Returns:
        True if at least one man page was processed and the web pages were
        generated; False if no man pages were produced for this version.
    """
    logger = logging.getLogger(__name__)
    logger.info(f"Processing Rocky Linux {version}")

    # Setup directories for this version
    version_download_dir = config.get_version_download_dir(version)
    version_extract_dir = config.get_version_extract_dir(version)
    version_output_dir = config.get_version_output_dir(version)

    all_man_files = []

    # Process each repository type
    for repo_type in config.repo_types:
        all_man_files.extend(
            _process_repository(
                config,
                version,
                repo_type,
                version_download_dir,
                version_extract_dir,
                version_output_dir,
            )
        )

    if not all_man_files:
        logger.error(f"No man pages were successfully processed for version {version}")
        return False

    # Link cross-references between man pages
    logger.info("Linking cross-references...")
    converter = ManPageConverter(version_output_dir)
    converter.link_cross_references(all_man_files)

    # Generate web pages
    logger.info("Generating web pages...")
    web_gen = WebGenerator(template_dir, config.output_dir)

    # Generate search index
    search_index = web_gen.generate_search_index(all_man_files, version)
    web_gen.save_search_index(search_index, version)

    # Generate index page
    web_gen.generate_index(version, search_index)

    # Generate packages index page
    web_gen.generate_packages_index(version, search_index)

    # Wrap man pages in templates
    logger.info("Generating man page HTML...")
    for man_file in all_man_files:
        web_gen.generate_manpage_html(man_file, version)

    logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}")
    return True


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for Rocky Man."""
    parser = argparse.ArgumentParser(
        description='Generate HTML documentation for Rocky Linux man pages'
    )

    parser.add_argument(
        '--versions',
        nargs='+',
        default=['8.10', '9.6', '10.0'],
        help='Rocky Linux versions to process (default: 8.10 9.6 10.0)'
    )

    parser.add_argument(
        '--repo-types',
        nargs='+',
        default=['BaseOS', 'AppStream'],
        help='Repository types to process (default: BaseOS AppStream)'
    )

    parser.add_argument(
        '--output-dir',
        type=Path,
        default=Path('./html'),
        help='Output directory for HTML files (default: ./html)'
    )

    parser.add_argument(
        '--download-dir',
        type=Path,
        default=Path('./tmp/downloads'),
        help='Directory for downloading packages (default: ./tmp/downloads)'
    )

    parser.add_argument(
        '--extract-dir',
        type=Path,
        default=Path('./tmp/extracts'),
        help='Directory for extracting man pages (default: ./tmp/extracts)'
    )

    parser.add_argument(
        '--keep-rpms',
        action='store_true',
        help='Keep downloaded RPM files after processing'
    )

    parser.add_argument(
        '--keep-extracts',
        action='store_true',
        help='Keep extracted man files after processing'
    )

    parser.add_argument(
        '--parallel-downloads',
        type=int,
        default=5,
        help='Number of parallel downloads (default: 5)'
    )

    parser.add_argument(
        '--parallel-conversions',
        type=int,
        default=10,
        help='Number of parallel HTML conversions (default: 10)'
    )

    parser.add_argument(
        '--mirror',
        default='http://dl.rockylinux.org/',
        help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)'
    )

    parser.add_argument(
        '--template-dir',
        type=Path,
        default=Path(__file__).parent.parent.parent / 'templates',
        help='Template directory (default: ./templates)'
    )

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose logging'
    )

    parser.add_argument(
        '--skip-sections',
        nargs='*',
        default=None,
        help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.'
    )

    parser.add_argument(
        '--skip-packages',
        nargs='*',
        default=None,
        help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.'
    )

    parser.add_argument(
        '--skip-languages',
        action='store_true',
        default=None,
        help='Skip non-English man pages (default: enabled)'
    )

    parser.add_argument(
        '--keep-languages',
        action='store_true',
        help='Keep all languages (disables --skip-languages)'
    )

    parser.add_argument(
        '--allow-all-sections',
        action='store_true',
        help='Include all man sections (overrides --skip-sections)'
    )

    return parser


def main() -> int:
    """Main entry point.

    Parses the command line, processes every requested version and writes the
    root index for the versions that succeeded.

    Returns:
        Process exit code: 0 if at least one version was processed, 1 if none
        were.
    """
    args = _build_parser().parse_args()

    # Setup logging
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    # Handle filtering options. Skipping is the default and --skip-languages
    # (store_true) can only re-affirm it, so --keep-languages alone decides.
    skip_languages = not args.keep_languages

    # Create configuration
    config = Config(
        base_url=args.mirror,
        versions=args.versions,
        repo_types=args.repo_types,
        download_dir=args.download_dir,
        extract_dir=args.extract_dir,
        output_dir=args.output_dir,
        keep_rpms=args.keep_rpms,
        keep_extracts=args.keep_extracts,
        parallel_downloads=args.parallel_downloads,
        parallel_conversions=args.parallel_conversions,
        skip_sections=args.skip_sections,
        skip_packages=args.skip_packages,
        skip_languages=skip_languages,
        allow_all_sections=args.allow_all_sections
    )

    logger.info("Rocky Man - Rocky Linux Man Page Generator")
    logger.info(f"Versions: {', '.join(config.versions)}")
    logger.info(f"Repositories: {', '.join(config.repo_types)}")
    logger.info(f"Output directory: {config.output_dir}")

    # Log filtering configuration
    if config.skip_sections:
        logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
    else:
        logger.info("Including all man sections")

    if config.skip_packages:
        logger.info(f"Skipping packages: {', '.join(config.skip_packages)}")

    if config.skip_languages:
        logger.info("Skipping non-English languages")
    else:
        logger.info("Including all languages")

    # Process each version; one failing version must not stop the others
    processed_versions = []
    failed_versions = []
    for version in config.versions:
        try:
            succeeded = process_version(config, version, args.template_dir)
        except Exception as e:
            logger.error(f"Failed to process version {version}: {e}", exc_info=True)
            succeeded = False

        if succeeded:
            processed_versions.append(version)
        else:
            failed_versions.append(version)

    if not processed_versions:
        logger.error("No versions were successfully processed")
        return 1

    # Generate root index
    logger.info("Generating root index page...")
    web_gen = WebGenerator(args.template_dir, config.output_dir)
    web_gen.generate_root_index(processed_versions)

    logger.info("=" * 60)
    logger.info("Processing complete!")
    logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
    if failed_versions:
        # Surface partial failures in the summary instead of leaving them
        # buried earlier in the log.
        logger.warning(f"Versions that failed: {', '.join(failed_versions)}")
    logger.info(f"Output directory: {config.output_dir.absolute()}")
    logger.info("=" * 60)

    return 0


if __name__ == '__main__':
    sys.exit(main())