diff --git a/src/rocky_man/main.py b/src/rocky_man/main.py index 253ec19..b17a06b 100644 --- a/src/rocky_man/main.py +++ b/src/rocky_man/main.py @@ -16,16 +16,12 @@ def setup_logging(verbose: bool = False): level = logging.DEBUG if verbose else logging.INFO logging.basicConfig( level=level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", ) -def process_version( - config: Config, - version: str, - template_dir: Path -) -> bool: +def process_version(config: Config, version: str, template_dir: Path) -> bool: """Process a single Rocky Linux version. Args: @@ -67,7 +63,7 @@ def process_version( repo_type=repo_type, arch=arch, cache_dir=cache_dir, - download_dir=version_download_dir + download_dir=version_download_dir, ) # List packages (with man pages only) @@ -83,19 +79,19 @@ def process_version( if config.skip_packages: original_count = len(packages) packages = [ - pkg for pkg in packages - if pkg.name not in config.skip_packages + pkg for pkg in packages if pkg.name not in config.skip_packages ] filtered_count = original_count - len(packages) if filtered_count > 0: - logger.info(f"Filtered out {filtered_count} packages based on skip list") + logger.info( + f"Filtered out {filtered_count} packages based on skip list" + ) logger.info(f"Processing {len(packages)} packages") # Download packages logger.info("Downloading packages...") downloaded = repo_manager.download_packages( - packages, - max_workers=config.parallel_downloads + packages, max_workers=config.parallel_downloads ) # Extract man pages @@ -103,11 +99,10 @@ def process_version( extractor = ManPageExtractor( version_extract_dir, skip_sections=config.skip_sections, - skip_languages=config.skip_languages + skip_languages=config.skip_languages, ) man_files = extractor.extract_from_packages( - downloaded, - max_workers=config.parallel_downloads + downloaded, 
max_workers=config.parallel_downloads ) logger.info(f"Extracted {len(man_files)} man pages") @@ -124,8 +119,7 @@ def process_version( logger.info("Converting man pages to HTML...") converter = ManPageConverter(version_output_dir) converted = converter.convert_many( - man_files_with_content, - max_workers=config.parallel_conversions + man_files_with_content, max_workers=config.parallel_conversions ) all_man_files.extend(converted) @@ -163,132 +157,141 @@ def process_version( # Generate packages index page web_gen.generate_packages_index(version, search_index) + # Set HTML paths for all man files + for man_file in all_man_files: + if not man_file.html_path: + man_file.html_path = web_gen._get_manpage_path(man_file, version) + + # Link cross-references between man pages + logger.info("Linking cross-references...") + converter.link_cross_references(all_man_files, version) + # Wrap man pages in templates logger.info("Generating man page HTML...") for man_file in all_man_files: web_gen.generate_manpage_html(man_file, version) - logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}") + logger.info( + f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}" + ) return True def main(): """Main entry point.""" parser = argparse.ArgumentParser( - description='Generate HTML documentation for Rocky Linux man pages' + description="Generate HTML documentation for Rocky Linux man pages" ) parser.add_argument( - '--versions', - nargs='+', - default=['8.10', '9.6', '10.0'], - help='Rocky Linux versions to process (default: 8.10 9.6 10.0)' + "--versions", + nargs="+", + default=["8.10", "9.6", "10.0"], + help="Rocky Linux versions to process (default: 8.10 9.6 10.0)", ) parser.add_argument( - '--repo-types', - nargs='+', - default=['BaseOS', 'AppStream'], - help='Repository types to process (default: BaseOS AppStream)' + "--repo-types", + nargs="+", + default=["BaseOS", "AppStream"], + help="Repository types to process 
(default: BaseOS AppStream)", ) parser.add_argument( - '--output-dir', + "--output-dir", type=Path, - default=Path('./html'), - help='Output directory for HTML files (default: ./html)' + default=Path("./html"), + help="Output directory for HTML files (default: ./html)", ) parser.add_argument( - '--download-dir', + "--download-dir", type=Path, - default=Path('./tmp/downloads'), - help='Directory for downloading packages (default: ./tmp/downloads)' + default=Path("./tmp/downloads"), + help="Directory for downloading packages (default: ./tmp/downloads)", ) parser.add_argument( - '--extract-dir', + "--extract-dir", type=Path, - default=Path('./tmp/extracts'), - help='Directory for extracting man pages (default: ./tmp/extracts)' + default=Path("./tmp/extracts"), + help="Directory for extracting man pages (default: ./tmp/extracts)", ) parser.add_argument( - '--keep-rpms', - action='store_true', - help='Keep downloaded RPM files after processing' + "--keep-rpms", + action="store_true", + help="Keep downloaded RPM files after processing", ) parser.add_argument( - '--keep-extracts', - action='store_true', - help='Keep extracted man files after processing' + "--keep-extracts", + action="store_true", + help="Keep extracted man files after processing", ) parser.add_argument( - '--parallel-downloads', + "--parallel-downloads", type=int, default=5, - help='Number of parallel downloads (default: 5)' + help="Number of parallel downloads (default: 5)", ) parser.add_argument( - '--parallel-conversions', + "--parallel-conversions", type=int, default=10, - help='Number of parallel HTML conversions (default: 10)' + help="Number of parallel HTML conversions (default: 10)", ) parser.add_argument( - '--mirror', - default='http://dl.rockylinux.org/', - help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)' + "--mirror", + default="http://dl.rockylinux.org/", + help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)", ) parser.add_argument( - '--template-dir', + 
"--template-dir", type=Path, - default=Path(__file__).parent.parent.parent / 'templates', - help='Template directory (default: ./templates)' + default=Path(__file__).parent.parent.parent / "templates", + help="Template directory (default: ./templates)", ) parser.add_argument( - '-v', '--verbose', - action='store_true', - help='Enable verbose logging' + "-v", "--verbose", action="store_true", help="Enable verbose logging" ) parser.add_argument( - '--skip-sections', - nargs='*', + "--skip-sections", + nargs="*", default=None, - help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.' + help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.", ) parser.add_argument( - '--skip-packages', - nargs='*', + "--skip-packages", + nargs="*", default=None, - help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.' + help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.", ) parser.add_argument( - '--skip-languages', - action='store_true', + "--skip-languages", + action="store_true", default=None, - help='Skip non-English man pages (default: enabled)' + help="Skip non-English man pages (default: enabled)", ) parser.add_argument( - '--keep-languages', - action='store_true', - help='Keep all languages (disables --skip-languages)' + "--keep-languages", + action="store_true", + help="Keep all languages (disables --skip-languages)", ) parser.add_argument( - '--allow-all-sections', - action='store_true', - help='Include all man sections (overrides --skip-sections)' + "--allow-all-sections", + action="store_true", + help="Include all man sections (overrides --skip-sections)", ) args = parser.parse_args() @@ -319,7 +322,7 @@ def main(): skip_sections=args.skip_sections, skip_packages=args.skip_packages, skip_languages=skip_languages, - allow_all_sections=args.allow_all_sections + allow_all_sections=args.allow_all_sections, ) logger.info("Rocky Man - Rocky 
Linux Man Page Generator") @@ -368,5 +371,5 @@ def main(): return 0 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/src/rocky_man/processor/converter.py b/src/rocky_man/processor/converter.py index 1479cbc..6dee0b2 100644 --- a/src/rocky_man/processor/converter.py +++ b/src/rocky_man/processor/converter.py @@ -227,9 +227,91 @@ class ManPageConverter: View the manual page

def link_cross_references(self, man_files: "List[ManFile]", version: str) -> None:
    """Turn man page cross-references in converted HTML into hyperlinks.

    Scans the ``html_content`` of every converted page for references of
    the form ``name(section)`` (for example ``pty(4)``), either plain or
    with the name wrapped in ``<b>...</b>``, and wraps each reference that
    resolves to a page in ``man_files`` in an ``<a href>`` element.
    References to unknown pages, and references that already sit inside an
    ``<a>`` element, are left untouched, so calling this twice does not
    nest links.

    Pages are updated in place; a page whose content is empty or ``None``
    is skipped. A failure on one page is logged as a warning and does not
    stop the remaining pages from being processed.

    Args:
        man_files: All converted ManFile objects. Each must provide
            ``name``, ``section``, ``package_name``, ``html_filename``,
            ``html_content`` and ``display_name``.
        version: Rocky Linux version whose output tree the pages belong
            to; used as a path component of every generated link.
    """
    # Function-scope import keeps this method self-contained without
    # touching the module's import block.
    from html import escape

    # (lowercased name, section) -> path relative to the version root.
    # The first page seen for a key wins, so duplicates shipped by later
    # packages never override an earlier one.
    lookup = {}
    for mf in man_files:
        lookup.setdefault(
            (mf.name.lower(), str(mf.section)),
            f"{mf.package_name}/man{mf.section}/{mf.html_filename}",
        )

    logger.info("Linking cross-references across %d man pages...", len(man_files))

    # One pattern, compiled once for all pages. Alternatives, in priority
    # order:
    #   anchor - a complete <a ...>...</a> element, matched only so that it
    #            is consumed and anything inside it is never re-linked
    #   bold   - <b>name</b>(section)
    #            NOTE(review): assumes mandoc marks reference names with
    #            <b>; confirm against real converter output.
    #   plain  - name(section)
    reference_re = re.compile(
        r"(?P<anchor><a\b[^>]*>.*?</a>)"
        r"|<b>(?P<bold>[\w.\-]+)</b>\((?P<bold_sec>\d+[a-z]*)\)"
        r"|\b(?P<plain>[\w.\-]+)\((?P<plain_sec>\d+[a-z]*)\)",
        re.DOTALL,
    )

    def replace_reference(match):
        original = match.group(0)
        if match.group("anchor"):
            # Already a link: keep it exactly as it is.
            return original

        name = match.group("bold") or match.group("plain")
        section = match.group("bold_sec") or match.group("plain_sec")
        target = lookup.get((name.lower(), section))
        if target is None:
            return original

        # Pages are laid out as <version>/<package>/manN/<file>.html, so
        # three "../" steps reach the output root before descending into
        # <version>/<target>.
        href = escape(f"../../../{version}/{target}", quote=True)
        return f'<a href="{href}">{original}</a>'

    for man_file in man_files:
        content = man_file.html_content
        if not content:
            continue

        try:
            linked = reference_re.sub(replace_reference, content)
        except Exception as e:
            # Deliberately best-effort: one malformed page (e.g. content
            # that is not a string) must not abort linking for the rest.
            logger.warning(
                f"Error linking references in {man_file.display_name}: {e}"
            )
            continue

        # Only write back when something was actually linked.
        if linked != content:
            man_file.html_content = linked

    logger.info("Cross-reference linking complete")