From 890d7fc8f9be4d259c0ad7c8168e5a046155424d Mon Sep 17 00:00:00 2001
From: Stephen Simpson
Date: Thu, 4 Dec 2025 12:40:26 -0600
Subject: [PATCH] CUSP-1342 - Fix See Also
Signed-off-by: Stephen Simpson
---
src/rocky_man/main.py | 149 ++++++++++++++-------------
src/rocky_man/processor/converter.py | 84 ++++++++++++++-
2 files changed, 159 insertions(+), 74 deletions(-)
diff --git a/src/rocky_man/main.py b/src/rocky_man/main.py
index 253ec19..b17a06b 100644
--- a/src/rocky_man/main.py
+++ b/src/rocky_man/main.py
@@ -16,16 +16,12 @@ def setup_logging(verbose: bool = False):
level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(
level=level,
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- datefmt='%Y-%m-%d %H:%M:%S'
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ datefmt="%Y-%m-%d %H:%M:%S",
)
-def process_version(
- config: Config,
- version: str,
- template_dir: Path
-) -> bool:
+def process_version(config: Config, version: str, template_dir: Path) -> bool:
"""Process a single Rocky Linux version.
Args:
@@ -67,7 +63,7 @@ def process_version(
repo_type=repo_type,
arch=arch,
cache_dir=cache_dir,
- download_dir=version_download_dir
+ download_dir=version_download_dir,
)
# List packages (with man pages only)
@@ -83,19 +79,19 @@ def process_version(
if config.skip_packages:
original_count = len(packages)
packages = [
- pkg for pkg in packages
- if pkg.name not in config.skip_packages
+ pkg for pkg in packages if pkg.name not in config.skip_packages
]
filtered_count = original_count - len(packages)
if filtered_count > 0:
- logger.info(f"Filtered out {filtered_count} packages based on skip list")
+ logger.info(
+ f"Filtered out {filtered_count} packages based on skip list"
+ )
logger.info(f"Processing {len(packages)} packages")
# Download packages
logger.info("Downloading packages...")
downloaded = repo_manager.download_packages(
- packages,
- max_workers=config.parallel_downloads
+ packages, max_workers=config.parallel_downloads
)
# Extract man pages
@@ -103,11 +99,10 @@ def process_version(
extractor = ManPageExtractor(
version_extract_dir,
skip_sections=config.skip_sections,
- skip_languages=config.skip_languages
+ skip_languages=config.skip_languages,
)
man_files = extractor.extract_from_packages(
- downloaded,
- max_workers=config.parallel_downloads
+ downloaded, max_workers=config.parallel_downloads
)
logger.info(f"Extracted {len(man_files)} man pages")
@@ -124,8 +119,7 @@ def process_version(
logger.info("Converting man pages to HTML...")
converter = ManPageConverter(version_output_dir)
converted = converter.convert_many(
- man_files_with_content,
- max_workers=config.parallel_conversions
+ man_files_with_content, max_workers=config.parallel_conversions
)
all_man_files.extend(converted)
@@ -163,132 +157,141 @@ def process_version(
# Generate packages index page
web_gen.generate_packages_index(version, search_index)
+ # Set HTML paths for all man files
+ for man_file in all_man_files:
+ if not man_file.html_path:
+ man_file.html_path = web_gen._get_manpage_path(man_file, version)
+
+ # Link cross-references between man pages
+ logger.info("Linking cross-references...")
+ converter.link_cross_references(all_man_files, version)
+
# Wrap man pages in templates
logger.info("Generating man page HTML...")
for man_file in all_man_files:
web_gen.generate_manpage_html(man_file, version)
- logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}")
+ logger.info(
+ f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}"
+ )
return True
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(
- description='Generate HTML documentation for Rocky Linux man pages'
+ description="Generate HTML documentation for Rocky Linux man pages"
)
parser.add_argument(
- '--versions',
- nargs='+',
- default=['8.10', '9.6', '10.0'],
- help='Rocky Linux versions to process (default: 8.10 9.6 10.0)'
+ "--versions",
+ nargs="+",
+ default=["8.10", "9.6", "10.0"],
+ help="Rocky Linux versions to process (default: 8.10 9.6 10.0)",
)
parser.add_argument(
- '--repo-types',
- nargs='+',
- default=['BaseOS', 'AppStream'],
- help='Repository types to process (default: BaseOS AppStream)'
+ "--repo-types",
+ nargs="+",
+ default=["BaseOS", "AppStream"],
+ help="Repository types to process (default: BaseOS AppStream)",
)
parser.add_argument(
- '--output-dir',
+ "--output-dir",
type=Path,
- default=Path('./html'),
- help='Output directory for HTML files (default: ./html)'
+ default=Path("./html"),
+ help="Output directory for HTML files (default: ./html)",
)
parser.add_argument(
- '--download-dir',
+ "--download-dir",
type=Path,
- default=Path('./tmp/downloads'),
- help='Directory for downloading packages (default: ./tmp/downloads)'
+ default=Path("./tmp/downloads"),
+ help="Directory for downloading packages (default: ./tmp/downloads)",
)
parser.add_argument(
- '--extract-dir',
+ "--extract-dir",
type=Path,
- default=Path('./tmp/extracts'),
- help='Directory for extracting man pages (default: ./tmp/extracts)'
+ default=Path("./tmp/extracts"),
+ help="Directory for extracting man pages (default: ./tmp/extracts)",
)
parser.add_argument(
- '--keep-rpms',
- action='store_true',
- help='Keep downloaded RPM files after processing'
+ "--keep-rpms",
+ action="store_true",
+ help="Keep downloaded RPM files after processing",
)
parser.add_argument(
- '--keep-extracts',
- action='store_true',
- help='Keep extracted man files after processing'
+ "--keep-extracts",
+ action="store_true",
+ help="Keep extracted man files after processing",
)
parser.add_argument(
- '--parallel-downloads',
+ "--parallel-downloads",
type=int,
default=5,
- help='Number of parallel downloads (default: 5)'
+ help="Number of parallel downloads (default: 5)",
)
parser.add_argument(
- '--parallel-conversions',
+ "--parallel-conversions",
type=int,
default=10,
- help='Number of parallel HTML conversions (default: 10)'
+ help="Number of parallel HTML conversions (default: 10)",
)
parser.add_argument(
- '--mirror',
- default='http://dl.rockylinux.org/',
- help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)'
+ "--mirror",
+ default="http://dl.rockylinux.org/",
+ help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)",
)
parser.add_argument(
- '--template-dir',
+ "--template-dir",
type=Path,
- default=Path(__file__).parent.parent.parent / 'templates',
- help='Template directory (default: ./templates)'
+ default=Path(__file__).parent.parent.parent / "templates",
+ help="Template directory (default: ./templates)",
)
parser.add_argument(
- '-v', '--verbose',
- action='store_true',
- help='Enable verbose logging'
+ "-v", "--verbose", action="store_true", help="Enable verbose logging"
)
parser.add_argument(
- '--skip-sections',
- nargs='*',
+ "--skip-sections",
+ nargs="*",
default=None,
- help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.'
+ help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.",
)
parser.add_argument(
- '--skip-packages',
- nargs='*',
+ "--skip-packages",
+ nargs="*",
default=None,
- help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.'
+ help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.",
)
parser.add_argument(
- '--skip-languages',
- action='store_true',
+ "--skip-languages",
+ action="store_true",
default=None,
- help='Skip non-English man pages (default: enabled)'
+ help="Skip non-English man pages (default: enabled)",
)
parser.add_argument(
- '--keep-languages',
- action='store_true',
- help='Keep all languages (disables --skip-languages)'
+ "--keep-languages",
+ action="store_true",
+ help="Keep all languages (disables --skip-languages)",
)
parser.add_argument(
- '--allow-all-sections',
- action='store_true',
- help='Include all man sections (overrides --skip-sections)'
+ "--allow-all-sections",
+ action="store_true",
+ help="Include all man sections (overrides --skip-sections)",
)
args = parser.parse_args()
@@ -319,7 +322,7 @@ def main():
skip_sections=args.skip_sections,
skip_packages=args.skip_packages,
skip_languages=skip_languages,
- allow_all_sections=args.allow_all_sections
+ allow_all_sections=args.allow_all_sections,
)
logger.info("Rocky Man - Rocky Linux Man Page Generator")
@@ -368,5 +371,5 @@ def main():
return 0
-if __name__ == '__main__':
+if __name__ == "__main__":
sys.exit(main())
diff --git a/src/rocky_man/processor/converter.py b/src/rocky_man/processor/converter.py
index 1479cbc..6dee0b2 100644
--- a/src/rocky_man/processor/converter.py
+++ b/src/rocky_man/processor/converter.py
@@ -227,9 +227,91 @@ class ManPageConverter:
View the manual page
'''
-
return html
+    def link_cross_references(self, man_files: List[ManFile], version: str) -> None:
+        """Add hyperlinks to cross-references in SEE ALSO sections.
+
+        Goes through all converted HTML files and converts man page references
+        like pty(4) into working hyperlinks.
+
+        Args:
+            man_files: List of all converted ManFile objects
+            version: Version directory name inserted into each generated link
+        """
+        # Build lookup index: (name, section) -> relative_path
+        lookup = {}
+        for mf in man_files:
+            key = (mf.name.lower(), str(mf.section))
+            if key not in lookup:
+                # Store the relative path from the version root
+                lookup[key] = f"{mf.package_name}/man{mf.section}/{mf.html_filename}"
+
+        logger.info(f"Linking cross-references across {len(man_files)} man pages...")
+
+        # Process each man page HTML content
+        for man_file in man_files:
+            if not man_file.html_content:
+                continue
+
+            try:
+                html = man_file.html_content
+
+                # Find and replace man page references
+                # Mandoc outputs references as: <b>name</b>(section)
+                # Pattern matches both <b>name</b>(section) and plain name(section)
+                pattern = (
+                    r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"
+                )
+
+                def replace_reference(match):
+                    full_match = match.group(0)
+
+                    # Check if this match is already inside an <a> tag
+                    # Look back up to 500 chars for context
+                    before_text = html[max(0, match.start() - 500) : match.start()]
+                    # Find the last <a and the last </a> before this match
+                    last_open = before_text.rfind("<a ")
+                    last_close = before_text.rfind("</a>")
+
+                    # If the last <a is after the last </a>, we're inside a link
+                    if last_open > last_close:
+                        return full_match
+
+                    if match.group(1):  # <b>name</b>(section) format
+                        name = match.group(1).lower()
+                        section = match.group(2)
+                    else:  # plain name(section) format
+                        name = match.group(3).lower()
+                        section = match.group(4)
+
+                    # Look up the referenced man page
+                    key = (name, section)
+                    if key in lookup:
+                        # Calculate relative path from current file to target
+                        target_path = lookup[key]
+                        # File structure: output_dir/version/package_name/manN/file.html
+                        # Need to go up 3 levels to reach output root, then down to version/target
+                        # Current: version/package_name/manN/file.html
+                        # Target: version/other_package/manM/file.html
+                        rel_path = f"../../../{version}/{target_path}"
+                        return f'<a href="{rel_path}">{full_match}</a>'
+
+                    return full_match
+
+                updated_html = re.sub(pattern, replace_reference, html)
+
+                # Update the content if something changed
+                if updated_html != html:
+                    man_file.html_content = updated_html
+
+            except Exception as e:
+                logger.warning(
+                    f"Error linking references in {man_file.display_name}: {e}"
+                )
+
+        logger.info("Cross-reference linking complete")
+
def _get_output_path(self, man_file: ManFile) -> Path:
"""Determine output path for HTML file.