Refactor Rocky Man Page Generator
- Improved logging and error handling in main.py, including better version management and configuration handling.
- Enhanced RepoManager to dynamically switch between pub and vault repositories for package retrieval.
- Updated ManPageConverter to handle symlink detection and generate appropriate redirect HTML.
- Refined WebGenerator to support dynamic version grid layout and improved HTML generation for man pages and indexes.
- Modified templates to remove trademark symbols and enhance styling for version cards.
- Added support for existing versions in the root index generation.

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
@@ -16,16 +17,12 @@ def setup_logging(verbose: bool = False):
|
||||
level = logging.DEBUG if verbose else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=level,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
|
||||
|
||||
def process_version(
|
||||
config: Config,
|
||||
version: str,
|
||||
template_dir: Path
|
||||
) -> bool:
|
||||
def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
"""Process a single Rocky Linux version.
|
||||
|
||||
Args:
|
||||
@@ -53,21 +50,18 @@ def process_version(
|
||||
# Use first available architecture (man pages are arch-independent)
|
||||
arch = config.architectures[0]
|
||||
|
||||
# Get repository URL
|
||||
repo_url = config.get_repo_url(version, repo_type, arch)
|
||||
|
||||
# Create cache dir for this repo
|
||||
cache_dir = config.download_dir / f".cache/{version}/{repo_type}"
|
||||
|
||||
try:
|
||||
# Initialize repository manager
|
||||
repo_manager = RepoManager(
|
||||
repo_url=repo_url,
|
||||
config=config,
|
||||
version=version,
|
||||
repo_type=repo_type,
|
||||
arch=arch,
|
||||
cache_dir=cache_dir,
|
||||
download_dir=version_download_dir
|
||||
download_dir=version_download_dir,
|
||||
)
|
||||
|
||||
# List packages (with man pages only)
|
||||
@@ -83,19 +77,19 @@ def process_version(
|
||||
if config.skip_packages:
|
||||
original_count = len(packages)
|
||||
packages = [
|
||||
pkg for pkg in packages
|
||||
if pkg.name not in config.skip_packages
|
||||
pkg for pkg in packages if pkg.name not in config.skip_packages
|
||||
]
|
||||
filtered_count = original_count - len(packages)
|
||||
if filtered_count > 0:
|
||||
logger.info(f"Filtered out {filtered_count} packages based on skip list")
|
||||
logger.info(
|
||||
f"Filtered out {filtered_count} packages based on skip list"
|
||||
)
|
||||
logger.info(f"Processing {len(packages)} packages")
|
||||
|
||||
# Download packages
|
||||
logger.info("Downloading packages...")
|
||||
downloaded = repo_manager.download_packages(
|
||||
packages,
|
||||
max_workers=config.parallel_downloads
|
||||
packages, max_workers=config.parallel_downloads
|
||||
)
|
||||
|
||||
# Extract man pages
|
||||
@@ -103,11 +97,10 @@ def process_version(
|
||||
extractor = ManPageExtractor(
|
||||
version_extract_dir,
|
||||
skip_sections=config.skip_sections,
|
||||
skip_languages=config.skip_languages
|
||||
skip_languages=config.skip_languages,
|
||||
)
|
||||
man_files = extractor.extract_from_packages(
|
||||
downloaded,
|
||||
max_workers=config.parallel_downloads
|
||||
downloaded, max_workers=config.parallel_downloads
|
||||
)
|
||||
|
||||
logger.info(f"Extracted {len(man_files)} man pages")
|
||||
@@ -124,8 +117,7 @@ def process_version(
|
||||
logger.info("Converting man pages to HTML...")
|
||||
converter = ManPageConverter(version_output_dir)
|
||||
converted = converter.convert_many(
|
||||
man_files_with_content,
|
||||
max_workers=config.parallel_conversions
|
||||
man_files_with_content, max_workers=config.parallel_conversions
|
||||
)
|
||||
|
||||
all_man_files.extend(converted)
|
||||
@@ -149,11 +141,6 @@ def process_version(
|
||||
logger.error(f"No man pages were successfully processed for version {version}")
|
||||
return False
|
||||
|
||||
# Link cross-references between man pages
|
||||
logger.info("Linking cross-references...")
|
||||
converter = ManPageConverter(version_output_dir)
|
||||
converter.link_cross_references(all_man_files)
|
||||
|
||||
# Generate web pages
|
||||
logger.info("Generating web pages...")
|
||||
web_gen = WebGenerator(template_dir, config.output_dir)
|
||||
@@ -168,132 +155,154 @@ def process_version(
|
||||
# Generate packages index page
|
||||
web_gen.generate_packages_index(version, search_index)
|
||||
|
||||
# Set HTML paths for all man files
|
||||
for man_file in all_man_files:
|
||||
if not man_file.html_path:
|
||||
man_file.html_path = web_gen._get_manpage_path(man_file, version)
|
||||
|
||||
# Link cross-references between man pages
|
||||
logger.info("Linking cross-references...")
|
||||
converter.link_cross_references(all_man_files, version)
|
||||
|
||||
# Wrap man pages in templates
|
||||
logger.info("Generating man page HTML...")
|
||||
for man_file in all_man_files:
|
||||
web_gen.generate_manpage_html(man_file, version)
|
||||
|
||||
logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}")
|
||||
logger.info(
|
||||
f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}"
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate HTML documentation for Rocky Linux man pages'
|
||||
description="Generate HTML documentation for Rocky Linux man pages"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--versions',
|
||||
nargs='+',
|
||||
default=['8.10', '9.6', '10.0'],
|
||||
help='Rocky Linux versions to process (default: 8.10 9.6 10.0)'
|
||||
"--versions",
|
||||
nargs="+",
|
||||
default=["8.10", "9.6", "10.0"],
|
||||
help="Rocky Linux versions to process (default: 8.10 9.6 10.0)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--repo-types',
|
||||
nargs='+',
|
||||
default=['BaseOS', 'AppStream'],
|
||||
help='Repository types to process (default: BaseOS AppStream)'
|
||||
"--repo-types",
|
||||
nargs="+",
|
||||
default=["BaseOS", "AppStream"],
|
||||
help="Repository types to process (default: BaseOS AppStream)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output-dir',
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=Path('./html'),
|
||||
help='Output directory for HTML files (default: ./html)'
|
||||
default=Path("./html"),
|
||||
help="Output directory for HTML files (default: ./html)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--download-dir',
|
||||
"--download-dir",
|
||||
type=Path,
|
||||
default=Path('./tmp/downloads'),
|
||||
help='Directory for downloading packages (default: ./tmp/downloads)'
|
||||
default=Path("./tmp/downloads"),
|
||||
help="Directory for downloading packages (default: ./tmp/downloads)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--extract-dir',
|
||||
"--extract-dir",
|
||||
type=Path,
|
||||
default=Path('./tmp/extracts'),
|
||||
help='Directory for extracting man pages (default: ./tmp/extracts)'
|
||||
default=Path("./tmp/extracts"),
|
||||
help="Directory for extracting man pages (default: ./tmp/extracts)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--keep-rpms',
|
||||
action='store_true',
|
||||
help='Keep downloaded RPM files after processing'
|
||||
"--keep-rpms",
|
||||
action="store_true",
|
||||
help="Keep downloaded RPM files after processing",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--keep-extracts',
|
||||
action='store_true',
|
||||
help='Keep extracted man files after processing'
|
||||
"--keep-extracts",
|
||||
action="store_true",
|
||||
help="Keep extracted man files after processing",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--parallel-downloads',
|
||||
"--parallel-downloads",
|
||||
type=int,
|
||||
default=5,
|
||||
help='Number of parallel downloads (default: 5)'
|
||||
help="Number of parallel downloads (default: 5)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--parallel-conversions',
|
||||
"--parallel-conversions",
|
||||
type=int,
|
||||
default=10,
|
||||
help='Number of parallel HTML conversions (default: 10)'
|
||||
help="Number of parallel HTML conversions (default: 10)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--mirror',
|
||||
default='http://dl.rockylinux.org/',
|
||||
help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)'
|
||||
"--mirror",
|
||||
default="http://dl.rockylinux.org/",
|
||||
help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--template-dir',
|
||||
"--vault",
|
||||
action="store_true",
|
||||
help="Use vault directory instead of pub (vault/rocky instead of pub/rocky)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--existing-versions",
|
||||
nargs="*",
|
||||
metavar="VERSION",
|
||||
help="List of existing versions to include in root index (e.g., 8.10 9.7)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--template-dir",
|
||||
type=Path,
|
||||
default=Path(__file__).parent.parent.parent / 'templates',
|
||||
help='Template directory (default: ./templates)'
|
||||
default=Path(__file__).parent.parent.parent / "templates",
|
||||
help="Template directory (default: ./templates)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-v', '--verbose',
|
||||
action='store_true',
|
||||
help='Enable verbose logging'
|
||||
"-v", "--verbose", action="store_true", help="Enable verbose logging"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--skip-sections',
|
||||
nargs='*',
|
||||
"--skip-sections",
|
||||
nargs="*",
|
||||
default=None,
|
||||
help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.'
|
||||
help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--skip-packages',
|
||||
nargs='*',
|
||||
"--skip-packages",
|
||||
nargs="*",
|
||||
default=None,
|
||||
help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.'
|
||||
help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--skip-languages',
|
||||
action='store_true',
|
||||
"--skip-languages",
|
||||
action="store_true",
|
||||
default=None,
|
||||
help='Skip non-English man pages (default: enabled)'
|
||||
help="Skip non-English man pages (default: enabled)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--keep-languages',
|
||||
action='store_true',
|
||||
help='Keep all languages (disables --skip-languages)'
|
||||
"--keep-languages",
|
||||
action="store_true",
|
||||
help="Keep all languages (disables --skip-languages)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--allow-all-sections',
|
||||
action='store_true',
|
||||
help='Include all man sections (overrides --skip-sections)'
|
||||
"--allow-all-sections",
|
||||
action="store_true",
|
||||
help="Include all man sections (overrides --skip-sections)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -309,9 +318,13 @@ def main():
|
||||
elif args.skip_languages is not None:
|
||||
skip_languages = args.skip_languages
|
||||
|
||||
# Determine content directory
|
||||
content_dir = "vault/rocky" if args.vault else "pub/rocky"
|
||||
|
||||
# Create configuration
|
||||
config = Config(
|
||||
base_url=args.mirror,
|
||||
content_dir=content_dir,
|
||||
versions=args.versions,
|
||||
repo_types=args.repo_types,
|
||||
download_dir=args.download_dir,
|
||||
@@ -324,11 +337,31 @@ def main():
|
||||
skip_sections=args.skip_sections,
|
||||
skip_packages=args.skip_packages,
|
||||
skip_languages=skip_languages,
|
||||
allow_all_sections=args.allow_all_sections
|
||||
allow_all_sections=args.allow_all_sections,
|
||||
)
|
||||
|
||||
# Get existing versions from scan and argument
|
||||
scanned_versions = [
|
||||
d.name
|
||||
for d in config.output_dir.iterdir()
|
||||
if d.is_dir() and re.match(r"\d+\.\d+", d.name)
|
||||
]
|
||||
arg_versions = args.existing_versions or []
|
||||
|
||||
# Sort versions numerically by (major, minor)
|
||||
def version_key(v):
|
||||
try:
|
||||
major, minor = v.split(".")
|
||||
return (int(major), int(minor))
|
||||
except (ValueError, AttributeError):
|
||||
return (0, 0)
|
||||
|
||||
existing_versions = sorted(set(scanned_versions + arg_versions), key=version_key)
|
||||
all_versions = sorted(set(existing_versions + config.versions), key=version_key)
|
||||
|
||||
logger.info("Rocky Man - Rocky Linux Man Page Generator")
|
||||
logger.info(f"Versions: {', '.join(config.versions)}")
|
||||
logger.info(f"Versions to process: {', '.join(config.versions)}")
|
||||
logger.info(f"All known versions: {', '.join(all_versions)}")
|
||||
logger.info(f"Repositories: {', '.join(config.repo_types)}")
|
||||
logger.info(f"Output directory: {config.output_dir}")
|
||||
|
||||
@@ -362,7 +395,7 @@ def main():
|
||||
# Generate root index
|
||||
logger.info("Generating root index page...")
|
||||
web_gen = WebGenerator(args.template_dir, config.output_dir)
|
||||
web_gen.generate_root_index(processed_versions)
|
||||
web_gen.generate_root_index(all_versions)
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("Processing complete!")
|
||||
@@ -373,5 +406,5 @@ def main():
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@@ -40,11 +40,7 @@ class ManPageConverter:
|
||||
try:
|
||||
# Run mandoc with no arguments - it will show usage and exit
|
||||
# We just want to verify the command exists, not that it succeeds
|
||||
subprocess.run(
|
||||
['mandoc'],
|
||||
capture_output=True,
|
||||
timeout=5
|
||||
)
|
||||
subprocess.run(["mandoc"], capture_output=True, timeout=5)
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
# mandoc command not found
|
||||
@@ -73,6 +69,31 @@ class ManPageConverter:
|
||||
# Clean up HTML
|
||||
html = self._clean_html(html)
|
||||
|
||||
# Check if mandoc output indicates this is a symlink/redirect
|
||||
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
|
||||
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
|
||||
# or: <div class="manual-text">See the file man1/builtin.1.</div>
|
||||
symlink_match = re.search(
|
||||
r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
if not symlink_match:
|
||||
# Try simpler pattern without "See the file" or period
|
||||
symlink_match = re.search(
|
||||
r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
if symlink_match:
|
||||
name = symlink_match.group(2)
|
||||
section = symlink_match.group(3)
|
||||
logger.info(
|
||||
f"{man_file.display_name} detected as symlink to {name}({section})"
|
||||
)
|
||||
html = self._generate_redirect_html({"name": name, "section": section})
|
||||
|
||||
# Store in ManFile object
|
||||
man_file.html_content = html
|
||||
|
||||
@@ -82,7 +103,7 @@ class ManPageConverter:
|
||||
|
||||
# Save HTML file
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
logger.debug(f"Converted {man_file.display_name} -> {output_path}")
|
||||
@@ -93,9 +114,7 @@ class ManPageConverter:
|
||||
return False
|
||||
|
||||
def convert_many(
|
||||
self,
|
||||
man_files: List[tuple],
|
||||
max_workers: int = 10
|
||||
self, man_files: List[tuple], max_workers: int = 10
|
||||
) -> List[ManFile]:
|
||||
"""Convert multiple man pages in parallel.
|
||||
|
||||
@@ -138,21 +157,21 @@ class ManPageConverter:
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['mandoc', '-T', 'html', '-O', 'fragment,toc'],
|
||||
input=content.encode('utf-8'),
|
||||
["mandoc", "-T", "html", "-O", "fragment,toc"],
|
||||
input=content.encode("utf-8"),
|
||||
capture_output=True,
|
||||
timeout=30
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
||||
stderr = result.stderr.decode("utf-8", errors="replace")
|
||||
logger.warning(f"mandoc returned error: {stderr}")
|
||||
# Sometimes mandoc returns non-zero but still produces output
|
||||
if result.stdout:
|
||||
return result.stdout.decode('utf-8', errors='replace')
|
||||
return result.stdout.decode("utf-8", errors="replace")
|
||||
return None
|
||||
|
||||
return result.stdout.decode('utf-8', errors='replace')
|
||||
return result.stdout.decode("utf-8", errors="replace")
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error("mandoc conversion timed out")
|
||||
@@ -172,14 +191,10 @@ class ManPageConverter:
|
||||
"""
|
||||
# Remove empty parentheses in header cells
|
||||
html = re.sub(
|
||||
r'<td class="head-ltitle">\(\)</td>',
|
||||
'<td class="head-ltitle"></td>',
|
||||
html
|
||||
r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
|
||||
)
|
||||
html = re.sub(
|
||||
r'<td class="head-rtitle">\(\)</td>',
|
||||
'<td class="head-rtitle"></td>',
|
||||
html
|
||||
r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
|
||||
)
|
||||
|
||||
# Strip leading/trailing whitespace
|
||||
@@ -187,7 +202,34 @@ class ManPageConverter:
|
||||
|
||||
return html
|
||||
|
||||
def link_cross_references(self, man_files: List[ManFile]) -> None:
|
||||
def _generate_redirect_html(self, target_info: dict) -> str:
|
||||
"""Generate HTML for a symlink/redirect page.
|
||||
|
||||
Args:
|
||||
target_info: Dict with 'name' and 'section' of target man page
|
||||
|
||||
Returns:
|
||||
HTML fragment for redirect page
|
||||
"""
|
||||
name = target_info["name"]
|
||||
section = target_info["section"]
|
||||
|
||||
# Generate the relative path to the target man page
|
||||
# Symlinks are in the same package, just different file names
|
||||
target_filename = f"{name}.{section}.html"
|
||||
|
||||
# Generate simple redirect HTML with a working hyperlink
|
||||
html = f'''<div class="symlink-notice" style="padding: 2rem; text-align: center; background-color: var(--bg-tertiary); border-radius: 8px; border: 1px solid var(--border-color);">
|
||||
<p style="font-size: 1.2rem; margin-bottom: 1.5rem; color: var(--text-primary);">
|
||||
This is an alias for <b>{name}</b>({section}).
|
||||
</p>
|
||||
<p style="font-size: 1.1rem;">
|
||||
<a href="{target_filename}" style="color: var(--accent-primary); text-decoration: none; font-weight: 500;">View the manual page</a>
|
||||
</p>
|
||||
</div>'''
|
||||
return html
|
||||
|
||||
def link_cross_references(self, man_files: List[ManFile], version: str) -> None:
|
||||
"""Add hyperlinks to cross-references in SEE ALSO sections.
|
||||
|
||||
Goes through all converted HTML files and converts man page references
|
||||
@@ -206,31 +248,31 @@ class ManPageConverter:
|
||||
|
||||
logger.info(f"Linking cross-references across {len(man_files)} man pages...")
|
||||
|
||||
# Process each man page HTML file
|
||||
# Process each man page HTML content
|
||||
for man_file in man_files:
|
||||
if not man_file.html_path or not man_file.html_path.exists():
|
||||
if not man_file.html_content:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Read the HTML
|
||||
with open(man_file.html_path, 'r', encoding='utf-8') as f:
|
||||
html = f.read()
|
||||
html = man_file.html_content
|
||||
|
||||
# Find and replace man page references
|
||||
# Mandoc outputs references as: <b>name</b>(section)
|
||||
# Pattern matches both <b>name</b>(section) and plain name(section)
|
||||
pattern = r'<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)'
|
||||
pattern = (
|
||||
r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"
|
||||
)
|
||||
|
||||
def replace_reference(match):
|
||||
full_match = match.group(0)
|
||||
|
||||
# Check if this match is already inside an <a> tag
|
||||
# Look back up to 500 chars for context
|
||||
before_text = html[max(0, match.start()-500):match.start()]
|
||||
before_text = html[max(0, match.start() - 500) : match.start()]
|
||||
|
||||
# Find the last <a and last </a> before this match
|
||||
last_open = before_text.rfind('<a ')
|
||||
last_close = before_text.rfind('</a>')
|
||||
last_open = before_text.rfind("<a ")
|
||||
last_close = before_text.rfind("</a>")
|
||||
|
||||
# If the last <a> is after the last </a>, we're inside a link
|
||||
if last_open > last_close:
|
||||
@@ -249,23 +291,24 @@ class ManPageConverter:
|
||||
# Calculate relative path from current file to target
|
||||
target_path = lookup[key]
|
||||
# File structure: output_dir/version/package_name/manN/file.html
|
||||
# Need to go up 3 levels to reach version root
|
||||
# Current: package_name/manN/file.html
|
||||
# Target: other_package/manM/file.html
|
||||
rel_path = f"../../../{target_path}"
|
||||
# Need to go up 3 levels to reach output root, then down to version/target
|
||||
# Current: version/package_name/manN/file.html
|
||||
# Target: version/other_package/manM/file.html
|
||||
rel_path = f"../../../{version}/{target_path}"
|
||||
return f'<a href="{rel_path}">{full_match}</a>'
|
||||
|
||||
return full_match
|
||||
|
||||
updated_html = re.sub(pattern, replace_reference, html)
|
||||
|
||||
# Only write if something changed
|
||||
# Update the content if something changed
|
||||
if updated_html != html:
|
||||
with open(man_file.html_path, 'w', encoding='utf-8') as f:
|
||||
f.write(updated_html)
|
||||
man_file.html_content = updated_html
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error linking references in {man_file.display_name}: {e}")
|
||||
logger.warning(
|
||||
f"Error linking references in {man_file.display_name}: {e}"
|
||||
)
|
||||
|
||||
logger.info("Cross-reference linking complete")
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class RepoManager:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo_url: str,
|
||||
config,
|
||||
version: str,
|
||||
repo_type: str,
|
||||
arch: str,
|
||||
@@ -35,14 +35,14 @@ class RepoManager:
|
||||
"""Initialize repository manager.
|
||||
|
||||
Args:
|
||||
repo_url: Full repository URL
|
||||
config: Configuration object
|
||||
version: Rocky Linux version (e.g., '9.5')
|
||||
repo_type: Repository type ('BaseOS' or 'AppStream')
|
||||
arch: Architecture (e.g., 'x86_64')
|
||||
cache_dir: Directory for caching metadata
|
||||
download_dir: Directory for downloading packages
|
||||
"""
|
||||
self.repo_url = repo_url
|
||||
self.config = config
|
||||
self.version = version
|
||||
self.repo_type = repo_type
|
||||
self.arch = arch
|
||||
@@ -58,7 +58,7 @@ class RepoManager:
|
||||
self.base.conf.errorlevel = 0
|
||||
self.base.conf.cachedir = str(self.cache_dir / "dnf")
|
||||
|
||||
self._configure_repo()
|
||||
self.repo_url = None
|
||||
self.packages_with_manpages: Optional[Set[str]] = None
|
||||
|
||||
def _configure_repo(self):
|
||||
@@ -88,8 +88,32 @@ class RepoManager:
|
||||
if self.packages_with_manpages is not None:
|
||||
return self.packages_with_manpages
|
||||
|
||||
parser = ContentsParser(self.repo_url, self.cache_dir)
|
||||
self.packages_with_manpages = parser.get_packages_with_manpages()
|
||||
# Try pub first, then vault if it fails
|
||||
content_dirs = ["pub/rocky", "vault/rocky"]
|
||||
for content_dir in content_dirs:
|
||||
original_content_dir = self.config.content_dir
|
||||
self.config.content_dir = content_dir
|
||||
try:
|
||||
repo_url = self.config.get_repo_url(
|
||||
self.version, self.repo_type, self.arch
|
||||
)
|
||||
parser = ContentsParser(repo_url, self.cache_dir)
|
||||
packages = parser.get_packages_with_manpages()
|
||||
if packages: # Only use if it has man pages
|
||||
self.packages_with_manpages = packages
|
||||
self.repo_url = repo_url # Set for later use
|
||||
logger.info(f"Using repository: {repo_url}")
|
||||
break
|
||||
else:
|
||||
logger.warning(f"No man pages found in {content_dir}, trying next")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load metadata from {content_dir}: {e}")
|
||||
finally:
|
||||
self.config.content_dir = original_content_dir
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Failed to load repository metadata for {self.version} {self.repo_type} from both pub and vault"
|
||||
)
|
||||
|
||||
return self.packages_with_manpages
|
||||
|
||||
@@ -102,7 +126,9 @@ class RepoManager:
|
||||
Returns:
|
||||
List of Package objects
|
||||
"""
|
||||
logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})")
|
||||
logger.info(
|
||||
f"Querying packages from {self.repo_type} ({self.version}/{self.arch})"
|
||||
)
|
||||
|
||||
# Get packages with man pages if filtering
|
||||
manpage_packages = None
|
||||
@@ -110,6 +136,9 @@ class RepoManager:
|
||||
manpage_packages = self.discover_packages_with_manpages()
|
||||
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
|
||||
|
||||
# Configure DNF repo now that we have the correct repo_url
|
||||
self._configure_repo()
|
||||
|
||||
packages = []
|
||||
|
||||
# Query all available packages
|
||||
@@ -176,7 +205,7 @@ class RepoManager:
|
||||
response.raise_for_status()
|
||||
|
||||
# Download with progress (optional: could add progress bar here)
|
||||
with open(download_path, 'wb') as f:
|
||||
with open(download_path, "wb") as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
@@ -192,9 +221,7 @@ class RepoManager:
|
||||
return False
|
||||
|
||||
def download_packages(
|
||||
self,
|
||||
packages: List[Package],
|
||||
max_workers: int = 5
|
||||
self, packages: List[Package], max_workers: int = 5
|
||||
) -> List[Package]:
|
||||
"""Download multiple packages in parallel.
|
||||
|
||||
@@ -210,8 +237,7 @@ class RepoManager:
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all download tasks
|
||||
future_to_pkg = {
|
||||
executor.submit(self.download_package, pkg): pkg
|
||||
for pkg in packages
|
||||
executor.submit(self.download_package, pkg): pkg for pkg in packages
|
||||
}
|
||||
|
||||
# Process completed downloads
|
||||
@@ -223,7 +249,9 @@ class RepoManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {pkg.name}: {e}")
|
||||
|
||||
logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages")
|
||||
logger.info(
|
||||
f"Successfully downloaded {len(downloaded)}/{len(packages)} packages"
|
||||
)
|
||||
return downloaded
|
||||
|
||||
def cleanup_package(self, package: Package):
|
||||
|
||||
@@ -36,7 +36,7 @@ class WebGenerator:
|
||||
# Setup Jinja2 environment
|
||||
self.env = Environment(
|
||||
loader=FileSystemLoader(str(self.template_dir)),
|
||||
autoescape=select_autoescape(['html', 'xml'])
|
||||
autoescape=select_autoescape(["html", "xml"]),
|
||||
)
|
||||
|
||||
def generate_manpage_html(self, man_file: ManFile, version: str) -> bool:
|
||||
@@ -54,7 +54,7 @@ class WebGenerator:
|
||||
return False
|
||||
|
||||
try:
|
||||
template = self.env.get_template('manpage.html')
|
||||
template = self.env.get_template("manpage.html")
|
||||
|
||||
html = template.render(
|
||||
title=f"{man_file.display_name} - {man_file.package_name} - Rocky Linux {version}",
|
||||
@@ -62,8 +62,8 @@ class WebGenerator:
|
||||
package_name=man_file.package_name,
|
||||
version=version,
|
||||
section=man_file.section,
|
||||
language=man_file.language or 'en',
|
||||
content=man_file.html_content
|
||||
language=man_file.language or "en",
|
||||
content=man_file.html_content,
|
||||
)
|
||||
|
||||
# Ensure output path is set
|
||||
@@ -72,7 +72,7 @@ class WebGenerator:
|
||||
|
||||
man_file.html_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(man_file.html_path, 'w', encoding='utf-8') as f:
|
||||
with open(man_file.html_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
return True
|
||||
@@ -92,19 +92,19 @@ class WebGenerator:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
template = self.env.get_template('index.html')
|
||||
template = self.env.get_template("index.html")
|
||||
|
||||
html = template.render(
|
||||
title=f"Rocky Linux {version} Man Pages",
|
||||
version=version,
|
||||
total_pages=len(search_data),
|
||||
packages=sorted(search_data.keys())
|
||||
packages=sorted(search_data.keys()),
|
||||
)
|
||||
|
||||
index_path = self.output_dir / version / 'index.html'
|
||||
index_path = self.output_dir / version / "index.html"
|
||||
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(index_path, 'w', encoding='utf-8') as f:
|
||||
with open(index_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
logger.info(f"Generated index for version {version}")
|
||||
@@ -113,8 +113,10 @@ class WebGenerator:
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating index for {version}: {e}")
|
||||
return False
|
||||
|
||||
def generate_packages_index(self, version: str, search_data: Dict[str, Any]) -> bool:
|
||||
|
||||
def generate_packages_index(
|
||||
self, version: str, search_data: Dict[str, Any]
|
||||
) -> bool:
|
||||
"""Generate full packages index page.
|
||||
|
||||
Args:
|
||||
@@ -127,37 +129,36 @@ class WebGenerator:
|
||||
try:
|
||||
# Group packages by first letter
|
||||
packages_by_letter = {}
|
||||
|
||||
|
||||
for pkg_name, pages in search_data.items():
|
||||
first_char = pkg_name[0].upper()
|
||||
if not first_char.isalpha():
|
||||
first_char = 'other'
|
||||
|
||||
first_char = "other"
|
||||
|
||||
if first_char not in packages_by_letter:
|
||||
packages_by_letter[first_char] = []
|
||||
|
||||
packages_by_letter[first_char].append({
|
||||
'name': pkg_name,
|
||||
'count': len(pages)
|
||||
})
|
||||
|
||||
packages_by_letter[first_char].append(
|
||||
{"name": pkg_name, "count": len(pages)}
|
||||
)
|
||||
|
||||
# Sort packages within each letter
|
||||
for letter in packages_by_letter:
|
||||
packages_by_letter[letter].sort(key=lambda x: x['name'])
|
||||
packages_by_letter[letter].sort(key=lambda x: x["name"])
|
||||
|
||||
template = self.env.get_template('packages.html')
|
||||
template = self.env.get_template("packages.html")
|
||||
|
||||
html = template.render(
|
||||
title=f"All Packages - Rocky Linux {version}",
|
||||
version=version,
|
||||
total_packages=len(search_data),
|
||||
packages_by_letter=packages_by_letter
|
||||
packages_by_letter=packages_by_letter,
|
||||
)
|
||||
|
||||
output_path = self.output_dir / version / 'packages.html'
|
||||
output_path = self.output_dir / version / "packages.html"
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
logger.info(f"Generated packages index for version {version}")
|
||||
@@ -168,9 +169,7 @@ class WebGenerator:
|
||||
return False
|
||||
|
||||
def generate_search_index(
|
||||
self,
|
||||
man_files: List[ManFile],
|
||||
version: str
|
||||
self, man_files: List[ManFile], version: str
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate search index from man files.
|
||||
|
||||
@@ -191,12 +190,12 @@ class WebGenerator:
|
||||
|
||||
# Create entry for this man page
|
||||
entry = {
|
||||
'name': man_file.name,
|
||||
'section': man_file.section,
|
||||
'display_name': man_file.display_name,
|
||||
'language': man_file.language or 'en',
|
||||
'url': man_file.uri_path,
|
||||
'full_name': f"{man_file.package_name} - {man_file.display_name}"
|
||||
"name": man_file.name,
|
||||
"section": man_file.section,
|
||||
"display_name": man_file.display_name,
|
||||
"language": man_file.language or "en",
|
||||
"url": man_file.uri_path,
|
||||
"full_name": f"{man_file.package_name} - {man_file.display_name}",
|
||||
}
|
||||
|
||||
# Use display name as key (handles duplicates with different sections)
|
||||
@@ -222,18 +221,18 @@ class WebGenerator:
|
||||
version_dir = self.output_dir / version
|
||||
version_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
json_path = version_dir / 'search.json'
|
||||
gz_path = version_dir / 'search.json.gz'
|
||||
json_path = version_dir / "search.json"
|
||||
gz_path = version_dir / "search.json.gz"
|
||||
|
||||
# Sort for consistency
|
||||
sorted_index = {k: index[k] for k in sorted(index)}
|
||||
|
||||
# Save plain JSON
|
||||
with open(json_path, 'w', encoding='utf-8') as f:
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(sorted_index, f, indent=2)
|
||||
|
||||
# Save gzipped JSON
|
||||
with gzip.open(gz_path, 'wt', encoding='utf-8') as f:
|
||||
with gzip.open(gz_path, "wt", encoding="utf-8") as f:
|
||||
json.dump(sorted_index, f)
|
||||
|
||||
logger.info(f"Saved search index for {version} ({len(index)} packages)")
|
||||
@@ -269,24 +268,42 @@ class WebGenerator:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
template = self.env.get_template('root.html')
|
||||
template = self.env.get_template("root.html")
|
||||
|
||||
# Sort versions numerically (e.g., 8.10, 9.6, 10.0)
|
||||
def version_key(v):
|
||||
# Group versions by major version
|
||||
major_to_minors = {}
|
||||
for v in versions:
|
||||
try:
|
||||
parts = v.split('.')
|
||||
return tuple(int(p) for p in parts)
|
||||
except (ValueError, AttributeError):
|
||||
return (0, 0)
|
||||
major, minor = v.split(".")
|
||||
major_to_minors.setdefault(major, []).append(minor)
|
||||
except ValueError:
|
||||
continue # Skip invalid versions
|
||||
|
||||
# Sort majors ascending, minors descending within each major
|
||||
sorted_majors = sorted(major_to_minors, key=int)
|
||||
max_minors = max(len(major_to_minors[major]) for major in sorted_majors)
|
||||
num_columns = len(sorted_majors)
|
||||
|
||||
# Create rows for grid layout (each row has one version from each major)
|
||||
# This creates the data structure for proper column grouping
|
||||
version_rows = []
|
||||
for minor_idx in range(max_minors):
|
||||
row = []
|
||||
for major in sorted_majors:
|
||||
minors_list = sorted(major_to_minors[major], key=int, reverse=True)
|
||||
if minor_idx < len(minors_list):
|
||||
row.append((major, minors_list[minor_idx]))
|
||||
else:
|
||||
row.append(None) # Placeholder for empty cells
|
||||
version_rows.append(row)
|
||||
|
||||
html = template.render(
|
||||
title="Rocky Linux Man Pages",
|
||||
versions=sorted(versions, key=version_key)
|
||||
title="Rocky Linux Man Pages", version_rows=version_rows, num_columns=num_columns
|
||||
)
|
||||
|
||||
index_path = self.output_dir / 'index.html'
|
||||
index_path = self.output_dir / "index.html"
|
||||
|
||||
with open(index_path, 'w', encoding='utf-8') as f:
|
||||
with open(index_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
logger.info("Generated root index page")
|
||||
|
||||
Reference in New Issue
Block a user