* Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> * Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> --------- Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
131 lines
4.7 KiB
Python
131 lines
4.7 KiB
Python
"""ManFile model representing a man page file."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import re
|
|
|
|
|
|
@dataclass
class ManFile:
    """Represents a man page file extracted from an RPM package.

    ``section``, ``name`` and ``language`` are (re)derived from ``file_path``
    immediately after construction by ``_parse_path``.

    Attributes:
        file_path: Path to the extracted man page file
        package_name: Name of the package this man page belongs to
        section: Man page section (e.g. '1', '3p', '3pm')
        name: Man page name without section/compression extension
        language: Language code (e.g., 'en', 'es', None for default)
        content: Raw man page content (gzipped or plain text)
        html_content: Converted HTML content
        html_path: Path where HTML file is saved
    """

    file_path: Path
    package_name: str
    section: Optional[str] = None
    name: Optional[str] = None
    language: Optional[str] = None
    content: Optional[bytes] = None
    html_content: Optional[str] = None
    html_path: Optional[Path] = None

    def __post_init__(self) -> None:
        """Parse file information from the path."""
        self._parse_path()

    @staticmethod
    def _section_from_dir(part: str) -> Optional[str]:
        """Return the section encoded in a 'man<section>' directory name.

        A section directory is 'man' followed by a digit and an optional
        suffix ('man1', 'man3p', 'man3pm'). Returns None for anything else.
        """
        if part.startswith('man') and len(part) > 3 and part[3].isdigit():
            return part[3:]
        return None

    def _parse_path(self) -> None:
        """Extract section, name, and language from the file path.

        Example paths:
            /usr/share/man/man1/bash.1.gz       -> section '1', name 'bash'
            /usr/share/man/es/man1/bash.1.gz    -> same, language 'es'
            /usr/share/man/man3p/printf.3p.gz   -> section '3p', name 'printf'
        """
        parts = self.file_path.parts
        # Only directory components may name a section or a language; the
        # filename itself can legitimately look like one (e.g. 'man2html.1.gz').
        dir_parts = parts[:-1]
        filename = self.file_path.name

        # Remove .gz extension if present
        if filename.endswith('.gz'):
            filename = filename[:-3]

        # Section from the closest enclosing 'man<section>' directory.
        for part in reversed(dir_parts):
            dir_section = self._section_from_dir(part)
            if dir_section is not None:
                self.section = dir_section
                break

        # Name (and, as a fallback, section) from the filename:
        # 'foo.3pm' -> name 'foo', section '3pm'.
        name_parts = filename.split('.')
        potential_section = name_parts[-1]
        # A section is a digit optionally followed by letters (1, 3p, 3pm).
        if len(name_parts) >= 2 and potential_section[:1].isdigit():
            if not self.section:
                self.section = potential_section
            self.name = '.'.join(name_parts[:-1])
        else:
            self.name = name_parts[0]

        # Language subdirectory: /usr/share/man/<lang>/man<section>/
        for i, part in enumerate(dir_parts):
            if part == 'man' and i + 1 < len(dir_parts):
                next_part = dir_parts[i + 1]
                # Anything that is not a section directory is a candidate
                # language; codes are short (en, es, pt_BR, ...).
                if self._section_from_dir(next_part) is None and len(next_part) <= 5:
                    self.language = next_part
                break

    @property
    def display_name(self) -> str:
        """Get display name for the man page (e.g., 'bash(1)')."""
        return f"{self.name}({self.section})" if self.section else self.name

    @property
    def html_filename(self) -> str:
        """Get the HTML filename for this man page.

        Format is '<name>.<section>[.<language>].html'; the section component
        is left out when no section could be determined.
        """
        # Clean name for filesystem safety
        components = [self._clean_filename(self.name)]
        if self.section:
            components.append(self.section)
        if self.language:
            components.append(self.language)
        components.append("html")
        return ".".join(components)

    def _clean_filename(self, name: str) -> str:
        """Clean filename for filesystem safety.

        Replaces '/' and ':' with '_' and every '..' pair with '__'.
        """
        name = name.replace('/', '_')
        name = name.replace(':', '_')
        return name.replace('..', '__')

    @property
    def uri_path(self) -> str:
        """Get the URI path for this man page (relative to version root).

        Returns path like: 'bash/man1/bash.1.html'. Returns '' when
        ``html_path`` is unset, and the full ``html_path`` as a string when no
        version-like component is found.
        """
        if not self.html_path:
            return ""
        # Assuming structure: html/<version>/<package>/<section>/<file>.html
        parts = self.html_path.parts
        for i, part in enumerate(parts):
            # First component that starts like a version (e.g. '9.5');
            # everything after it is the URI path.
            if re.match(r'\d+\.\d+', part):
                return '/'.join(parts[i + 1:])
        return str(self.html_path)

    def __str__(self) -> str:
        """Return '<package_name>: <display_name>'."""
        return f"{self.package_name}: {self.display_name}"
|