CUSP-1256 (#1)
* Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> * Complete refactor Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com> --------- Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
130
src/rocky_man/models/manfile.py
Normal file
130
src/rocky_man/models/manfile.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""ManFile model representing a man page file."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import re
|
||||
|
||||
|
||||
@dataclass
class ManFile:
    """Represents a man page file extracted from an RPM package.

    ``section``, ``name`` and ``language`` are derived from ``file_path``
    right after construction (see ``_parse_path``): ``name`` is always
    recomputed, ``section`` is replaced whenever the path or file name
    carries one, and ``language`` is set only when a language directory is
    detected.

    Attributes:
        file_path: Path to the extracted man page file
        package_name: Name of the package this man page belongs to
        section: Man page section (e.g. '1', '3p', '3pm')
        name: Man page name without section/compression extension
        language: Language code (e.g., 'en', 'es', None for default)
        content: Raw man page content (gzipped or plain text)
        html_content: Converted HTML content
        html_path: Path where HTML file is saved
    """

    file_path: Path
    package_name: str
    section: Optional[str] = None
    name: Optional[str] = None
    language: Optional[str] = None
    content: Optional[bytes] = None
    html_content: Optional[str] = None
    html_path: Optional[Path] = None

    def __post_init__(self) -> None:
        """Parse file information from the path."""
        self._parse_path()

    @staticmethod
    def _is_section_dir(part: str) -> bool:
        """Return True for directory names like 'man1', 'man3p', 'man3pm'."""
        return part.startswith('man') and len(part) > 3 and part[3].isdigit()

    def _parse_path(self) -> None:
        """Extract section, name, and language from the file path.

        Example paths:
            /usr/share/man/man1/bash.1.gz
            /usr/share/man/es/man1/bash.1.gz
            /usr/share/man/man3p/printf.3p.gz
        """
        parts = self.file_path.parts
        filename = self.file_path.name

        # Remove .gz extension if present
        if filename.endswith('.gz'):
            filename = filename[:-3]

        # Section from the closest 'man<digit>...' directory; scanning from
        # the end makes the directory nearest to the file win.
        for part in reversed(parts):
            if self._is_section_dir(part):
                self.section = part[3:]
                break

        # The last dot-separated token is a section suffix when it starts
        # with a digit ('1', '3p', '3pm'); everything before it is the name.
        name_parts = filename.split('.')
        suffix = name_parts[-1]
        if len(name_parts) >= 2 and suffix[:1].isdigit():
            # The directory-derived section takes precedence over the suffix.
            if not self.section:
                self.section = suffix
            self.name = '.'.join(name_parts[:-1])
        else:
            self.name = name_parts[0]

        # Language subdirectory: /usr/share/man/<lang>/man<section>/
        # Only directory components are inspected, so a short file name
        # sitting directly under 'man/' is never mistaken for a language.
        directories = parts[:-1]
        for i, part in enumerate(directories):
            if part == 'man' and i + 1 < len(directories):
                candidate = directories[i + 1]
                # Use the same rule as section extraction so 'man3p' or
                # 'man1x' are recognised as section dirs, not languages.
                # Common language codes are 2-5 chars (en, es, pt_BR, etc.)
                if not self._is_section_dir(candidate) and len(candidate) <= 5:
                    self.language = candidate
                break

    @property
    def display_name(self) -> str:
        """Get display name for the man page (e.g., 'bash(1)')."""
        return f"{self.name}({self.section})" if self.section else self.name

    @property
    def html_filename(self) -> str:
        """Get the HTML filename for this man page.

        The result is '<name>.<section>[.<language>].html'; the section
        segment is left out when no section is known, so the file name never
        contains the literal text 'None'.
        """
        # Clean name for filesystem safety
        safe_name = self._clean_filename(self.name)
        section_part = f".{self.section}" if self.section else ""
        language_part = f".{self.language}" if self.language else ""
        return f"{safe_name}{section_part}{language_part}.html"

    def _clean_filename(self, name: str) -> str:
        """Clean filename for filesystem safety.

        Replaces '/' and ':' with '_' and every '..' with '__'.
        """
        return name.replace('/', '_').replace(':', '_').replace('..', '__')

    @property
    def uri_path(self) -> str:
        """Get the URI path for this man page (relative to version root).

        Returns a path like 'bash/man1/bash.1.html', an empty string when
        ``html_path`` is unset, or the full ``html_path`` as a string when no
        version-looking component is found.
        """
        if not self.html_path:
            return ""
        # Assuming structure: html/<version>/<package>/<section>/<file>.html
        parts = self.html_path.parts
        for i, part in enumerate(parts):
            # First component that starts with '<digits>.<digits>' (e.g. '9.5')
            # is taken as the version; everything after it is the URI path.
            if re.match(r'\d+\.\d+', part):
                return '/'.join(parts[i + 1:])
        return str(self.html_path)

    def __str__(self) -> str:
        return f"{self.package_name}: {self.display_name}"
|
||||
Reference in New Issue
Block a user