CUSP-1339 -- Fixed another use case

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
Stephen Simpson
2025-12-04 12:15:33 -06:00
parent ffc0d11bbb
commit 907d92bb16

View File

@@ -40,11 +40,7 @@ class ManPageConverter:
try: try:
# Run mandoc with no arguments - it will show usage and exit # Run mandoc with no arguments - it will show usage and exit
# We just want to verify the command exists, not that it succeeds # We just want to verify the command exists, not that it succeeds
subprocess.run( subprocess.run(["mandoc"], capture_output=True, timeout=5)
['mandoc'],
capture_output=True,
timeout=5
)
return True return True
except FileNotFoundError: except FileNotFoundError:
# mandoc command not found # mandoc command not found
@@ -76,16 +72,27 @@ class ManPageConverter:
# Check if mandoc output indicates this is a symlink/redirect # Check if mandoc output indicates this is a symlink/redirect
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div> # Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div> # or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
symlink_match = re.search(r'<div class="manual-text">(?:See the file )?(/usr/share/man/man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\.</div>', html) # or: <div class="manual-text">See the file man1/builtin.1.</div>
symlink_match = re.search(
r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
html,
re.DOTALL,
)
if not symlink_match: if not symlink_match:
# Try simpler pattern without "See the file" or period # Try simpler pattern without "See the file" or period
symlink_match = re.search(r'<div class="manual-text">(/usr/share/man/man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?)</div>', html) symlink_match = re.search(
r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
html,
re.DOTALL,
)
if symlink_match: if symlink_match:
name = symlink_match.group(2) name = symlink_match.group(2)
section = symlink_match.group(3) section = symlink_match.group(3)
logger.info(f"{man_file.display_name} detected as symlink to {name}({section})") logger.info(
html = self._generate_redirect_html({'name': name, 'section': section}) f"{man_file.display_name} detected as symlink to {name}({section})"
)
html = self._generate_redirect_html({"name": name, "section": section})
# Store in ManFile object # Store in ManFile object
man_file.html_content = html man_file.html_content = html
@@ -96,7 +103,7 @@ class ManPageConverter:
# Save HTML file # Save HTML file
output_path.parent.mkdir(parents=True, exist_ok=True) output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f: with open(output_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
logger.debug(f"Converted {man_file.display_name} -> {output_path}") logger.debug(f"Converted {man_file.display_name} -> {output_path}")
@@ -107,9 +114,7 @@ class ManPageConverter:
return False return False
def convert_many( def convert_many(
self, self, man_files: List[tuple], max_workers: int = 10
man_files: List[tuple],
max_workers: int = 10
) -> List[ManFile]: ) -> List[ManFile]:
"""Convert multiple man pages in parallel. """Convert multiple man pages in parallel.
@@ -152,21 +157,21 @@ class ManPageConverter:
""" """
try: try:
result = subprocess.run( result = subprocess.run(
['mandoc', '-T', 'html', '-O', 'fragment,toc'], ["mandoc", "-T", "html", "-O", "fragment,toc"],
input=content.encode('utf-8'), input=content.encode("utf-8"),
capture_output=True, capture_output=True,
timeout=30 timeout=30,
) )
if result.returncode != 0: if result.returncode != 0:
stderr = result.stderr.decode('utf-8', errors='replace') stderr = result.stderr.decode("utf-8", errors="replace")
logger.warning(f"mandoc returned error: {stderr}") logger.warning(f"mandoc returned error: {stderr}")
# Sometimes mandoc returns non-zero but still produces output # Sometimes mandoc returns non-zero but still produces output
if result.stdout: if result.stdout:
return result.stdout.decode('utf-8', errors='replace') return result.stdout.decode("utf-8", errors="replace")
return None return None
return result.stdout.decode('utf-8', errors='replace') return result.stdout.decode("utf-8", errors="replace")
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
logger.error("mandoc conversion timed out") logger.error("mandoc conversion timed out")
@@ -186,14 +191,10 @@ class ManPageConverter:
""" """
# Remove empty parentheses in header cells # Remove empty parentheses in header cells
html = re.sub( html = re.sub(
r'<td class="head-ltitle">\(\)</td>', r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
'<td class="head-ltitle"></td>',
html
) )
html = re.sub( html = re.sub(
r'<td class="head-rtitle">\(\)</td>', r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
'<td class="head-rtitle"></td>',
html
) )
# Strip leading/trailing whitespace # Strip leading/trailing whitespace
@@ -210,8 +211,8 @@ class ManPageConverter:
Returns: Returns:
HTML fragment for redirect page HTML fragment for redirect page
""" """
name = target_info['name'] name = target_info["name"]
section = target_info['section'] section = target_info["section"]
# Generate the relative path to the target man page # Generate the relative path to the target man page
# Symlinks are in the same package, just different file names # Symlinks are in the same package, just different file names
@@ -229,7 +230,6 @@ class ManPageConverter:
return html return html
def _get_output_path(self, man_file: ManFile) -> Path: def _get_output_path(self, man_file: ManFile) -> Path:
"""Determine output path for HTML file. """Determine output path for HTML file.