CUSP-1339 -- Fixed another use case
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
@@ -40,11 +40,7 @@ class ManPageConverter:
|
|||||||
try:
|
try:
|
||||||
# Run mandoc with no arguments - it will show usage and exit
|
# Run mandoc with no arguments - it will show usage and exit
|
||||||
# We just want to verify the command exists, not that it succeeds
|
# We just want to verify the command exists, not that it succeeds
|
||||||
subprocess.run(
|
subprocess.run(["mandoc"], capture_output=True, timeout=5)
|
||||||
['mandoc'],
|
|
||||||
capture_output=True,
|
|
||||||
timeout=5
|
|
||||||
)
|
|
||||||
return True
|
return True
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
# mandoc command not found
|
# mandoc command not found
|
||||||
@@ -76,16 +72,27 @@ class ManPageConverter:
|
|||||||
# Check if mandoc output indicates this is a symlink/redirect
|
# Check if mandoc output indicates this is a symlink/redirect
|
||||||
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
|
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
|
||||||
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
|
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
|
||||||
symlink_match = re.search(r'<div class="manual-text">(?:See the file )?(/usr/share/man/man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\.</div>', html)
|
# or: <div class="manual-text">See the file man1/builtin.1.</div>
|
||||||
|
symlink_match = re.search(
|
||||||
|
r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
|
||||||
|
html,
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
if not symlink_match:
|
if not symlink_match:
|
||||||
# Try simpler pattern without "See the file" or period
|
# Try simpler pattern without "See the file" or period
|
||||||
symlink_match = re.search(r'<div class="manual-text">(/usr/share/man/man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?)</div>', html)
|
symlink_match = re.search(
|
||||||
|
r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
|
||||||
|
html,
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
|
||||||
if symlink_match:
|
if symlink_match:
|
||||||
name = symlink_match.group(2)
|
name = symlink_match.group(2)
|
||||||
section = symlink_match.group(3)
|
section = symlink_match.group(3)
|
||||||
logger.info(f"{man_file.display_name} detected as symlink to {name}({section})")
|
logger.info(
|
||||||
html = self._generate_redirect_html({'name': name, 'section': section})
|
f"{man_file.display_name} detected as symlink to {name}({section})"
|
||||||
|
)
|
||||||
|
html = self._generate_redirect_html({"name": name, "section": section})
|
||||||
|
|
||||||
# Store in ManFile object
|
# Store in ManFile object
|
||||||
man_file.html_content = html
|
man_file.html_content = html
|
||||||
@@ -96,7 +103,7 @@ class ManPageConverter:
|
|||||||
|
|
||||||
# Save HTML file
|
# Save HTML file
|
||||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(output_path, 'w', encoding='utf-8') as f:
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
f.write(html)
|
f.write(html)
|
||||||
|
|
||||||
logger.debug(f"Converted {man_file.display_name} -> {output_path}")
|
logger.debug(f"Converted {man_file.display_name} -> {output_path}")
|
||||||
@@ -107,9 +114,7 @@ class ManPageConverter:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def convert_many(
|
def convert_many(
|
||||||
self,
|
self, man_files: List[tuple], max_workers: int = 10
|
||||||
man_files: List[tuple],
|
|
||||||
max_workers: int = 10
|
|
||||||
) -> List[ManFile]:
|
) -> List[ManFile]:
|
||||||
"""Convert multiple man pages in parallel.
|
"""Convert multiple man pages in parallel.
|
||||||
|
|
||||||
@@ -152,21 +157,21 @@ class ManPageConverter:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
['mandoc', '-T', 'html', '-O', 'fragment,toc'],
|
["mandoc", "-T", "html", "-O", "fragment,toc"],
|
||||||
input=content.encode('utf-8'),
|
input=content.encode("utf-8"),
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
timeout=30
|
timeout=30,
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
stderr = result.stderr.decode('utf-8', errors='replace')
|
stderr = result.stderr.decode("utf-8", errors="replace")
|
||||||
logger.warning(f"mandoc returned error: {stderr}")
|
logger.warning(f"mandoc returned error: {stderr}")
|
||||||
# Sometimes mandoc returns non-zero but still produces output
|
# Sometimes mandoc returns non-zero but still produces output
|
||||||
if result.stdout:
|
if result.stdout:
|
||||||
return result.stdout.decode('utf-8', errors='replace')
|
return result.stdout.decode("utf-8", errors="replace")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return result.stdout.decode('utf-8', errors='replace')
|
return result.stdout.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
logger.error("mandoc conversion timed out")
|
logger.error("mandoc conversion timed out")
|
||||||
@@ -186,14 +191,10 @@ class ManPageConverter:
|
|||||||
"""
|
"""
|
||||||
# Remove empty parentheses in header cells
|
# Remove empty parentheses in header cells
|
||||||
html = re.sub(
|
html = re.sub(
|
||||||
r'<td class="head-ltitle">\(\)</td>',
|
r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
|
||||||
'<td class="head-ltitle"></td>',
|
|
||||||
html
|
|
||||||
)
|
)
|
||||||
html = re.sub(
|
html = re.sub(
|
||||||
r'<td class="head-rtitle">\(\)</td>',
|
r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
|
||||||
'<td class="head-rtitle"></td>',
|
|
||||||
html
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Strip leading/trailing whitespace
|
# Strip leading/trailing whitespace
|
||||||
@@ -210,8 +211,8 @@ class ManPageConverter:
|
|||||||
Returns:
|
Returns:
|
||||||
HTML fragment for redirect page
|
HTML fragment for redirect page
|
||||||
"""
|
"""
|
||||||
name = target_info['name']
|
name = target_info["name"]
|
||||||
section = target_info['section']
|
section = target_info["section"]
|
||||||
|
|
||||||
# Generate the relative path to the target man page
|
# Generate the relative path to the target man page
|
||||||
# Symlinks are in the same package, just different file names
|
# Symlinks are in the same package, just different file names
|
||||||
@@ -229,7 +230,6 @@ class ManPageConverter:
|
|||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
|
||||||
def _get_output_path(self, man_file: ManFile) -> Path:
|
def _get_output_path(self, man_file: ManFile) -> Path:
|
||||||
"""Determine output path for HTML file.
|
"""Determine output path for HTML file.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user