From 907d92bb162654fd175afcccf671198ac5f1157a Mon Sep 17 00:00:00 2001 From: Stephen Simpson Date: Thu, 4 Dec 2025 12:15:33 -0600 Subject: [PATCH] CUSP-1339 -- Fixed another usecase Signed-off-by: Stephen Simpson --- src/rocky_man/processor/converter.py | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/rocky_man/processor/converter.py b/src/rocky_man/processor/converter.py index 4b8ed06..1479cbc 100644 --- a/src/rocky_man/processor/converter.py +++ b/src/rocky_man/processor/converter.py @@ -40,11 +40,7 @@ class ManPageConverter: try: # Run mandoc with no arguments - it will show usage and exit # We just want to verify the command exists, not that it succeeds - subprocess.run( - ['mandoc'], - capture_output=True, - timeout=5 - ) + subprocess.run(["mandoc"], capture_output=True, timeout=5) return True except FileNotFoundError: # mandoc command not found @@ -76,16 +72,27 @@ class ManPageConverter: # Check if mandoc output indicates this is a symlink/redirect # Pattern:
/usr/share/man/man8/target.8.gz
# or:
See the file /usr/share/man/man8/target.8.
- symlink_match = re.search(r'
(?:See the file )?(/usr/share/man/man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\.
', html) + # or:
See the file man1/builtin.1.
+ symlink_match = re.search( + r'
.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?
', + html, + re.DOTALL, + ) if not symlink_match: # Try simpler pattern without "See the file" or period - symlink_match = re.search(r'
(/usr/share/man/man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?)
', html) + symlink_match = re.search( + r'
.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?
', + html, + re.DOTALL, + ) if symlink_match: name = symlink_match.group(2) section = symlink_match.group(3) - logger.info(f"{man_file.display_name} detected as symlink to {name}({section})") - html = self._generate_redirect_html({'name': name, 'section': section}) + logger.info( + f"{man_file.display_name} detected as symlink to {name}({section})" + ) + html = self._generate_redirect_html({"name": name, "section": section}) # Store in ManFile object man_file.html_content = html @@ -96,7 +103,7 @@ class ManPageConverter: # Save HTML file output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w', encoding='utf-8') as f: + with open(output_path, "w", encoding="utf-8") as f: f.write(html) logger.debug(f"Converted {man_file.display_name} -> {output_path}") @@ -107,9 +114,7 @@ class ManPageConverter: return False def convert_many( - self, - man_files: List[tuple], - max_workers: int = 10 + self, man_files: List[tuple], max_workers: int = 10 ) -> List[ManFile]: """Convert multiple man pages in parallel. @@ -152,21 +157,21 @@ class ManPageConverter: """ try: result = subprocess.run( - ['mandoc', '-T', 'html', '-O', 'fragment,toc'], - input=content.encode('utf-8'), + ["mandoc", "-T", "html", "-O", "fragment,toc"], + input=content.encode("utf-8"), capture_output=True, - timeout=30 + timeout=30, ) if result.returncode != 0: - stderr = result.stderr.decode('utf-8', errors='replace') + stderr = result.stderr.decode("utf-8", errors="replace") logger.warning(f"mandoc returned error: {stderr}") # Sometimes mandoc returns non-zero but still produces output if result.stdout: - return result.stdout.decode('utf-8', errors='replace') + return result.stdout.decode("utf-8", errors="replace") return None - return result.stdout.decode('utf-8', errors='replace') + return result.stdout.decode("utf-8", errors="replace") except subprocess.TimeoutExpired: logger.error("mandoc conversion timed out") @@ -186,14 +191,10 @@ class ManPageConverter: """ # Remove empty parentheses in header cells html = re.sub( - r'\(\)', - '', - html + r'\(\)', '', html ) html = re.sub( - r'\(\)', - '', - html + r'\(\)', '', html ) # Strip leading/trailing whitespace @@ -210,8 +211,8 @@ class ManPageConverter: Returns: HTML fragment for redirect page """ - name = target_info['name'] - section = target_info['section'] + name = target_info["name"] + section = target_info["section"] # Generate the relative path to the target man page # Symlinks are in the same package, just different file names @@ -229,7 +230,6 @@ class ManPageConverter: return html - def _get_output_path(self, man_file: ManFile) -> Path: """Determine output path for HTML file.