Implement concurrent processing for man file extraction and package downloads
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
56
rocky_man.py
56
rocky_man.py
@@ -11,6 +11,8 @@ from urllib.parse import urljoin
|
||||
from typing import List, Dict, Any, Callable
|
||||
from pathlib import Path
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
import concurrent.futures
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
sitemap = {}
|
||||
|
||||
@@ -77,21 +79,31 @@ class ManMaker:
|
||||
for member in rpm.getmembers():
|
||||
if "/man/" in member.name:
|
||||
man_file = ManFile(filelocation=extract_dir / member.name)
|
||||
man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(man_file.filelocation, "wb") as f:
|
||||
f.write(rpm.extractfile(member).read())
|
||||
if not man_file.filelocation.exists():
|
||||
man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(man_file.filelocation, "wb") as f:
|
||||
f.write(rpm.extractfile(member).read())
|
||||
man_files.append(man_file)
|
||||
|
||||
self.get_man_file_contents(package, man_files)
|
||||
|
||||
def get_man_file_contents(self, package: Package, man_files: List[ManFile]):
    """Process every entry of ``man_files`` on a thread pool.

    Each man file is submitted to ``self.process_man_file`` together with
    ``package``. The method returns once every submitted task has finished.

    Processing is best-effort: an exception raised while handling one man
    file is reported on stdout and does not stop the remaining files.

    Args:
        package: Passed through unchanged to ``process_man_file``.
        man_files: Man files to process; an empty list is a no-op.
    """
    if not man_files:
        # Nothing to do; avoid spinning up a thread pool for an empty batch.
        return

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Map each future back to its man file so a failure can name the
        # file that caused it.
        future_to_man_file = {
            executor.submit(self.process_man_file, man_file, package): man_file
            for man_file in man_files
        }
        for future in concurrent.futures.as_completed(future_to_man_file):
            man_file = future_to_man_file[future]
            try:
                # result() re-raises anything the worker raised.
                future.result()
            except Exception as e:
                # Deliberately broad: one bad file must not abort the batch,
                # but the failure should be visible rather than dropped.
                print(f"Error processing man file {man_file.filelocation}: {e}")
|
||||
|
||||
def process_man_file(self, man_file: ManFile, package: Package):
    """Load one man file's text and convert it to HTML.

    Stores the result of ``self.zcat(man_file.filelocation)`` on
    ``man_file.man_text`` and then calls ``self.convert_man_to_html``.

    A ``gzip.BadGzipFile`` raised by either step is suppressed, so a file
    that is not valid gzip data is skipped without any output. Any other
    exception propagates to the caller.

    Args:
        man_file: Man file to read; its ``man_text`` attribute is set here.
        package: Passed through unchanged to ``convert_man_to_html``.
    """
    try:
        man_file.man_text = self.zcat(man_file.filelocation)
        self.convert_man_to_html(man_file, package)
    except gzip.BadGzipFile:
        # Best-effort: skip files that are not valid gzip archives.
        return
|
||||
|
||||
def convert_man_to_html(self, man_file: ManFile, package: Package):
|
||||
process = subprocess.Popen(
|
||||
@@ -202,7 +214,6 @@ class RepoManager:
|
||||
repo.enabled = self.enabled
|
||||
repo.gpgcheck = self.gpgcheck
|
||||
self.base.repos.add(repo)
|
||||
print(f"Repository added: {repo.name}")
|
||||
|
||||
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
||||
|
||||
@@ -272,12 +283,15 @@ class RepoManager:
|
||||
print(f"Error downloading package: {e}")
|
||||
return
|
||||
for package in packages:
|
||||
download_url = urljoin(package.baseurl, package.location)
|
||||
download_path = self.download_dir / f"{package.filename}"
|
||||
package.download_path = download_path
|
||||
self.download_file(download_url, download_path)
|
||||
|
||||
if not download_path.exists():
|
||||
download_url = urljoin(package.baseurl, package.location)
|
||||
self.download_file(download_url, download_path)
|
||||
|
||||
# Process the package immediately after downloading
|
||||
print(f"Extracting files from {package.filename}...")
|
||||
man_maker.extract_man_files(package)
|
||||
|
||||
return package
|
||||
@@ -286,11 +300,14 @@ class RepoManager:
|
||||
packages = self.list_packages_object()
|
||||
downloaded_files = []
|
||||
|
||||
for package in packages:
|
||||
try:
|
||||
downloaded_files.append(self.download_package(package.name, man_maker))
|
||||
except Exception as e:
|
||||
print(f"Error downloading package: {e}")
|
||||
with ThreadPoolExecutor() as executor:
|
||||
future_to_package = {executor.submit(self.download_package, package.name, man_maker): package for package in packages}
|
||||
for future in as_completed(future_to_package):
|
||||
package = future_to_package[future]
|
||||
try:
|
||||
downloaded_files.append(future.result())
|
||||
except Exception as e:
|
||||
print(f"Error downloading package {package.name}: {e}")
|
||||
|
||||
return downloaded_files
|
||||
|
||||
@@ -305,8 +322,7 @@ def save_json(sitemap: Dict[str, Dict[str, Any]], json_file_location: Path):
|
||||
json.dump(sorted_sitemap, f)
|
||||
|
||||
# Save the gzipped JSON file
|
||||
gzipped_file_location = f"{json_file_location}.gz"
|
||||
with gzip.open(gzipped_file_location, "wt") as gz:
|
||||
with gzip.open(f"{json_file_location}.gz", "wt") as gz:
|
||||
json.dump(sorted_sitemap, gz)
|
||||
|
||||
def html_folder_export(man_file: ManFile, package: Package, html_base_dir: str) -> Path:
|
||||
|
||||
Reference in New Issue
Block a user