Implement concurrent processing for man file extraction and package downloads
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
This commit is contained in:
56
rocky_man.py
56
rocky_man.py
@@ -11,6 +11,8 @@ from urllib.parse import urljoin
|
|||||||
from typing import List, Dict, Any, Callable
|
from typing import List, Dict, Any, Callable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
import concurrent.futures
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
sitemap = {}
|
sitemap = {}
|
||||||
|
|
||||||
@@ -77,21 +79,31 @@ class ManMaker:
|
|||||||
for member in rpm.getmembers():
|
for member in rpm.getmembers():
|
||||||
if "/man/" in member.name:
|
if "/man/" in member.name:
|
||||||
man_file = ManFile(filelocation=extract_dir / member.name)
|
man_file = ManFile(filelocation=extract_dir / member.name)
|
||||||
man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
|
if not man_file.filelocation.exists():
|
||||||
with open(man_file.filelocation, "wb") as f:
|
man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
|
||||||
f.write(rpm.extractfile(member).read())
|
with open(man_file.filelocation, "wb") as f:
|
||||||
|
f.write(rpm.extractfile(member).read())
|
||||||
man_files.append(man_file)
|
man_files.append(man_file)
|
||||||
|
|
||||||
self.get_man_file_contents(package, man_files)
|
self.get_man_file_contents(package, man_files)
|
||||||
|
|
||||||
def get_man_file_contents(self, package: Package, man_files: List[ManFile]):
|
def get_man_file_contents(self, package: Package, man_files: List[ManFile]):
|
||||||
for man_file in man_files:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
try:
|
futures = [executor.submit(self.process_man_file, man_file, package) for man_file in man_files]
|
||||||
man_file.man_text = self.zcat(man_file.filelocation)
|
for future in concurrent.futures.as_completed(futures):
|
||||||
self.convert_man_to_html(man_file, package)
|
try:
|
||||||
except gzip.BadGzipFile as e:
|
future.result()
|
||||||
# print(f"{e}: {man_file.filelocation}")
|
except Exception as e:
|
||||||
pass
|
# Handle exceptions if needed
|
||||||
|
pass
|
||||||
|
|
||||||
|
def process_man_file(self, man_file: ManFile, package: Package):
|
||||||
|
try:
|
||||||
|
man_file.man_text = self.zcat(man_file.filelocation)
|
||||||
|
self.convert_man_to_html(man_file, package)
|
||||||
|
except gzip.BadGzipFile as e:
|
||||||
|
# print(f"{e}: {man_file.filelocation}")
|
||||||
|
pass
|
||||||
|
|
||||||
def convert_man_to_html(self, man_file: ManFile, package: Package):
|
def convert_man_to_html(self, man_file: ManFile, package: Package):
|
||||||
process = subprocess.Popen(
|
process = subprocess.Popen(
|
||||||
@@ -202,7 +214,6 @@ class RepoManager:
|
|||||||
repo.enabled = self.enabled
|
repo.enabled = self.enabled
|
||||||
repo.gpgcheck = self.gpgcheck
|
repo.gpgcheck = self.gpgcheck
|
||||||
self.base.repos.add(repo)
|
self.base.repos.add(repo)
|
||||||
print(f"Repository added: {repo.name}")
|
|
||||||
|
|
||||||
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
||||||
|
|
||||||
@@ -272,12 +283,15 @@ class RepoManager:
|
|||||||
print(f"Error downloading package: {e}")
|
print(f"Error downloading package: {e}")
|
||||||
return
|
return
|
||||||
for package in packages:
|
for package in packages:
|
||||||
download_url = urljoin(package.baseurl, package.location)
|
|
||||||
download_path = self.download_dir / f"{package.filename}"
|
download_path = self.download_dir / f"{package.filename}"
|
||||||
package.download_path = download_path
|
package.download_path = download_path
|
||||||
self.download_file(download_url, download_path)
|
|
||||||
|
if not download_path.exists():
|
||||||
|
download_url = urljoin(package.baseurl, package.location)
|
||||||
|
self.download_file(download_url, download_path)
|
||||||
|
|
||||||
# Process the package immediately after downloading
|
# Process the package immediately after downloading
|
||||||
|
print(f"Extracting files from {package.filename}...")
|
||||||
man_maker.extract_man_files(package)
|
man_maker.extract_man_files(package)
|
||||||
|
|
||||||
return package
|
return package
|
||||||
@@ -286,11 +300,14 @@ class RepoManager:
|
|||||||
packages = self.list_packages_object()
|
packages = self.list_packages_object()
|
||||||
downloaded_files = []
|
downloaded_files = []
|
||||||
|
|
||||||
for package in packages:
|
with ThreadPoolExecutor() as executor:
|
||||||
try:
|
future_to_package = {executor.submit(self.download_package, package.name, man_maker): package for package in packages}
|
||||||
downloaded_files.append(self.download_package(package.name, man_maker))
|
for future in as_completed(future_to_package):
|
||||||
except Exception as e:
|
package = future_to_package[future]
|
||||||
print(f"Error downloading package: {e}")
|
try:
|
||||||
|
downloaded_files.append(future.result())
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error downloading package {package.name}: {e}")
|
||||||
|
|
||||||
return downloaded_files
|
return downloaded_files
|
||||||
|
|
||||||
@@ -305,8 +322,7 @@ def save_json(sitemap: Dict[str, Dict[str, Any]], json_file_location: Path):
|
|||||||
json.dump(sorted_sitemap, f)
|
json.dump(sorted_sitemap, f)
|
||||||
|
|
||||||
# Save the gzipped JSON file
|
# Save the gzipped JSON file
|
||||||
gzipped_file_location = f"{json_file_location}.gz"
|
with gzip.open(f"{json_file_location}.gz", "wt") as gz:
|
||||||
with gzip.open(gzipped_file_location, "wt") as gz:
|
|
||||||
json.dump(sorted_sitemap, gz)
|
json.dump(sorted_sitemap, gz)
|
||||||
|
|
||||||
def html_folder_export(man_file: ManFile, package: Package, html_base_dir: str) -> Path:
|
def html_folder_export(man_file: ManFile, package: Package, html_base_dir: str) -> Path:
|
||||||
|
|||||||
Reference in New Issue
Block a user