diff --git a/rocky_man.py b/rocky_man.py
index 1deca0c..5897d05 100644
--- a/rocky_man.py
+++ b/rocky_man.py
@@ -11,6 +11,8 @@ from urllib.parse import urljoin
 from typing import List, Dict, Any, Callable
 from pathlib import Path
 from jinja2 import Environment, FileSystemLoader
+import concurrent.futures
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 sitemap = {}
 
@@ -77,22 +79,32 @@ class ManMaker:
         for member in rpm.getmembers():
             if "/man/" in member.name:
                 man_file = ManFile(filelocation=extract_dir / member.name)
-                man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
-                with open(man_file.filelocation, "wb") as f:
-                    f.write(rpm.extractfile(member).read())
+                if not man_file.filelocation.exists():
+                    man_file.filelocation.parent.mkdir(parents=True, exist_ok=True)
+                    with open(man_file.filelocation, "wb") as f:
+                        f.write(rpm.extractfile(member).read())
                 man_files.append(man_file)
 
-        self.get_man_file_contents(package, man_files)
-
+        self.get_man_file_contents(package, man_files)
+
     def get_man_file_contents(self, package: Package, man_files: List[ManFile]):
-        for man_file in man_files:
-            try:
-                man_file.man_text = self.zcat(man_file.filelocation)
-                self.convert_man_to_html(man_file, package)
-            except gzip.BadGzipFile as e:
-                # print(f"{e}: {man_file.filelocation}")
-                pass
-
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(self.process_man_file, man_file, package) for man_file in man_files]
+            for future in concurrent.futures.as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:
+                    # Handle exceptions if needed
+                    pass
+
+    def process_man_file(self, man_file: ManFile, package: Package):
+        try:
+            man_file.man_text = self.zcat(man_file.filelocation)
+            self.convert_man_to_html(man_file, package)
+        except gzip.BadGzipFile as e:
+            # print(f"{e}: {man_file.filelocation}")
+            pass
+
     def convert_man_to_html(self, man_file: ManFile, package: Package):
         process = subprocess.Popen(
             ['mandoc', '-T', 'html', '-O', 'fragment,toc'],
@@ -202,7 +214,6 @@ class RepoManager:
             repo.enabled = self.enabled
             repo.gpgcheck = self.gpgcheck
             self.base.repos.add(repo)
-            print(f"Repository added: {repo.name}")
 
         self.base.fill_sack(load_system_repo=False, load_available_repos=True)
 
@@ -272,12 +283,15 @@ class RepoManager:
             print(f"Error downloading package: {e}")
             return
         for package in packages:
-            download_url = urljoin(package.baseurl, package.location)
             download_path = self.download_dir / f"{package.filename}"
             package.download_path = download_path
-            self.download_file(download_url, download_path)
+
+            if not download_path.exists():
+                download_url = urljoin(package.baseurl, package.location)
+                self.download_file(download_url, download_path)
 
             # Process the package immediately after downloading
+            print(f"Extracting files from {package.filename}...")
             man_maker.extract_man_files(package)
         return package
 
@@ -286,11 +300,14 @@ class RepoManager:
         packages = self.list_packages_object()
         downloaded_files = []
 
-        for package in packages:
-            try:
-                downloaded_files.append(self.download_package(package.name, man_maker))
-            except Exception as e:
-                print(f"Error downloading package: {e}")
+        with ThreadPoolExecutor() as executor:
+            future_to_package = {executor.submit(self.download_package, package.name, man_maker): package for package in packages}
+            for future in as_completed(future_to_package):
+                package = future_to_package[future]
+                try:
+                    downloaded_files.append(future.result())
+                except Exception as e:
+                    print(f"Error downloading package {package.name}: {e}")
 
         return downloaded_files
 
@@ -305,8 +322,7 @@ def save_json(sitemap: Dict[str, Dict[str, Any]], json_file_location: Path):
         json.dump(sorted_sitemap, f)
 
     # Save the gzipped JSON file
-    gzipped_file_location = f"{json_file_location}.gz"
-    with gzip.open(gzipped_file_location, "wt") as gz:
+    with gzip.open(f"{json_file_location}.gz", "wt") as gz:
        json.dump(sorted_sitemap, gz)
 
 def html_folder_export(man_file: ManFile, package: Package, html_base_dir: str) -> Path:
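Outside the patch itself, here is a minimal, self-contained sketch of the ThreadPoolExecutor / as_completed fan-out pattern that both new code paths (get_man_file_contents and the parallel download loop) rely on. The names process_item and process_all are illustrative only and do not appear in rocky_man.py.

from concurrent.futures import ThreadPoolExecutor, as_completed

def process_item(item: str) -> str:
    # Stand-in for per-item work such as downloading a package or converting a man page.
    return item.upper()

def process_all(items: list) -> list:
    results = []
    with ThreadPoolExecutor() as executor:
        # Submit one task per item and remember which item each future belongs to.
        future_to_item = {executor.submit(process_item, item): item for item in items}
        for future in as_completed(future_to_item):
            item = future_to_item[future]
            try:
                results.append(future.result())
            except Exception as e:
                # One failing item is reported but does not abort the batch.
                print(f"Error processing {item}: {e}")
    return results

if __name__ == "__main__":
    print(process_all(["bash", "coreutils", "vim"]))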