"""Build a JSON sitemap of per-package man-page HTML files found in a tree."""

import os
import json
import argparse
from collections import defaultdict

# Path fragment that marks the root of the published tree; everything up to
# and including its first occurrence is dropped from each file path.
rocky_version = "8.10"

# Substrings identifying generated index/listing artifacts that must not be
# recorded as commands. ('list.json.br' is already covered by 'list.json';
# it is kept so the list mirrors the files that are actually produced.)
_EXCLUDED_MARKERS = (
    'index.html',
    'sitemap.json',
    'sitemap.xml',
    'list.json',
    'list.json.br',
)

_MAN_PREFIX = 'man'


def _section_number(man_type):
    """Return *man_type* without its leading 'man' prefix, if it has one.

    ``str.lstrip('man')`` must not be used here: it strips any run of the
    characters 'm', 'a' and 'n', so a section such as 'mann' would collapse
    to an empty string instead of 'n'.
    """
    if man_type.startswith(_MAN_PREFIX):
        return man_type[len(_MAN_PREFIX):]
    return man_type


def create_sitemap(directory, version=None):
    """Walk *directory* and map every man-page file to its sitemap entry.

    Each file path is cut after the first occurrence of the version marker
    and the remainder is read as ``/<package>/<man type>/<command file>``.
    If the marker does not occur in a path, the whole path is interpreted
    that way instead.

    Args:
        directory: Root directory to scan recursively.
        version: Version marker to cut paths at. Defaults to the
            module-level ``rocky_version``.

    Returns:
        A nested mapping ``sitemap[package][command]`` whose values are
        dicts with the keys ``"url"`` (path relative to the marker, no
        leading slash), ``"mantype"`` and ``"fullname"``. When two files
        share a package and command name, the one visited last wins.
    """
    marker = rocky_version if version is None else version
    sitemap = defaultdict(lambda: defaultdict(dict))

    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order, so the
        # output (and the winner of a duplicate key) is reproducible.
        dirs.sort()
        for file in sorted(files):
            full_filepath = os.path.join(root, file)
            # str.split rejects an empty separator, so an empty marker
            # simply means "use the whole path".
            if marker:
                filepath = full_filepath.split(marker, 1)[-1]
            else:
                filepath = full_filepath
            # Sitemap URLs always use forward slashes, whatever the OS.
            if os.sep != '/':
                filepath = filepath.replace(os.sep, '/')

            # Exclude generated index/listing files.
            if any(name in filepath for name in _EXCLUDED_MARKERS):
                continue

            filepath_parts = filepath.split('/')
            # Index 0 is the text before the first slash; a usable entry
            # needs package, man type and command file after it. Shallower
            # files are skipped rather than raising IndexError.
            if len(filepath_parts) < 4:
                continue

            package_name = filepath_parts[1]
            man_type = filepath_parts[2]
            command_file = filepath_parts[3]

            man_type_number = _section_number(man_type)
            command = command_file.split('.html', 1)[0]

            if filepath.startswith('/'):
                filepath = filepath[1:]

            fullname = f"{package_name} - {command}({man_type_number})"

            # Add command details to sitemap
            sitemap[package_name][command] = {
                "url": filepath,
                "mantype": man_type,
                "fullname": fullname
            }

    return sitemap


def convert_sitemap_to_json(sitemap, minify=False):
    """Serialize *sitemap* to a JSON string.

    Args:
        sitemap: Mapping as returned by ``create_sitemap``.
        minify: When true, emit compact JSON without whitespace; otherwise
            pretty-print with a four-space indent.

    Returns:
        The JSON document as a string.
    """
    if minify:
        return json.dumps(sitemap, separators=(',', ':'))
    return json.dumps(sitemap, indent=4)


def main(argv=None):
    """Parse command-line arguments and print the sitemap JSON to stdout."""
    parser = argparse.ArgumentParser(description='Generate sitemap JSON.')
    parser.add_argument('directory', type=str, help='Directory to scan for HTML files')
    parser.add_argument('--minify', action='store_true', help='Export minified JSON')
    parser.add_argument(
        '--rocky-version',
        default=rocky_version,
        help='Version marker at which file paths are cut (default: %(default)s)',
    )
    args = parser.parse_args(argv)

    sitemap = create_sitemap(args.directory, version=args.rocky_version)
    json_output = convert_sitemap_to_json(sitemap, minify=args.minify)
    print(json_output)


if __name__ == "__main__":
    main()