411 lines
13 KiB
Python
411 lines
13 KiB
Python
"""Main entry point for Rocky Man."""
|
|
|
|
import argparse
|
|
import logging
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from .utils.config import Config
|
|
from .repo import RepoManager
|
|
from .processor import ManPageExtractor, ManPageConverter
|
|
from .web import WebGenerator
|
|
|
|
|
|
def setup_logging(verbose: bool = False):
    """Set up root logging for a command-line run.

    Args:
        verbose: When true the log level is DEBUG, otherwise INFO.
    """
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    # Timestamped "<time> - <logger> - <level> - <message>" records.
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=log_level,
    )
|
|
|
|
|
|
def process_version(config: Config, version: str, template_dir: Path) -> bool:
    """Build the HTML man-page tree for one Rocky Linux release.

    For every repository type in ``config.repo_types`` the packages that
    ship man pages are listed, filtered against ``config.skip_packages``,
    downloaded, extracted and converted to HTML.  An exception raised while
    handling one repository is logged and the remaining repositories are
    still processed.  Once at least one page has been converted, the search
    index, the index pages and the templated man pages are generated.

    Args:
        config: Configuration object
        version: Rocky Linux version to process
        template_dir: Path to templates directory

    Returns:
        True when man pages were converted and the web pages generated;
        False when no repository produced a converted man page.
    """
    logger = logging.getLogger(__name__)
    logger.info(f"Processing Rocky Linux {version}")

    # Per-version working directories.
    rpm_dir = config.get_version_download_dir(version)
    extract_root = config.get_version_extract_dir(version)
    html_root = config.get_version_output_dir(version)

    all_man_files = []

    for repo_type in config.repo_types:
        logger.info(f"Processing {repo_type} repository")

        # Only the first configured architecture is queried
        # (man pages are arch-independent).
        arch = config.architectures[0]

        # Cache directory handed to the repository manager for this repo.
        cache_dir = config.download_dir / f".cache/{version}/{repo_type}"

        try:
            repo_manager = RepoManager(
                config=config,
                version=version,
                repo_type=repo_type,
                arch=arch,
                cache_dir=cache_dir,
                download_dir=rpm_dir,
            )

            # Only packages that ship man pages are of interest.
            packages = repo_manager.list_packages(with_manpages_only=True)
            if not packages:
                logger.warning(f"No packages found in {repo_type}")
                continue

            logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")

            # Drop packages named in the skip list, if one is configured.
            if config.skip_packages:
                kept = [
                    pkg for pkg in packages if pkg.name not in config.skip_packages
                ]
                filtered_count = len(packages) - len(kept)
                packages = kept
                if filtered_count > 0:
                    logger.info(
                        f"Filtered out {filtered_count} packages based on skip list"
                    )
                    logger.info(f"Processing {len(packages)} packages")

            logger.info("Downloading packages...")
            downloaded = repo_manager.download_packages(
                packages, max_workers=config.parallel_downloads
            )

            logger.info("Extracting man pages...")
            extractor = ManPageExtractor(
                extract_root,
                skip_sections=config.skip_sections,
                skip_languages=config.skip_languages,
            )
            man_files = extractor.extract_from_packages(
                downloaded, max_workers=config.parallel_downloads
            )
            logger.info(f"Extracted {len(man_files)} man pages")

            # Pair each man file with its content; files without content
            # are left out of the conversion step.
            logger.info("Reading man page content...")
            loaded = (
                (entry, extractor.read_manpage_content(entry)) for entry in man_files
            )
            man_files_with_content = [(entry, text) for entry, text in loaded if text]

            logger.info("Converting man pages to HTML...")
            converter = ManPageConverter(html_root)
            all_man_files.extend(
                converter.convert_many(
                    man_files_with_content, max_workers=config.parallel_conversions
                )
            )

            # Remove intermediate artefacts unless the user asked to keep them.
            if not config.keep_rpms:
                logger.info("Cleaning up downloaded packages...")
                for package in downloaded:
                    repo_manager.cleanup_package(package)

            if not config.keep_extracts:
                logger.info("Cleaning up extracted files...")
                for package in downloaded:
                    extractor.cleanup_extracts(package)

        except Exception as e:
            # One failing repository must not stop the others; log the
            # traceback and fall through to the next loop iteration.
            logger.exception(f"Error processing {repo_type}: {e}")

    if not all_man_files:
        logger.error(f"No man pages were successfully processed for version {version}")
        return False

    logger.info("Generating web pages...")
    site = WebGenerator(template_dir, config.output_dir)

    # The search index is built first and then feeds both index pages.
    search_index = site.generate_search_index(all_man_files, version)
    site.save_search_index(search_index, version)
    site.generate_index(version, search_index)
    site.generate_packages_index(version, search_index)

    # Fill in an HTML path for any man file that does not have one yet.
    for man_file in all_man_files:
        if man_file.html_path:
            continue
        man_file.html_path = site._get_manpage_path(man_file, version)

    # `converter` is the instance created by the most recent repository
    # iteration that reached the conversion step; it is guaranteed to be
    # bound here because all_man_files is non-empty.
    logger.info("Linking cross-references...")
    converter.link_cross_references(all_man_files, version)

    logger.info("Generating man page HTML...")
    for man_file in all_man_files:
        site.generate_manpage_html(man_file, version)

    logger.info(
        f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}"
    )
    return True
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the Rocky Man generator."""
    parser = argparse.ArgumentParser(
        description="Generate HTML documentation for Rocky Linux man pages"
    )

    # --- What to process -------------------------------------------------
    parser.add_argument(
        "--versions",
        nargs="+",
        default=["8.10", "9.6", "10.0"],
        help="Rocky Linux versions to process (default: 8.10 9.6 10.0)",
    )
    parser.add_argument(
        "--repo-types",
        nargs="+",
        default=["BaseOS", "AppStream"],
        help="Repository types to process (default: BaseOS AppStream)",
    )

    # --- Working directories ---------------------------------------------
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("./html"),
        help="Output directory for HTML files (default: ./html)",
    )
    parser.add_argument(
        "--download-dir",
        type=Path,
        default=Path("./tmp/downloads"),
        help="Directory for downloading packages (default: ./tmp/downloads)",
    )
    parser.add_argument(
        "--extract-dir",
        type=Path,
        default=Path("./tmp/extracts"),
        help="Directory for extracting man pages (default: ./tmp/extracts)",
    )

    # --- Cleanup and parallelism -----------------------------------------
    parser.add_argument(
        "--keep-rpms",
        action="store_true",
        help="Keep downloaded RPM files after processing",
    )
    parser.add_argument(
        "--keep-extracts",
        action="store_true",
        help="Keep extracted man files after processing",
    )
    parser.add_argument(
        "--parallel-downloads",
        type=int,
        default=5,
        help="Number of parallel downloads (default: 5)",
    )
    parser.add_argument(
        "--parallel-conversions",
        type=int,
        default=10,
        help="Number of parallel HTML conversions (default: 10)",
    )

    # --- Source mirror ---------------------------------------------------
    parser.add_argument(
        "--mirror",
        default="http://dl.rockylinux.org/",
        help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)",
    )
    parser.add_argument(
        "--vault",
        action="store_true",
        help="Use vault directory instead of pub (vault/rocky instead of pub/rocky)",
    )

    # --- Site generation -------------------------------------------------
    parser.add_argument(
        "--existing-versions",
        nargs="*",
        metavar="VERSION",
        help="List of existing versions to include in root index (e.g., 8.10 9.7)",
    )
    parser.add_argument(
        "--template-dir",
        type=Path,
        default=Path(__file__).parent.parent.parent / "templates",
        help="Template directory (default: ./templates)",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose logging"
    )

    # --- Filtering -------------------------------------------------------
    # default=None lets Config tell "not given" apart from an explicit
    # empty list.
    parser.add_argument(
        "--skip-sections",
        nargs="*",
        default=None,
        help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.",
    )
    parser.add_argument(
        "--skip-packages",
        nargs="*",
        default=None,
        help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.",
    )
    parser.add_argument(
        "--skip-languages",
        action="store_true",
        default=None,
        help="Skip non-English man pages (default: enabled)",
    )
    parser.add_argument(
        "--keep-languages",
        action="store_true",
        help="Keep all languages (disables --skip-languages)",
    )
    parser.add_argument(
        "--allow-all-sections",
        action="store_true",
        help="Include all man sections (overrides --skip-sections)",
    )

    return parser


def _version_key(v):
    """Return a numeric ``(major, minor)`` sort key for a version string.

    Anything that is not exactly ``"<int>.<int>"`` sorts first as ``(0, 0)``.
    """
    try:
        major, minor = v.split(".")
        return (int(major), int(minor))
    except (ValueError, AttributeError):
        return (0, 0)


def main():
    """Main entry point.

    Parses the command line, builds the configuration, processes every
    requested Rocky Linux version and finally writes the root index page.

    Returns:
        0 when at least one version was processed successfully, 1 when
        none was.
    """
    args = _build_parser().parse_args()

    # Setup logging
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    # Skipping non-English pages is the default and --skip-languages can
    # only re-affirm it (store_true yields None or True), so the only way
    # to turn it off is --keep-languages.
    skip_languages = not args.keep_languages

    # Determine content directory
    content_dir = "vault/rocky" if args.vault else "pub/rocky"

    # Create configuration
    config = Config(
        base_url=args.mirror,
        content_dir=content_dir,
        versions=args.versions,
        repo_types=args.repo_types,
        download_dir=args.download_dir,
        extract_dir=args.extract_dir,
        output_dir=args.output_dir,
        keep_rpms=args.keep_rpms,
        keep_extracts=args.keep_extracts,
        parallel_downloads=args.parallel_downloads,
        parallel_conversions=args.parallel_conversions,
        skip_sections=args.skip_sections,
        skip_packages=args.skip_packages,
        skip_languages=skip_languages,
        allow_all_sections=args.allow_all_sections,
    )

    # Versions already present in the output directory.  On a first run the
    # directory may not exist yet, and Path.iterdir() would then raise
    # FileNotFoundError, so a missing directory counts as "nothing found".
    # NOTE: re.match only anchors at the start of the name, so directories
    # such as "8.10-old" are picked up as well.
    scanned_versions = []
    if config.output_dir.is_dir():
        scanned_versions = [
            d.name
            for d in config.output_dir.iterdir()
            if d.is_dir() and re.match(r"\d+\.\d+", d.name)
        ]
    arg_versions = args.existing_versions or []

    # Sort versions numerically by (major, minor)
    existing_versions = sorted(set(scanned_versions + arg_versions), key=_version_key)
    all_versions = sorted(set(existing_versions + config.versions), key=_version_key)

    logger.info("Rocky Man - Rocky Linux Man Page Generator")
    logger.info(f"Versions to process: {', '.join(config.versions)}")
    logger.info(f"All known versions: {', '.join(all_versions)}")
    logger.info(f"Repositories: {', '.join(config.repo_types)}")
    logger.info(f"Output directory: {config.output_dir}")

    # Log filtering configuration
    if config.skip_sections:
        logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
    else:
        logger.info("Including all man sections")

    if config.skip_packages:
        logger.info(f"Skipping packages: {', '.join(config.skip_packages)}")

    if config.skip_languages:
        logger.info("Skipping non-English languages")
    else:
        logger.info("Including all languages")

    # Process each version; a failure in one version is logged and the
    # remaining versions are still attempted.
    processed_versions = []
    for version in config.versions:
        try:
            if process_version(config, version, args.template_dir):
                processed_versions.append(version)
        except Exception as e:
            logger.exception(f"Failed to process version {version}: {e}")

    if not processed_versions:
        logger.error("No versions were successfully processed")
        return 1

    # The root index lists every known version, not only those processed now.
    logger.info("Generating root index page...")
    web_gen = WebGenerator(args.template_dir, config.output_dir)
    web_gen.generate_root_index(all_versions)

    logger.info("=" * 60)
    logger.info("Processing complete!")
    logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
    logger.info(f"Output directory: {config.output_dir.absolute()}")
    logger.info("=" * 60)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the shell as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
|