10 Commits

Author SHA1 Message Date
Stephen Simpson
c5651d6926 Improve README.md: Update command-line options for clarity and add new arguments
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-05 09:28:41 -06:00
Stephen Simpson
70414a552d CUSP-1346 - Add support for existing-folders
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-05 09:19:52 -06:00
Stephen Simpson
1c6fa0e98c CUSP-1344 - Fix version layout on homepage
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 16:22:57 -06:00
Stephen Simpson
68b9310862 CUSP-1345 CUSP-1343 CUSP-1344 - Add older version ability
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 15:49:42 -06:00
Stephen Simpson
890d7fc8f9 CUSP-1342 - Fix See Also
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 12:40:26 -06:00
Stephen Simpson
907d92bb16 CUSP-1339 -- Fixed another usecase
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 12:15:33 -06:00
Stephen Simpson
ffc0d11bbb CUSP-1339 -- Remove TM
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 12:01:57 -06:00
Stephen Simpson
fc2f024d60 CUSP-1341 - Fix man pages that link to other pages
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 11:53:39 -06:00
Stephen Simpson
47db0185c5 Remove open source attribution notice from footer
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 11:46:14 -06:00
Stephen Simpson
f474c238dc CUSP-1340
Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 09:52:19 -06:00
17 changed files with 544 additions and 573 deletions

View File

@@ -25,26 +25,39 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    container:
+      image: rockylinux:9
     steps:
       - name: Checkout code
        uses: actions/checkout@v4
-      - name: Build Docker image
-        run: |
-          docker build -t rocky-man:latest .
-      - name: Create output directories
-        run: |
-          mkdir -p ./html ./tmp
-      - name: Build man pages in container
-        run: |
-          docker run --rm \
-            -v "$(pwd)/html:/data/html" \
-            -v "$(pwd)/tmp:/data/tmp" \
-            rocky-man:latest \
-            --versions ${{ github.event.inputs.versions || '8.10 9.6 10.0' }} \
+      - name: Install system dependencies
+        run: |
+          dnf install -y \
+            python3.11 \
+            python3.11-pip \
+            mandoc \
+            rpm-build \
+            dnf-plugins-core \
+            git
+      - name: Install UV
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+      - name: Install Python dependencies
+        run: |
+          uv pip install --system -e .
+      - name: Build man pages
+        run: |
+          python3.11 -m rocky_man.main \
+            --versions ${{ github.event.inputs.versions || '8.10 9.5' }} \
+            --output-dir ./html \
+            --download-dir ./tmp/downloads \
+            --extract-dir ./tmp/extracts \
             --verbose
        env:
          PYTHONUNBUFFERED: 1
@@ -55,3 +68,11 @@ jobs:
          name: rocky-man-pages
          path: html/
          retention-days: 30
+      - name: Deploy to GitHub Pages
+        if: github.ref == 'refs/heads/main'
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./html
+          force_orphan: true

View File

@@ -18,7 +18,7 @@ RUN dnf install -y epel-release \
 WORKDIR /app

 # Copy project files
-COPY pyproject.toml README.md LICENSE ./
+COPY pyproject.toml README.md LICENSE THIRD-PARTY-LICENSES.md ./
 COPY src ./src
 COPY templates ./templates

Jenkinsfile (vendored) · 154 changes
View File

@@ -1,154 +0,0 @@
-// Jenkinsfile for Rocky Man
-pipeline {
-    agent {
-        kubernetes {
-            yaml """
-apiVersion: v1
-kind: Pod
-metadata:
-  labels:
-    jenkins: agent
-spec:
-  containers:
-  - name: docker
-    image: docker:24-dind
-    securityContext:
-      privileged: true
-    volumeMounts:
-    - name: docker-sock
-      mountPath: /var/run
-    command:
-    - dockerd-entrypoint.sh
-  - name: docker-cli
-    image: docker:24-cli
-    command:
-    - cat
-    tty: true
-    volumeMounts:
-    - name: docker-sock
-      mountPath: /var/run
-  - name: b2
-    image: backblazeit/b2:latest
-    command:
-    - cat
-    tty: true
-  volumes:
-  - name: docker-sock
-    emptyDir: {}
-"""
-        }
-    }
-    parameters {
-        string(
-            name: 'VERSIONS',
-            defaultValue: '8.10 9.7 10.1',
-            description: 'Rocky Linux versions to build (space-separated)'
-        )
-        string(
-            name: 'B2_BUCKET_NAME',
-            defaultValue: 'rockyman',
-            description: 'B2 bucket name for uploads'
-        )
-        string(
-            name: 'EXISTING_VERSIONS',
-            defaultValue: '',
-            description: 'Existing versions already built (space-separated)'
-        )
-        string(
-            name: 'PARALLEL_DOWNLOADS',
-            defaultValue: '5',
-            description: 'Number of parallel downloads'
-        )
-        string(
-            name: 'PARALLEL_CONVERSIONS',
-            defaultValue: '10',
-            description: 'Number of parallel conversions'
-        )
-    }
-    options {
-        buildDiscarder(logRotator(numToKeepStr: '10'))
-        timeout(time: 2, unit: 'HOURS')
-    }
-    stages {
-        stage('Checkout') {
-            steps {
-                checkout scm
-            }
-        }
-        stage('Build Docker Image') {
-            steps {
-                container('docker-cli') {
-                    sh '''
-                        docker build -t rocky-man:${BUILD_NUMBER} .
-                        docker tag rocky-man:${BUILD_NUMBER} rocky-man:latest
-                    '''
-                }
-            }
-        }
-        stage('Build Man Pages') {
-            steps {
-                container('docker-cli') {
-                    sh '''
-                        # Create output directories
-                        mkdir -p ./html ./tmp
-
-                        # Run the container to build man pages
-                        docker run --rm \
-                            -v "$(pwd)/html:/data/html" \
-                            -v "$(pwd)/tmp:/data/tmp" \
-                            rocky-man:${BUILD_NUMBER} \
-                            --versions ${VERSIONS} \
-                            --parallel-downloads ${PARALLEL_DOWNLOADS} \
-                            --parallel-conversions ${PARALLEL_CONVERSIONS} \
-                            --existing-versions ${EXISTING_VERSIONS}
-                    '''
-                }
-            }
-        }
-        stage('Upload to B2') {
-            when {
-                expression { return params.B2_BUCKET_NAME != "" }
-            }
-            steps {
-                container('docker-cli') {
-                    withCredentials([
-                        string(credentialsId: 'b2-app-id', variable: 'B2_APPLICATION_KEY_ID'),
-                        string(credentialsId: 'b2-app-key', variable: 'B2_APPLICATION_KEY')
-                    ]) {
-                        sh '''
-                            docker run --rm \
-                                -v "$(pwd)/html:/workspace/html" \
-                                -e B2_APPLICATION_KEY \
-                                -e B2_APPLICATION_KEY_ID \
-                                backblazeit/b2:latest \
-                                b2v4 sync --compare-versions size /workspace/html/ "b2://${B2_BUCKET_NAME}/"
-                        '''
-                    }
-                }
-            }
-        }
-    }
-    post {
-        success {
-            echo 'Build completed and uploaded to B2!'
-        }
-        failure {
-            echo 'Build failed!'
-        }
-        cleanup {
-            container('docker-cli') {
-                sh '''
-                    docker rmi rocky-man:${BUILD_NUMBER} || true
-                    docker rmi rocky-man:latest || true
-                '''
-            }
-        }
-    }
-}

View File

@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2025 Ctrl IQ, Inc.
+Copyright (c) 2024 Stephen Simpson

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

README.md · 278 changes
View File

@@ -1,108 +1,133 @@
-# 🚀 Rocky Man 🚀
+# Rocky Man 📚
 
 **Rocky Man** is a tool for generating searchable HTML documentation from Rocky Linux man pages across BaseOS and AppStream repositories for Rocky Linux 8, 9, and 10.
 
 ## Features
 
-- Uses filelists.xml to pre-filter packages with man pages
-- Processes packages from BaseOS and AppStream repositories
-- Runs in containers on x86_64, aarch64, and arm64 architectures
-- Configurable cleanup of temporary files
-- Concurrent downloads and conversions
-- Supports Rocky Linux 8, 9, and 10
+- **Fast & Efficient**: Uses filelists.xml to pre-filter packages with man pages
+- **Complete Coverage**: All packages from BaseOS and AppStream repositories
+- **Container Ready**: Works on x86_64, aarch64, arm64, etc.
+- **Smart Cleanup**: Automatic cleanup of temporary files (configurable)
+- **Parallel Processing**: Concurrent downloads and conversions for maximum speed
+- **Multi-version**: Support for Rocky Linux 8, 9, and 10 simultaneously
 
 ## Quick Start
 
-### Podman
-
-```bash
-# Build the image
-docker build -t rocky-man .
-
-# Generate for specific versions
-podman run --rm -v $(pwd)/html:/data/html:Z rocky-man \
-  --versions 8.10 9.6 10.0
-
-# Keep downloaded RPMs for multiple builds
-podman run --rm -it \
-  -v $(pwd)/html:/data/html:Z \
-  -v $(pwd)/downloads:/data/tmp/downloads:Z \
-  rocky-man --versions 9.6 --keep-rpms --verbose
-```
-
-### View the HTML Locally
-
-Start a local web server to browse the generated documentation:
-
-```bash
-python3 -m http.server -d ./html
-```
-
-Then open [http://127.0.0.1:8000](http://127.0.0.1:8000) in your browser.
-
-To use a different port:
-
-```bash
-python3 -m http.server 8080 -d ./html
-```
+### Podman (Recommended)
+
+```bash
+# Build the image
+podman build -t rocky-man .
+
+# Generate man pages for Rocky Linux 9.6 (using defaults, no custom args)
+podman run --rm -v $(pwd)/html:/data/html:Z rocky-man
+
+# Generate for specific versions (requires explicit paths)
+podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
+  --versions 8.10 9.6 10.0 --output-dir /app/html
+
+# With verbose logging
+podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
+  --versions 9.6 --output-dir /app/html --verbose
+
+# Keep downloaded RPMs (mount the download directory)
+podman run --rm -it \
+  -v $(pwd)/html:/app/html:Z \
+  -v $(pwd)/downloads:/app/tmp/downloads:Z \
+  rocky-man --versions 9.6 --keep-rpms \
+  --output-dir /app/html --download-dir /app/tmp/downloads --verbose
+```
+
+### Docker
+
+```bash
+# Build the image
+docker build -t rocky-man .
+
+# Generate man pages (using defaults, no custom args)
+docker run --rm -v $(pwd)/html:/data/html rocky-man
+
+# Generate for specific versions (requires explicit paths)
+docker run --rm -v $(pwd)/html:/app/html rocky-man \
+  --versions 9.6 --output-dir /app/html
+
+# Interactive mode for debugging
+docker run --rm -it -v $(pwd)/html:/app/html rocky-man \
+  --versions 9.6 --output-dir /app/html --verbose
+
+# Keep downloaded RPMs (mount the download directory)
+docker run --rm -it \
+  -v $(pwd)/html:/app/html \
+  -v $(pwd)/downloads:/app/tmp/downloads \
+  rocky-man --versions 9.6 --keep-rpms \
+  --output-dir /app/html --download-dir /app/tmp/downloads --verbose
+```
 
 ### Directory Structure in Container
 
-The container uses the following paths:
+The container uses different paths depending on whether you pass custom arguments:
+
+**Without custom arguments** (using Dockerfile CMD defaults):
 
 - `/data/html` - Generated HTML output
 - `/data/tmp/downloads` - Downloaded RPM files
 - `/data/tmp/extracts` - Extracted man page files
 
-These paths are used by default and can be overridden with command-line arguments if needed.
+**With custom arguments** (argparse defaults from working directory `/app`):
+
+- `/app/html` - Generated HTML output
+- `/app/tmp/downloads` - Downloaded RPM files
+- `/app/tmp/extracts` - Extracted man page files
+
+**Important**: When passing custom arguments, the container's CMD is overridden and the code falls back to relative paths (`./html` = `/app/html`). You must explicitly specify `--output-dir /app/html --download-dir /app/tmp/downloads` to match your volume mounts. Without this, files are written inside the container and lost when it stops (especially with `--rm`).
 
 ### Local Development
 
-**Important**: Rocky Man requires Rocky Linux because it uses the system's native `python3-dnf` module to interact with DNF repositories. This module cannot be installed via pip and must come from the Rocky Linux system packages.
-
-#### Option 1: Run in a Rocky Linux Container (Recommended)
-
-```bash
-# Start a Rocky Linux container with your project mounted
-podman run --rm -it -v $(pwd):/workspace:Z rockylinux/rockylinux:9 /bin/bash
-
-# Inside the container, navigate to the project
-cd /workspace
-
-# Install epel-release for mandoc
-dnf install -y epel-release
-
-# Install system dependencies
-dnf install -y python3 python3-pip python3-dnf mandoc rpm-build dnf-plugins-core
-
-# Install Python dependencies
-pip3 install -e .
-
-# Run the tool
-python3 -m rocky_man.main --versions 9.6 --output-dir ./html/
-```
-
-#### Option 2: On a Native Rocky Linux System
-
-```bash
-# Install epel-release for mandoc
-dnf install -y epel-release
-
-# Install system dependencies
-dnf install -y python3 python3-pip python3-dnf mandoc rpm-build dnf-plugins-core
-
-# Install Python dependencies
-pip3 install -e .
-
-# Run the tool
-python3 -m rocky_man.main --versions 9.6 --output-dir ./html/
-```
+#### Prerequisites
+
+- Python 3.9+
+- pip (Python package manager)
+- mandoc (man page converter)
+- Rocky Linux system or container (for DNF)
+
+#### Installation
+
+```bash
+# On Rocky Linux, install system dependencies
+dnf install -y python3 python3-pip python3-dnf mandoc rpm-build dnf-plugins-core
+
+# Install Python dependencies
+pip3 install -e .
+```
+
+#### Usage
+
+```bash
+# Generate man pages for Rocky 9.6
+python -m rocky_man.main --versions 9.6
+
+# Generate for multiple versions (default)
+python -m rocky_man.main --versions 8.10 9.6 10.0
+
+# Custom output directory
+python -m rocky_man.main --output-dir /var/www/html/man --versions 9.6
+
+# Keep downloaded RPMs for debugging
+python -m rocky_man.main --keep-rpms --verbose
+
+# Adjust parallelism for faster processing
+python -m rocky_man.main --parallel-downloads 10 --parallel-conversions 20
+
+# Use a different mirror
+python -m rocky_man.main --mirror https://mirrors.example.com/
+
+# Only BaseOS (faster)
+python -m rocky_man.main --repo-types BaseOS --versions 9.6
+```
 
 ## Architecture
 
-Rocky Man is organized into components:
+Rocky Man is organized into clean, modular components:
 
-```text
+```
 rocky-man/
 ├── src/rocky_man/
 │   ├── models/          # Data models (Package, ManFile)
@@ -118,28 +143,22 @@ rocky-man/
 ### How It Works
 
-1. **Package Discovery** - Parses repository metadata (`repodata/repomd.xml` and `filelists.xml.gz`) to identify packages containing files in `/usr/share/man/` directories
-2. **Package Download** - Downloads identified RPM packages using DNF, with configurable parallel downloads (default: 5)
-3. **Man Page Extraction** - Extracts man page files from RPMs using `rpm2cpio`, filtering by section and language based on configuration
-4. **HTML Conversion** - Converts troff-formatted man pages to HTML using mandoc, with parallel processing (default: 10 workers)
-5. **Cross-Reference Linking** - Parses converted HTML to add hyperlinks between man page references (e.g., `bash(1)` becomes clickable)
-6. **Index Generation** - Creates search indexes (JSON/gzipped) and navigation pages using Jinja2 templates
-7. **Cleanup** - Removes temporary files (RPMs and extracted content) unless `--keep-rpms` or `--keep-extracts` is specified
+1. **Package Discovery** - Parse repository `filelists.xml` to identify packages with man pages
+2. **Smart Download** - Download only packages containing man pages with parallel downloads
+3. **Extraction** - Extract man page files from RPM packages
+4. **Conversion** - Convert troff format to HTML using mandoc
+5. **Web Generation** - Wrap HTML in templates and generate search index
+6. **Cleanup** - Automatically remove temporary files (configurable)
 
 ## Command Line Options
 
-```bash
-usage: main.py [-h] [--versions VERSIONS [VERSIONS ...]]
-               [--repo-types REPO_TYPES [REPO_TYPES ...]]
-               [--output-dir OUTPUT_DIR] [--download-dir DOWNLOAD_DIR]
-               [--extract-dir EXTRACT_DIR] [--keep-rpms] [--keep-extracts]
-               [--parallel-downloads PARALLEL_DOWNLOADS]
-               [--parallel-conversions PARALLEL_CONVERSIONS] [--mirror MIRROR]
-               [--vault] [--existing-versions [VERSION ...]]
-               [--template-dir TEMPLATE_DIR] [-v]
-               [--skip-sections [SKIP_SECTIONS ...]]
-               [--skip-packages [SKIP_PACKAGES ...]] [--skip-languages]
-               [--keep-languages] [--allow-all-sections]
+```
+usage: rocky-man [-h] [--versions VERSIONS [VERSIONS ...]]
+                 [--repo-types REPO_TYPES [REPO_TYPES ...]]
+                 [--output-dir OUTPUT_DIR] [--download-dir DOWNLOAD_DIR]
+                 [--extract-dir EXTRACT_DIR] [--keep-rpms] [--keep-extracts]
+                 [--parallel-downloads N] [--parallel-conversions N]
+                 [--mirror URL] [--template-dir DIR] [-v]
 
 Generate HTML documentation for Rocky Linux man pages
 
@@ -150,11 +169,11 @@ optional arguments:
   --repo-types REPO_TYPES [REPO_TYPES ...]
                         Repository types to process (default: BaseOS AppStream)
   --output-dir OUTPUT_DIR
-                        Output directory for HTML files (default: /data/html)
+                        Output directory for HTML files (default: ./html)
   --download-dir DOWNLOAD_DIR
-                        Directory for downloading packages (default: /data/tmp/downloads)
+                        Directory for downloading packages (default: ./tmp/downloads)
   --extract-dir EXTRACT_DIR
-                        Directory for extracting man pages (default: /data/tmp/extracts)
+                        Directory for extracting man pages (default: ./tmp/extracts)
   --keep-rpms           Keep downloaded RPM files after processing
   --keep-extracts       Keep extracted man files after processing
   --parallel-downloads PARALLEL_DOWNLOADS
@@ -177,11 +196,80 @@ optional arguments:
   --allow-all-sections  Include all man sections (overrides --skip-sections)
 ```
 
-## Attribution
+## Troubleshooting
 
-The man pages displayed in this documentation are sourced from Rocky Linux distribution packages. All man page content is copyrighted by their respective authors and distributed under the licenses specified within each man page.
-
-This tool generates HTML documentation from man pages contained in Rocky Linux packages but does not modify the content of the man pages themselves.
+### DNF Errors
+
+**Problem**: `dnf` module not found or repository errors
+
+**Solution**: Ensure you're running on Rocky Linux or in a Rocky Linux container:
+
+```bash
+# Run in Rocky Linux container
+podman run --rm -it -v $(pwd):/app rockylinux:9 /bin/bash
+cd /app
+
+# Install dependencies
+dnf install -y python3 python3-dnf mandoc rpm-build dnf-plugins-core
+
+# Run the script
+python3 -m rocky_man.main --versions 9.6
+```
+
+### Mandoc Not Found
+
+**Problem**: `mandoc: command not found`
+
+**Solution**: Install mandoc:
+
+```bash
+dnf install -y mandoc
+```
+
+### Permission Errors in Container
+
+**Problem**: Cannot write to mounted volume
+
+**Solution**: Use the `:Z` flag with podman for SELinux contexts:
+
+```bash
+podman run --rm -v $(pwd)/html:/data/html:Z rocky-man
+```
+
+For Docker, ensure the volume path is absolute:
+
+```bash
+docker run --rm -v "$(pwd)/html":/data/html rocky-man
+```
+
+### Out of Memory
+
+**Problem**: Process killed due to memory
+
+**Solution**: Reduce parallelism:
+
+```bash
+python -m rocky_man.main --parallel-downloads 2 --parallel-conversions 5
+```
+
+### Slow Downloads
+
+**Problem**: Downloads are very slow
+
+**Solution**: Use a closer mirror:
+
+```bash
+# Find mirrors at: https://mirrors.rockylinux.org/mirrormanager/mirrors
+python -m rocky_man.main --mirror https://mirror.example.com/rocky/
+```
+
+## Performance Tips
+
+1. **Use closer mirrors** - Significant speed improvement for downloads
+2. **Increase parallelism** - If you have bandwidth: `--parallel-downloads 15`
+3. **Process one repo at a time** - Use `--repo-types BaseOS` first, then `--repo-types AppStream`
+4. **Keep RPMs for re-runs** - Use `--keep-rpms` if testing
+5. **Run in container** - More consistent performance
 
 ## License
 
@@ -189,16 +277,20 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ### Third-Party Software
 
-This project uses several open source components.
-
-Key dependencies include:
-
-- **mandoc** - Man page converter (ISC License)
-- **python3-dnf** - DNF package manager Python bindings (GPL-2.0-or-later)
-- **Fuse.js** - Client-side search (Apache 2.0)
-- **Python packages**: requests, rpmfile, Jinja2, lxml, zstandard
-- **Fonts**: Red Hat Display, Red Hat Text, JetBrains Mono (SIL OFL)
+This project uses several open source components. See [THIRD-PARTY-LICENSES.md](THIRD-PARTY-LICENSES.md) for complete license information and attributions.
 
 ### Trademark Notice
 
 Rocky Linux is a trademark of the Rocky Enterprise Software Foundation (RESF). This project is not officially affiliated with or endorsed by RESF. All trademarks are the property of their respective owners. This project complies with RESF's trademark usage guidelines.
+
+## Contributing
+
+Contributions welcome! Please:
+
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+3. Make your changes with proper documentation
+4. Test thoroughly
+5. Commit with clear messages (`git commit -m 'feat: add amazing feature'`)
+6. Push to your branch (`git push origin feature/amazing-feature`)
+7. Open a Pull Request
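One detail from the directory-structure note in the README diff above is worth a standalone illustration: an argparse `Path` default such as `./html` resolves against the process working directory, which is why overriding the container's CMD changes where output lands. A minimal sketch, reusing the default declared in the diff (the empty argv is just for demonstration):

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
# Same relative default the new main.py declares
parser.add_argument("--output-dir", type=Path, default=Path("./html"))

args = parser.parse_args([])
# A relative default resolves against the current working directory,
# so inside an image with WORKDIR /app this prints /app/html
print(args.output_dir.resolve())
```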

View File

@@ -9,11 +9,11 @@ authors = [
 ]

 requires-python = ">=3.9"

 dependencies = [
-    "requests>=2.32.0",
-    "rpmfile>=2.1.0",
+    "requests>=2.31.0",
+    "rpmfile>=2.0.0",
     "jinja2>=3.1.0",
-    "lxml>=6.0.0",
-    "zstandard>=0.25.0",
+    "lxml>=5.0.0",
+    "zstandard>=0.18.0",
 ]

 [project.scripts]

View File

@@ -43,13 +43,18 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
     all_man_files = []

+    # Process each repository type
     for repo_type in config.repo_types:
         logger.info(f"Processing {repo_type} repository")

+        # Use first available architecture (man pages are arch-independent)
         arch = config.architectures[0]

+        # Create cache dir for this repo
         cache_dir = config.download_dir / f".cache/{version}/{repo_type}"

         try:
+            # Initialize repository manager
             repo_manager = RepoManager(
                 config=config,
                 version=version,
@@ -59,6 +64,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
                 download_dir=version_download_dir,
             )

+            # List packages (with man pages only)
             packages = repo_manager.list_packages(with_manpages_only=True)

             if not packages:
@@ -67,6 +73,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
             logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")

+            # Filter out packages that should be skipped
             if config.skip_packages:
                 original_count = len(packages)
                 packages = [
@@ -79,11 +86,13 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
             )
             logger.info(f"Processing {len(packages)} packages")

+            # Download packages
             logger.info("Downloading packages...")
             downloaded = repo_manager.download_packages(
                 packages, max_workers=config.parallel_downloads
             )

+            # Extract man pages
             logger.info("Extracting man pages...")
             extractor = ManPageExtractor(
                 version_extract_dir,
@@ -96,6 +105,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
             logger.info(f"Extracted {len(man_files)} man pages")

+            # Read content for each man file
             logger.info("Reading man page content...")
             man_files_with_content = []
             for man_file in man_files:
@@ -103,6 +113,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
                 if content:
                     man_files_with_content.append((man_file, content))

+            # Convert to HTML
             logger.info("Converting man pages to HTML...")
             converter = ManPageConverter(version_output_dir)
             converted = converter.convert_many(
@@ -111,6 +122,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
             all_man_files.extend(converted)

+            # Cleanup if requested
             if not config.keep_rpms:
                 logger.info("Cleaning up downloaded packages...")
                 for package in downloaded:
@@ -129,21 +141,30 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
         logger.error(f"No man pages were successfully processed for version {version}")
         return False

+    # Generate web pages
     logger.info("Generating web pages...")
     web_gen = WebGenerator(template_dir, config.output_dir)

+    # Generate search index
     search_index = web_gen.generate_search_index(all_man_files, version)
     web_gen.save_search_index(search_index, version)

+    # Generate index page
     web_gen.generate_index(version, search_index)

+    # Generate packages index page
     web_gen.generate_packages_index(version, search_index)

+    # Set HTML paths for all man files
     for man_file in all_man_files:
         if not man_file.html_path:
             man_file.html_path = web_gen._get_manpage_path(man_file, version)

+    # Link cross-references between man pages
     logger.info("Linking cross-references...")
     converter.link_cross_references(all_man_files, version)

+    # Wrap man pages in templates
     logger.info("Generating man page HTML...")
     for man_file in all_man_files:
         web_gen.generate_manpage_html(man_file, version)
@@ -177,22 +198,22 @@ def main():
     parser.add_argument(
         "--output-dir",
         type=Path,
-        default=Path("/data/html"),
-        help="Output directory for HTML files (default: /data/html)",
+        default=Path("./html"),
+        help="Output directory for HTML files (default: ./html)",
     )
     parser.add_argument(
         "--download-dir",
         type=Path,
-        default=Path("/data/tmp/downloads"),
-        help="Directory for downloading packages (default: /data/tmp/downloads)",
+        default=Path("./tmp/downloads"),
+        help="Directory for downloading packages (default: ./tmp/downloads)",
     )
     parser.add_argument(
         "--extract-dir",
         type=Path,
-        default=Path("/data/tmp/extracts"),
-        help="Directory for extracting man pages (default: /data/tmp/extracts)",
+        default=Path("./tmp/extracts"),
+        help="Directory for extracting man pages (default: ./tmp/extracts)",
     )
     parser.add_argument(
@@ -286,17 +307,21 @@ def main():
     args = parser.parse_args()

+    # Setup logging
     setup_logging(args.verbose)
     logger = logging.getLogger(__name__)

-    skip_languages = True
+    # Handle filtering options
+    skip_languages = True  # default
     if args.keep_languages:
         skip_languages = False
     elif args.skip_languages is not None:
         skip_languages = args.skip_languages

+    # Determine content directory
     content_dir = "vault/rocky" if args.vault else "pub/rocky"

+    # Create configuration
     config = Config(
         base_url=args.mirror,
         content_dir=content_dir,
@@ -315,6 +340,7 @@ def main():
         allow_all_sections=args.allow_all_sections,
     )

+    # Get existing versions from scan and argument
     scanned_versions = [
         d.name
         for d in config.output_dir.iterdir()
@@ -322,6 +348,7 @@ def main():
     ]
     arg_versions = args.existing_versions or []

+    # Sort versions numerically by (major, minor)
     def version_key(v):
         try:
             major, minor = v.split(".")
@@ -338,6 +365,7 @@ def main():
     logger.info(f"Repositories: {', '.join(config.repo_types)}")
     logger.info(f"Output directory: {config.output_dir}")

+    # Log filtering configuration
     if config.skip_sections:
         logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
     else:
@@ -351,6 +379,7 @@ def main():
     else:
         logger.info("Including all languages")

+    # Process each version
     processed_versions = []
     for version in config.versions:
         try:
@@ -363,13 +392,11 @@ def main():
         logger.error("No versions were successfully processed")
         return 1

+    # Generate root index
     logger.info("Generating root index page...")
     web_gen = WebGenerator(args.template_dir, config.output_dir)
     web_gen.generate_root_index(all_versions)

-    logger.info("Generating 404 page...")
-    web_gen.generate_404_page()
-
     logger.info("=" * 60)
     logger.info("Processing complete!")
     logger.info(f"Generated documentation for: {', '.join(processed_versions)}")

View File

@@ -35,22 +35,35 @@ class ManFile:
         self._parse_path()

     def _parse_path(self):
-        """Extract section, name, and language from the file path."""
+        """Extract section, name, and language from the file path.
+
+        Example paths:
+            /usr/share/man/man1/bash.1.gz
+            /usr/share/man/es/man1/bash.1.gz
+            /usr/share/man/man3/printf.3.gz
+        """
         parts = self.file_path.parts
         filename = self.file_path.name

+        # Remove .gz extension if present
         if filename.endswith('.gz'):
             filename = filename[:-3]

+        # Extract section from parent directory (e.g., 'man1', 'man3p', 'man3pm')
         for part in reversed(parts):
             if part.startswith('man') and len(part) > 3:
+                # Check if it starts with 'man' followed by a digit
                 if part[3].isdigit():
                     self.section = part[3:]
                     break

+        # Extract section from filename if not found yet (e.g., 'foo.3pm' -> section '3pm')
+        # and extract name
         name_parts = filename.split('.')
         if len(name_parts) >= 2:
+            # Try to identify section from last part
             potential_section = name_parts[-1]
+            # Section is typically digit optionally followed by letters (1, 3p, 3pm, etc.)
             if potential_section and potential_section[0].isdigit():
                 if not self.section:
                     self.section = potential_section
@@ -60,10 +73,14 @@ class ManFile:
         else:
             self.name = name_parts[0]

+        # Check for language subdirectory
+        # Pattern: /usr/share/man/<lang>/man<section>/
         for i, part in enumerate(parts):
             if part == 'man' and i + 1 < len(parts):
                 next_part = parts[i + 1]
+                # If next part is not 'man<digit>', it's a language code
                 if not (next_part.startswith('man') and next_part[3:].isdigit()):
+                    # Common language codes are 2-5 chars (en, es, pt_BR, etc.)
                     if len(next_part) <= 5:
                         self.language = next_part
                         break
@@ -76,12 +93,14 @@ class ManFile:
     @property
     def html_filename(self) -> str:
         """Get the HTML filename for this man page."""
+        # Clean name for filesystem safety
         safe_name = self._clean_filename(self.name)
         suffix = f".{self.language}" if self.language else ""
         return f"{safe_name}.{self.section}{suffix}.html"

     def _clean_filename(self, name: str) -> str:
         """Clean filename for filesystem safety."""
+        # Replace problematic characters
         name = name.replace('/', '_')
         name = name.replace(':', '_')
         name = re.sub(r'\.\.', '__', name)
@@ -89,13 +108,19 @@ class ManFile:
     @property
     def uri_path(self) -> str:
-        """Get the URI path for this man page (relative to version root)."""
+        """Get the URI path for this man page (relative to version root).
+
+        Returns path like: 'bash/man1/bash.1.html'
+        """
         if not self.html_path:
             return ""

+        # Get path relative to the version directory
+        # Assuming structure: html/<version>/<package>/<section>/<file>.html
         parts = self.html_path.parts
         try:
+            # Find the version part (e.g., '9.5') and return everything after it
             for i, part in enumerate(parts):
-                if re.match(r'\d+\.\d+', part):
+                if re.match(r'\d+\.\d+', part):  # Version pattern
                     return '/'.join(parts[i+1:])
         except (ValueError, IndexError):
             pass
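For orientation, here is a condensed sketch of the parsing rules the docstrings in this diff describe: section comes from the `manN` parent directory or the filename suffix, and the language code sits between `man` and `man<N>` in the path. The helper below is illustrative only, not the module's actual class:

```python
from pathlib import Path

def parse_man_path(path: str) -> dict:
    """Illustrative re-implementation of the _parse_path rules above."""
    p = Path(path)
    filename = p.name[:-3] if p.name.endswith(".gz") else p.name

    section = None
    # Section from a parent directory such as 'man1' or 'man3pm'
    for part in reversed(p.parts):
        if part.startswith("man") and len(part) > 3 and part[3].isdigit():
            section = part[3:]
            break

    # Fall back to the filename suffix ('foo.3pm' -> section '3pm')
    name_parts = filename.split(".")
    if len(name_parts) >= 2 and name_parts[-1][:1].isdigit():
        section = section or name_parts[-1]
        name = ".".join(name_parts[:-1])
    else:
        name = name_parts[0]

    # Language code sits between 'man' and 'man<N>' (e.g. 'es', 'pt_BR')
    language = None
    parts = p.parts
    for i, part in enumerate(parts):
        if part == "man" and i + 1 < len(parts):
            nxt = parts[i + 1]
            if not (nxt.startswith("man") and nxt[3:].isdigit()) and len(nxt) <= 5:
                language = nxt
            break

    return {"name": name, "section": section, "language": language}

print(parse_man_path("/usr/share/man/es/man1/bash.1.gz"))
# {'name': 'bash', 'section': '1', 'language': 'es'}
```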

View File

@@ -38,11 +38,15 @@ class ManPageConverter:
     def _check_mandoc() -> bool:
         """Check if mandoc is available."""
         try:
+            # Run mandoc with no arguments - it will show usage and exit
+            # We just want to verify the command exists, not that it succeeds
             subprocess.run(["mandoc"], capture_output=True, timeout=5)
             return True
         except FileNotFoundError:
+            # mandoc command not found
             return False
         except Exception:
+            # Other errors (timeout, etc) - but mandoc exists
             return True

     def convert(self, man_file: ManFile, content: str) -> bool:
@@ -56,20 +60,26 @@ class ManPageConverter:
             True if conversion successful
         """
         try:
+            # Run mandoc to convert to HTML
             html = self._run_mandoc(content)
             if not html:
                 logger.warning(f"mandoc produced no output for {man_file.display_name}")
                 return False

+            # Clean up HTML
             html = self._clean_html(html)

-            # Check if output indicates this is a symlink/redirect
+            # Check if mandoc output indicates this is a symlink/redirect
+            # Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
+            # or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
+            # or: <div class="manual-text">See the file man1/builtin.1.</div>
             symlink_match = re.search(
                 r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
                 html,
                 re.DOTALL,
             )
             if not symlink_match:
+                # Try simpler pattern without "See the file" or period
                 symlink_match = re.search(
                     r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
                     html,
@@ -84,9 +94,14 @@ class ManPageConverter:
                 )
                 html = self._generate_redirect_html({"name": name, "section": section})

+            # Store in ManFile object
             man_file.html_content = html

+            # Determine output path
             output_path = self._get_output_path(man_file)
             man_file.html_path = output_path

+            # Save HTML file
             output_path.parent.mkdir(parents=True, exist_ok=True)
             with open(output_path, "w", encoding="utf-8") as f:
                 f.write(html)
@@ -113,11 +128,13 @@ class ManPageConverter:
         converted = []

         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all conversion tasks
             future_to_manfile = {
                 executor.submit(self.convert, man_file, content): man_file
                 for man_file, content in man_files
             }

+            # Collect results
             for future in as_completed(future_to_manfile):
                 man_file = future_to_manfile[future]
                 try:
@@ -149,6 +166,7 @@ class ManPageConverter:
             if result.returncode != 0:
                 stderr = result.stderr.decode("utf-8", errors="replace")
                 logger.warning(f"mandoc returned error: {stderr}")
+                # Sometimes mandoc returns non-zero but still produces output
                 if result.stdout:
                     return result.stdout.decode("utf-8", errors="replace")
                 return None
@@ -171,27 +189,15 @@ class ManPageConverter:
         Returns:
             Cleaned HTML
         """
-        # Fix empty header cells
-        html = re.sub(
-            r'<td class="head-(ltitle|rtitle)">\(\)</td>',
-            r'<td class="head-\1"></td>',
-            html,
-        )
-
-        # Remove empty <p class="Pp"></p> tags (from .sp directives in troff)
-        html = re.sub(r'<p class="Pp">\s*</p>', '', html)
-
-        # Clean up trailing whitespace and br tags in pre blocks
-        # Match: <pre>...</pre> and clean trailing <br/> followed by whitespace
-        def clean_pre_block(match):
-            content = match.group(1)
-            # Remove trailing <br/> tags and whitespace before closing </pre>
-            content = re.sub(r'<br\s*/>\s*$', '', content)
-            content = re.sub(r'\s+$', '', content)
-            return f'<pre>{content}</pre>'
-
-        html = re.sub(r'<pre>(.*?)</pre>', clean_pre_block, html, flags=re.DOTALL)
-
+        # Remove empty parentheses in header cells
+        html = re.sub(
+            r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
+        )
+        html = re.sub(
+            r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
+        )
+
+        # Strip leading/trailing whitespace
         html = html.strip()
         return html
@@ -207,8 +213,12 @@ class ManPageConverter:
         """
         name = target_info["name"]
         section = target_info["section"]

+        # Generate the relative path to the target man page
+        # Symlinks are in the same package, just different file names
         target_filename = f"{name}.{section}.html"

+        # Generate simple redirect HTML with a working hyperlink
         html = f'''<div class="symlink-notice" style="padding: 2rem; text-align: center; background-color: var(--bg-tertiary); border-radius: 8px; border: 1px solid var(--border-color);">
     <p style="font-size: 1.2rem; margin-bottom: 1.5rem; color: var(--text-primary);">
         This is an alias for <b>{name}</b>({section}).
@@ -220,26 +230,35 @@ class ManPageConverter:
         return html

     def link_cross_references(self, man_files: List[ManFile], version: str) -> None:
-        """Add hyperlinks to cross-references in man pages.
+        """Add hyperlinks to cross-references in SEE ALSO sections.
+
+        Goes through all converted HTML files and converts man page references
+        like pty(4) into working hyperlinks.

         Args:
             man_files: List of all converted ManFile objects
+            version: Rocky Linux version
         """
+        # Build lookup index: (name, section) -> relative_path
         lookup = {}
         for mf in man_files:
             key = (mf.name.lower(), str(mf.section))
             if key not in lookup:
+                # Store the relative path from the version root
                 lookup[key] = f"{mf.package_name}/man{mf.section}/{mf.html_filename}"

         logger.info(f"Linking cross-references across {len(man_files)} man pages...")

+        # Process each man page HTML content
         for man_file in man_files:
             if not man_file.html_content:
                 continue

             try:
                 html = man_file.html_content

+                # Find and replace man page references
+                # Mandoc outputs references as: <b>name</b>(section)
+                # Pattern matches both <b>name</b>(section) and plain name(section)
                 pattern = (
                     r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"
                 )
@@ -247,25 +266,42 @@ class ManPageConverter:
                 def replace_reference(match):
                     full_match = match.group(0)

-                    # Skip if already inside an <a> tag
+                    # Check if this match is already inside an <a> tag
+                    # Look back up to 500 chars for context
                     before_text = html[max(0, match.start() - 500) : match.start()]
+                    # Find the last <a and last </a> before this match
                     last_open = before_text.rfind("<a ")
                     last_close = before_text.rfind("</a>")
+                    # If the last <a> is after the last </a>, we're inside a link
                     if last_open > last_close:
                         return full_match

-                    name = (match.group(1) or match.group(3)).lower()
-                    section = match.group(2) or match.group(4)
+                    if match.group(1):  # <b>name</b>(section) format
+                        name = match.group(1).lower()
+                        section = match.group(2)
+                    else:  # plain name(section) format
+                        name = match.group(3).lower()
+                        section = match.group(4)

+                    # Look up the referenced man page
                     key = (name, section)
                     if key in lookup:
+                        # Calculate relative path from current file to target
                         target_path = lookup[key]
+                        # File structure: output_dir/version/package_name/manN/file.html
+                        # Need to go up 3 levels to reach output root, then down to version/target
+                        # Current: version/package_name/manN/file.html
+                        # Target: version/other_package/manM/file.html
                         rel_path = f"../../../{version}/{target_path}"
                         return f'<a href="{rel_path}">{full_match}</a>'

                     return full_match

                 updated_html = re.sub(pattern, replace_reference, html)

+                # Update the content if something changed
                 if updated_html != html:
                     man_file.html_content = updated_html
@@ -277,7 +313,23 @@ class ManPageConverter:
         logger.info("Cross-reference linking complete")

     def _get_output_path(self, man_file: ManFile) -> Path:
-        """Determine output path for HTML file."""
+        """Determine output path for HTML file.
+
+        Structure: output_dir/<package>/<section>/<name>.<section>[.<lang>].html
+
+        Args:
+            man_file: ManFile object
+
+        Returns:
+            Path for HTML output
+        """
+        # Package directory
         pkg_dir = self.output_dir / man_file.package_name
+
+        # Section directory (man1, man2, etc.)
         section_dir = pkg_dir / f"man{man_file.section}"
-        return section_dir / man_file.html_filename
+
+        # HTML filename
+        filename = man_file.html_filename
+
+        return section_dir / filename
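As a rough illustration of the cross-reference pass these converter hunks adjust: references like `bash(1)` are matched with the regex from the diff and wrapped in links when a (name, section) lookup hits. A toy version with a hard-coded one-entry lookup and none of the inside-a-link guarding:

```python
import re

# Assumed toy lookup: (name, section) -> path relative to the version root
lookup = {("bash", "1"): "bash/man1/bash.1.html"}

# Same pattern the diff above shows
pattern = r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"

def link_refs(html: str, version: str) -> str:
    def replace(match: re.Match) -> str:
        name = (match.group(1) or match.group(3)).lower()
        section = match.group(2) or match.group(4)
        target = lookup.get((name, section))
        if target is None:
            return match.group(0)
        # ../../../ climbs from version/package/manN/file.html back to the root
        return f'<a href="../../../{version}/{target}">{match.group(0)}</a>'

    return re.sub(pattern, replace, html)

print(link_refs("See <b>bash</b>(1) for details.", "9.6"))
```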

View File

@@ -48,6 +48,7 @@ class ManPageExtractor:
             logger.warning(f"Package file not found: {package.name}")
             return []

+        # Create extraction directory for this package
         pkg_extract_dir = self.extract_dir / package.name
         pkg_extract_dir.mkdir(parents=True, exist_ok=True)
@@ -58,39 +59,33 @@ class ManPageExtractor:
             with rpmfile.open(package.download_path) as rpm:
                 for member in rpm.getmembers():
+                    # Check if this is a man page file
                     if not self._is_manpage(member.name):
                         continue

-                    # Sanitize path to prevent path traversal attacks
-                    safe_name = member.name.lstrip('/')
-                    extract_path = pkg_extract_dir / safe_name
-
-                    # Resolve to absolute path and verify it's within the extraction directory
-                    real_extract_path = extract_path.resolve()
-                    real_pkg_extract_dir = pkg_extract_dir.resolve()
-                    if not real_extract_path.is_relative_to(real_pkg_extract_dir):
-                        logger.warning(f"Skipping file with path traversal attempt: {member.name}")
-                        continue
+                    # Create ManFile object
+                    extract_path = pkg_extract_dir / member.name.lstrip('/')

                     man_file = ManFile(
-                        file_path=real_extract_path,
+                        file_path=extract_path,
                         package_name=package.name
                     )

+                    # Apply section filtering
                     if self.skip_sections and man_file.section in self.skip_sections:
                         logger.debug(f"Skipping {man_file.display_name} (section {man_file.section})")
                         continue

+                    # Apply language filtering
                     if self.skip_languages and man_file.language and man_file.language != 'en':
                         logger.debug(f"Skipping {man_file.display_name} (language {man_file.language})")
                         continue

-                    real_extract_path.parent.mkdir(parents=True, exist_ok=True)
+                    # Extract the file
+                    extract_path.parent.mkdir(parents=True, exist_ok=True)

                     try:
                         content = rpm.extractfile(member).read()
-                        with open(real_extract_path, 'wb') as f:
+                        with open(extract_path, 'wb') as f:
                             f.write(content)

                         man_file.content = content
@@ -123,11 +118,13 @@ class ManPageExtractor:
         all_man_files = []

         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all extraction tasks
             future_to_pkg = {
                 executor.submit(self.extract_from_package, pkg): pkg
                 for pkg in packages
             }

+            # Collect results
             for future in as_completed(future_to_pkg):
                 pkg = future_to_pkg[future]
                 try:
@@ -153,15 +150,27 @@ class ManPageExtractor:
             return ""

         try:
+            # Try reading as gzipped file first
             if man_file.file_path.suffix == '.gz':
-                try:
-                    with gzip.open(man_file.file_path, 'rb') as f:
-                        return f.read().decode('utf-8', errors='replace')
-                except gzip.BadGzipFile:
-                    pass
-
-            with open(man_file.file_path, 'rb') as f:
-                return f.read().decode('utf-8', errors='replace')
+                with gzip.open(man_file.file_path, 'rb') as f:
+                    content = f.read()
+            else:
+                # Read as plain text
+                with open(man_file.file_path, 'rb') as f:
+                    content = f.read()
+
+            # Decode with error handling
+            return content.decode('utf-8', errors='replace')
+        except gzip.BadGzipFile:
+            # Not a gzip file, try reading as plain text
+            try:
+                with open(man_file.file_path, 'rb') as f:
+                    content = f.read()
+                return content.decode('utf-8', errors='replace')
+            except Exception as e:
+                logger.error(f"Error reading {man_file.file_path}: {e}")
+                return ""
         except Exception as e:
             logger.error(f"Error reading {man_file.file_path}: {e}")
@@ -169,19 +178,37 @@ class ManPageExtractor:
     @staticmethod
     def _is_manpage(path: str) -> bool:
-        """Check if a file path is a man page."""
+        """Check if a file path is a man page.
+
+        Args:
+            path: File path to check
+
+        Returns:
+            True if this looks like a man page file
+        """
+        # Must contain /man/ in path
         if '/man/' not in path:
             return False

+        # Should be in /usr/share/man/ or /usr/man/
         if not ('/share/man/' in path or path.startswith('/usr/man/')):
             return False

+        # Common man page patterns
+        # - /usr/share/man/man1/foo.1.gz
+        # - /usr/share/man/es/man1/foo.1.gz
+        # - /usr/share/man/man3/printf.3.gz
         parts = path.split('/')

-        return any(
+        # Check for man<digit> directory
+        has_man_section = any(
             part.startswith('man') and len(part) > 3 and part[3].isdigit()
             for part in parts
         )
+
+        return has_man_section

     def cleanup_extracts(self, package: Package):
         """Clean up extracted files for a package.

View File

@@ -4,7 +4,7 @@ import gzip
import logging import logging
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from pathlib import Path from pathlib import Path
from typing import Set from typing import Set, Dict
from urllib.parse import urljoin from urllib.parse import urljoin
import requests import requests
@@ -38,16 +38,19 @@ class ContentsParser:
""" """
logger.info(f"Fetching filelists for {self.repo_url}") logger.info(f"Fetching filelists for {self.repo_url}")
# Download and parse repomd.xml to find filelists location
filelists_path = self._get_filelists_path() filelists_path = self._get_filelists_path()
if not filelists_path: if not filelists_path:
logger.warning("Could not find filelists in repository metadata") logger.warning("Could not find filelists in repository metadata")
return set() return set()
# Download filelists.xml
filelists_file = self._download_filelists(filelists_path) filelists_file = self._download_filelists(filelists_path)
if not filelists_file: if not filelists_file:
logger.warning("Could not download filelists") logger.warning("Could not download filelists")
return set() return set()
# Parse filelists to find packages with man pages
packages = self._parse_filelists(filelists_file) packages = self._parse_filelists(filelists_file)
logger.info(f"Found {len(packages)} packages with man pages") logger.info(f"Found {len(packages)} packages with man pages")
@@ -65,7 +68,11 @@ class ContentsParser:
response = requests.get(repomd_url, timeout=30) response = requests.get(repomd_url, timeout=30)
response.raise_for_status() response.raise_for_status()
# Parse XML
root = ET.fromstring(response.content) root = ET.fromstring(response.content)
# Find filelists entry
# XML structure: <repomd><data type="filelists"><location href="..."/></data></repomd>
ns = {'repo': 'http://linux.duke.edu/metadata/repo'} ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
for data in root.findall('repo:data', ns): for data in root.findall('repo:data', ns):
@@ -74,7 +81,7 @@ class ContentsParser:
if location is not None: if location is not None:
return location.get('href') return location.get('href')
# Fallback without namespace # Fallback: try without namespace
for data in root.findall('data'): for data in root.findall('data'):
if data.get('type') == 'filelists': if data.get('type') == 'filelists':
location = data.find('location') location = data.find('location')
@@ -98,6 +105,7 @@ class ContentsParser:
url = urljoin(self.repo_url, relative_path) url = urljoin(self.repo_url, relative_path)
cache_file = self.cache_dir / relative_path.split('/')[-1] cache_file = self.cache_dir / relative_path.split('/')[-1]
# Return cached file if it exists
if cache_file.exists(): if cache_file.exists():
logger.debug(f"Using cached filelists: {cache_file}") logger.debug(f"Using cached filelists: {cache_file}")
return cache_file return cache_file
@@ -130,26 +138,36 @@ class ContentsParser:
packages = set() packages = set()
try: try:
# Open gzipped XML file
with gzip.open(filelists_path, 'rb') as f: with gzip.open(filelists_path, 'rb') as f:
# Use iterparse for memory efficiency (files can be large)
context = ET.iterparse(f, events=('start', 'end')) context = ET.iterparse(f, events=('start', 'end'))
current_package = None current_package = None
has_manpage = False has_manpage = False
for event, elem in context: for event, elem in context:
if event == 'start' and elem.tag.endswith('package'): if event == 'start':
current_package = elem.get('name') if elem.tag.endswith('package'):
has_manpage = False # Get package name from 'name' attribute
current_package = elem.get('name')
has_manpage = False
elif event == 'end': elif event == 'end':
if elem.tag.endswith('file'): if elem.tag.endswith('file'):
# Check if file path contains /man/
file_path = elem.text file_path = elem.text
if file_path and self._is_manpage_path(file_path): if file_path and '/man/' in file_path:
has_manpage = True # Could be /usr/share/man/ or /usr/man/
if '/share/man/' in file_path or file_path.startswith('/usr/man/'):
has_manpage = True
elif elem.tag.endswith('package'): elif elem.tag.endswith('package'):
# End of package entry
if has_manpage and current_package: if has_manpage and current_package:
packages.add(current_package) packages.add(current_package)
# Clear element to free memory
elem.clear() elem.clear()
current_package = None current_package = None
has_manpage = False has_manpage = False
@@ -159,16 +177,45 @@ class ContentsParser:
         return packages

-    @staticmethod
-    def _is_manpage_path(file_path: str) -> bool:
-        """Check if a file path is a man page location.
+    def get_package_man_files(self, filelists_path: Path) -> Dict[str, list]:
+        """Get detailed list of man files for each package.
         Args:
-            file_path: File path to check
+            filelists_path: Path to filelists.xml.gz file
         Returns:
-            True if path is in a standard man page directory
+            Dict mapping package name to list of man page paths
         """
-        return '/man/' in file_path and (
-            '/share/man/' in file_path or file_path.startswith('/usr/man/')
-        )
+        packages = {}
+        try:
+            with gzip.open(filelists_path, 'rb') as f:
+                context = ET.iterparse(f, events=('start', 'end'))
+                current_package = None
+                current_files = []
+                for event, elem in context:
+                    if event == 'start':
+                        if elem.tag.endswith('package'):
+                            current_package = elem.get('name')
+                            current_files = []
+                    elif event == 'end':
+                        if elem.tag.endswith('file'):
+                            file_path = elem.text
+                            if file_path and '/share/man/' in file_path:
+                                current_files.append(file_path)
+                        elif elem.tag.endswith('package'):
+                            if current_files and current_package:
+                                packages[current_package] = current_files
+                            elem.clear()
+                            current_package = None
+                            current_files = []
+        except Exception as e:
+            logger.error(f"Error parsing filelists: {e}")
+        return packages
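Taken together, the parser now exposes a coarse filter (which packages ship man pages at all) and a detailed map (which man files each package ships). A hedged usage sketch — the constructor signature is inferred from the RepoManager calls later in this diff, and the URL and cache paths are illustrative:

    from pathlib import Path

    parser = ContentsParser(
        "http://dl.rockylinux.org/pub/rocky/9.6/BaseOS/x86_64/os/",
        Path("/data/tmp/cache"),
    )
    with_man = parser.get_packages_with_manpages()   # set of package names
    # The detailed variant takes the already-downloaded filelists archive;
    # the real cached filename carries a checksum prefix, omitted here.
    detail = parser.get_package_man_files(Path("/data/tmp/cache/filelists.xml.gz"))
    print(len(with_man), "packages ship man pages")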

View File

@@ -52,6 +52,7 @@ class RepoManager:
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.download_dir.mkdir(parents=True, exist_ok=True)
+        # Initialize DNF
         self.base = dnf.Base()
         self.base.conf.debuglevel = 0
         self.base.conf.errorlevel = 0
@@ -66,23 +67,28 @@ class RepoManager:
         repo = dnf.repo.Repo(repo_id, self.base.conf)
         repo.baseurl = [self.repo_url]
         repo.enabled = True
-        repo.gpgcheck = False
+        repo.gpgcheck = False  # We verify checksums separately
         self.base.repos.add(repo)
         logger.info(f"Configured repository: {repo_id} at {self.repo_url}")
+        # Fill the sack (package database)
         self.base.fill_sack(load_system_repo=False, load_available_repos=True)
         logger.info("Repository metadata loaded")

     def discover_packages_with_manpages(self) -> Set[str]:
         """Discover which packages contain man pages using filelists.
+        This is the key optimization - we parse repository metadata
+        to identify packages with man pages before downloading anything.
         Returns:
             Set of package names that contain man pages
         """
         if self.packages_with_manpages is not None:
             return self.packages_with_manpages
+        # Try pub first, then vault if it fails
         content_dirs = ["pub/rocky", "vault/rocky"]
         for content_dir in content_dirs:
             original_content_dir = self.config.content_dir
@@ -93,9 +99,9 @@ class RepoManager:
             )
             parser = ContentsParser(repo_url, self.cache_dir)
             packages = parser.get_packages_with_manpages()
-            if packages:
+            if packages:  # Only use if it has man pages
                 self.packages_with_manpages = packages
-                self.repo_url = repo_url
+                self.repo_url = repo_url  # Set for later use
                 logger.info(f"Using repository: {repo_url}")
                 break
             else:
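The pub-then-vault fallback mirrors how Rocky Linux hosts content: current point releases live under pub/rocky, while superseded ones move to vault/rocky. The code above decides by whether filelists parsing actually yields packages; a simpler sketch of the same idea using an HTTP probe (URL layout and helper name are illustrative, not this codebase's API):

    import requests
    from urllib.parse import urljoin

    def pick_repo_url(base_url: str, version: str, repo_type: str, arch: str) -> str | None:
        """Return the first content root that actually serves repomd.xml."""
        for content_dir in ("pub/rocky", "vault/rocky"):
            candidate = f"{base_url}{content_dir}/{version}/{repo_type}/{arch}/os/"
            probe = urljoin(candidate, "repodata/repomd.xml")
            try:
                if requests.head(probe, timeout=10).status_code == 200:
                    return candidate
            except requests.RequestException:
                continue
        return None

    # pick_repo_url("http://dl.rockylinux.org/", "9.5", "BaseOS", "x86_64")
    # would resolve to the vault URL once 9.5 is superseded.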
@@ -124,29 +130,39 @@ class RepoManager:
f"Querying packages from {self.repo_type} ({self.version}/{self.arch})" f"Querying packages from {self.repo_type} ({self.version}/{self.arch})"
) )
# Get packages with man pages if filtering
manpage_packages = None manpage_packages = None
if with_manpages_only: if with_manpages_only:
manpage_packages = self.discover_packages_with_manpages() manpage_packages = self.discover_packages_with_manpages()
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages") logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
# Configure DNF repo now that we have the correct repo_url
self._configure_repo() self._configure_repo()
packages = [] packages = []
# Query all available packages
query = self.base.sack.query().available() query = self.base.sack.query().available()
# For each package name, get only one arch (prefer noarch, then our target arch)
seen_names = set() seen_names = set()
for pkg in query: for pkg in query:
pkg_name = pkg.name pkg_name = pkg.name
# Skip if we've already added this package
if pkg_name in seen_names: if pkg_name in seen_names:
continue continue
# Skip if filtering and package doesn't have man pages
if manpage_packages and pkg_name not in manpage_packages: if manpage_packages and pkg_name not in manpage_packages:
continue continue
# Get repo information
repo = pkg.repo repo = pkg.repo
baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url
chksum_type, chksum_value = pkg.chksum if pkg.chksum else ("sha256", "")
# Create Package object
package = Package( package = Package(
name=pkg_name, name=pkg_name,
version=pkg.version, version=pkg.version,
@@ -155,16 +171,16 @@ class RepoManager:
                 repo_type=self.repo_type,
                 location=pkg.location,
                 baseurl=baseurl,
-                checksum=chksum_value,
-                checksum_type=chksum_type,
-                has_manpages=bool(manpage_packages),
+                checksum=pkg.chksum[1] if pkg.chksum else "",  # chksum is (type, value)
+                checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
+                has_manpages=True if manpage_packages else False,
             )
             packages.append(package)
             seen_names.add(pkg_name)
         logger.info(f"Found {len(packages)} packages to process")
-        return sorted(packages)
+        return sorted(packages)  # Sort by name for consistent ordering

     def download_package(self, package: Package) -> bool:
         """Download a single package.
@@ -178,6 +194,7 @@ class RepoManager:
         download_path = self.download_dir / package.filename
         package.download_path = download_path
+        # Skip if already downloaded
         if download_path.exists():
             logger.debug(f"Package already downloaded: {package.filename}")
             return True
@@ -187,6 +204,7 @@ class RepoManager:
             response = requests.get(package.download_url, timeout=300, stream=True)
             response.raise_for_status()
+            # Download with progress (optional: could add progress bar here)
             with open(download_path, "wb") as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     if chunk:
@@ -197,6 +215,7 @@ class RepoManager:
         except Exception as e:
             logger.error(f"Error downloading {package.filename}: {e}")
+            # Clean up partial download
             if download_path.exists():
                 download_path.unlink()
             return False
@@ -216,10 +235,12 @@ class RepoManager:
         downloaded = []
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Submit all download tasks
             future_to_pkg = {
                 executor.submit(self.download_package, pkg): pkg for pkg in packages
             }
+            # Process completed downloads
             for future in as_completed(future_to_pkg):
                 pkg = future_to_pkg[future]
                 try:
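The submit-then-as_completed shape used here is the standard way to fan out I/O-bound work and harvest results as they finish, without one slow or failing download blocking the rest. A self-contained sketch of the same pattern (the fetch function is a stand-in):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fetch(name: str) -> str:
        # Stand-in for a download; may raise on failure.
        return name.upper()

    items = ["bash", "coreutils", "vim"]
    results, failures = [], []
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = {executor.submit(fetch, it): it for it in items}
        for future in as_completed(futures):
            item = futures[future]
            try:
                results.append(future.result())
            except Exception as exc:
                failures.append((item, exc))  # one bad item doesn't stop the rest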

View File

@@ -24,26 +24,31 @@ class Config:
         parallel_conversions: Number of parallel HTML conversions
     """

+    # Repository configuration
     base_url: str = "http://dl.rockylinux.org/"
     content_dir: str = "pub/rocky"
     versions: List[str] = None
     architectures: List[str] = None
     repo_types: List[str] = None

+    # Directory configuration
     download_dir: Path = Path("/data/tmp/downloads")
     extract_dir: Path = Path("/data/tmp/extracts")
     output_dir: Path = Path("/data/html")

+    # Cleanup options
     keep_rpms: bool = False
     keep_extracts: bool = False

+    # Performance options
     parallel_downloads: int = 5
     parallel_conversions: int = 10

+    # Filtering options
     skip_sections: List[str] = None
     skip_packages: List[str] = None
-    skip_languages: bool = True
-    allow_all_sections: bool = False
+    skip_languages: bool = True  # Skip non-English languages by default
+    allow_all_sections: bool = False  # Override skip_sections if True

     def __post_init__(self):
         """Set defaults and ensure directories exist."""
@@ -51,16 +56,20 @@ class Config:
self.versions = ["8.10", "9.6", "10.0"] self.versions = ["8.10", "9.6", "10.0"]
if self.architectures is None: if self.architectures is None:
# Man pages are arch-independent, so we just need one
# We prefer x86_64 as it's most common, fallback to others
self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"] self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"]
if self.repo_types is None: if self.repo_types is None:
self.repo_types = ["BaseOS", "AppStream"] self.repo_types = ["BaseOS", "AppStream"]
# Set default skip sections (man3 library APIs)
if self.skip_sections is None and not self.allow_all_sections: if self.skip_sections is None and not self.allow_all_sections:
self.skip_sections = ["3", "3p", "3pm"] self.skip_sections = ["3", "3p", "3pm"]
elif self.allow_all_sections: elif self.allow_all_sections:
self.skip_sections = [] self.skip_sections = []
# Set default skip packages (high-volume API docs)
if self.skip_packages is None: if self.skip_packages is None:
self.skip_packages = [ self.skip_packages = [
"lapack", "lapack",
@@ -68,6 +77,7 @@ class Config:
"gl-manpages", "gl-manpages",
] ]
# Ensure all paths are Path objects
self.download_dir = Path(self.download_dir) self.download_dir = Path(self.download_dir)
self.extract_dir = Path(self.extract_dir) self.extract_dir = Path(self.extract_dir)
self.output_dir = Path(self.output_dir) self.output_dir = Path(self.output_dir)
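Because the dataclass normalizes its own defaults in __post_init__, a caller only overrides what differs. A hedged usage sketch (field names as defined above; the specific values are illustrative):

    from pathlib import Path

    # Build one older point release from vault, keeping the RPMs around.
    cfg = Config(
        versions=["9.5"],
        content_dir="vault/rocky",
        output_dir=Path("./html"),
        keep_rpms=True,
    )
    assert cfg.skip_sections == ["3", "3p", "3pm"]   # man3 skipped by default
    assert cfg.architectures[0] == "x86_64"          # preferred arch first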

View File

@@ -3,7 +3,6 @@
 import gzip
 import json
 import logging
-from collections import defaultdict
 from pathlib import Path
 from typing import List, Dict, Any
@@ -34,6 +33,7 @@ class WebGenerator:
         self.output_dir = Path(output_dir)
         self.output_dir.mkdir(parents=True, exist_ok=True)
+        # Setup Jinja2 environment
         self.env = Environment(
             loader=FileSystemLoader(str(self.template_dir)),
             autoescape=select_autoescape(["html", "xml"]),
@@ -66,6 +66,7 @@ class WebGenerator:
             content=man_file.html_content,
         )
+        # Ensure output path is set
         if not man_file.html_path:
             man_file.html_path = self._get_manpage_path(man_file, version)
@@ -126,18 +127,24 @@ class WebGenerator:
             True if successful
         """
         try:
-            packages_by_letter = defaultdict(list)
+            # Group packages by first letter
+            packages_by_letter = {}
             for pkg_name, pages in search_data.items():
                 first_char = pkg_name[0].upper()
                 if not first_char.isalpha():
                     first_char = "other"
+                if first_char not in packages_by_letter:
+                    packages_by_letter[first_char] = []
                 packages_by_letter[first_char].append(
                     {"name": pkg_name, "count": len(pages)}
                 )
-            for packages in packages_by_letter.values():
-                packages.sort(key=lambda x: x["name"])
+            # Sort packages within each letter
+            for letter in packages_by_letter:
+                packages_by_letter[letter].sort(key=lambda x: x["name"])

             template = self.env.get_template("packages.html")
@@ -181,6 +188,7 @@ class WebGenerator:
             if pkg_name not in index:
                 index[pkg_name] = {}
+            # Create entry for this man page
             entry = {
                 "name": man_file.name,
                 "section": man_file.section,
@@ -190,6 +198,7 @@ class WebGenerator:
"full_name": f"{man_file.package_name} - {man_file.display_name}", "full_name": f"{man_file.package_name} - {man_file.display_name}",
} }
# Use display name as key (handles duplicates with different sections)
key = man_file.display_name key = man_file.display_name
if man_file.language: if man_file.language:
key = f"{key}.{man_file.language}" key = f"{key}.{man_file.language}"
@@ -214,11 +223,15 @@ class WebGenerator:
         json_path = version_dir / "search.json"
         gz_path = version_dir / "search.json.gz"

+        # Sort for consistency
         sorted_index = {k: index[k] for k in sorted(index)}

+        # Save plain JSON
         with open(json_path, "w", encoding="utf-8") as f:
             json.dump(sorted_index, f, indent=2)

+        # Save gzipped JSON
         with gzip.open(gz_path, "wt", encoding="utf-8") as f:
             json.dump(sorted_index, f)
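Writing both a plain and a gzipped search.json lets the front end pick whichever the host serves more cheaply; reading either back is symmetric. A short sketch of a consumer (the helper name is illustrative):

    import gzip
    import json
    from pathlib import Path

    def load_search_index(version_dir: Path) -> dict:
        """Prefer the gzipped index, fall back to plain JSON."""
        gz = version_dir / "search.json.gz"
        if gz.exists():
            with gzip.open(gz, "rt", encoding="utf-8") as f:
                return json.load(f)
        with open(version_dir / "search.json", encoding="utf-8") as f:
            return json.load(f)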
@@ -257,18 +270,21 @@ class WebGenerator:
         try:
             template = self.env.get_template("root.html")
-            major_to_minors = defaultdict(list)
+            # Group versions by major version
+            major_to_minors = {}
             for v in versions:
                 try:
                     major, minor = v.split(".")
-                    major_to_minors[major].append(minor)
+                    major_to_minors.setdefault(major, []).append(minor)
                 except ValueError:
-                    continue
+                    continue  # Skip invalid versions

+            # Sort majors ascending, minors descending within each major
             sorted_majors = sorted(major_to_minors, key=int)
-            max_minors = max((len(major_to_minors[m]) for m in sorted_majors), default=0)
+            max_minors = max(len(major_to_minors[major]) for major in sorted_majors)
             num_columns = len(sorted_majors)

+            # Create rows of versions for side-by-side display
             version_rows = []
             for minor_idx in range(max_minors):
                 row = []
@@ -277,7 +293,7 @@ class WebGenerator:
                     if minor_idx < len(minors_list):
                         row.append((major, minors_list[minor_idx]))
                     else:
-                        row.append(None)
+                        row.append(None)  # Empty cell placeholder
                 version_rows.append(row)

             html = template.render(
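One subtlety in the rewritten max() line: a bare max() over an empty generator raises ValueError, so the old default=0 guard mattered whenever the versions list was empty or entirely invalid. A standalone sketch of the row-building logic that keeps the guard (function name illustrative):

    def build_version_rows(versions: list[str]) -> list[list]:
        """Arrange 'major.minor' strings into rows for a side-by-side grid."""
        major_to_minors: dict[str, list[str]] = {}
        for v in versions:
            try:
                major, minor = v.split(".")
            except ValueError:
                continue  # skip malformed entries
            major_to_minors.setdefault(major, []).append(minor)
        sorted_majors = sorted(major_to_minors, key=int)
        # default=0 keeps an empty input from raising ValueError
        max_minors = max((len(major_to_minors[m]) for m in sorted_majors), default=0)
        rows = []
        for i in range(max_minors):
            row = []
            for major in sorted_majors:
                minors = major_to_minors[major]
                row.append((major, minors[i]) if i < len(minors) else None)
            rows.append(row)
        return rows

    # build_version_rows(["8.10", "9.6", "9.5", "10.0"])
    # -> [[('8', '10'), ('9', '6'), ('10', '0')], [None, ('9', '5'), None]]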
@@ -295,28 +311,3 @@ class WebGenerator:
         except Exception as e:
             logger.error(f"Error generating root index: {e}")
             return False
-
-    def generate_404_page(self) -> bool:
-        """Generate 404 error page.
-        Returns:
-            True if successful
-        """
-        try:
-            template = self.env.get_template("404.html")
-            html = template.render(
-                title="404 - Page Not Found"
-            )
-            error_path = self.output_dir / "404.html"
-            with open(error_path, "w", encoding="utf-8") as f:
-                f.write(html)
-            logger.info("Generated 404 page")
-            return True
-        except Exception as e:
-            logger.error(f"Error generating 404 page: {e}")
-            return False

View File

@@ -1,137 +0,0 @@
{% extends "base.html" %}
{% block header_title %}Rocky Linux Man Pages{% endblock %}
{% block header_subtitle %}Man page documentation for Rocky Linux packages{% endblock %}
{% block extra_css %}
.error-container {
text-align: center;
padding: 4rem 2rem;
}
.error-code {
font-size: 8rem;
font-weight: 700;
color: var(--accent-primary);
line-height: 1;
margin-bottom: 1rem;
font-family: "JetBrains Mono", monospace;
}
.error-message {
font-size: 1.5rem;
color: var(--text-primary);
margin-bottom: 1rem;
}
.error-description {
color: var(--text-secondary);
margin-bottom: 2rem;
max-width: 600px;
margin-left: auto;
margin-right: auto;
}
.suggestions {
max-width: 600px;
margin: 2rem auto;
text-align: left;
}
.suggestions h3 {
color: var(--text-primary);
margin-bottom: 1rem;
}
.suggestions ul {
list-style: none;
padding: 0;
}
.suggestions li {
margin-bottom: 0.75rem;
padding-left: 1.5rem;
position: relative;
}
.suggestions li::before {
content: "→";
position: absolute;
left: 0;
color: var(--accent-primary);
}
.back-button {
display: inline-block;
padding: 0.75rem 1.5rem;
background: var(--accent-primary);
color: white;
text-decoration: none;
border-radius: 6px;
font-weight: 500;
transition: all 0.2s;
margin-top: 2rem;
}
.back-button:hover {
background: var(--accent-secondary);
transform: translateY(-2px);
text-decoration: none;
}
@media (max-width: 768px) {
.error-code {
font-size: 5rem;
}
.error-message {
font-size: 1.25rem;
}
.error-container {
padding: 3rem 1rem;
}
}
@media (max-width: 480px) {
.error-code {
font-size: 4rem;
}
.error-message {
font-size: 1.1rem;
}
.error-container {
padding: 2rem 1rem;
}
.suggestions {
padding: 0 1rem;
}
}
{% endblock %}
{% block content %}
<div class="content">
<div class="error-container">
<div class="error-code">404</div>
<div class="error-message">Page Not Found</div>
<div class="error-description">
The page you're looking for doesn't exist or may have been moved.
</div>
<div class="suggestions">
<h3>Suggestions:</h3>
<ul>
<li>Check the URL for typos</li>
<li>Return to the <a href="/">home page</a> and navigate from there</li>
<li>Use the search feature on the version index page</li>
<li>The man page may be in a different version of Rocky Linux</li>
</ul>
</div>
<a href="/" class="back-button">Go to Home Page</a>
</div>
</div>
{% endblock %}

View File

@@ -112,47 +112,6 @@ font-size: 0.9em;
     color: var(--success);
 }

-/* OPTIONS section specific styling */
-/* Style paragraphs that contain option flags (b tags followed by i tags or immediately followed by Bd-indent) */
-.man-content section.Sh p.Pp:has(+ .Bd-indent) {
-    font-weight: 600;
-    font-size: 1.05em;
-    margin-top: 1.5rem;
-    margin-bottom: 0.5rem;
-    padding: 0.5rem 0.75rem;
-    background: linear-gradient(90deg, var(--bg-tertiary) 0%, transparent 100%);
-    border-left: 3px solid var(--accent-primary);
-}
-
-.man-content section.Sh p.Pp:has(+ .Bd-indent) b {
-    color: var(--accent-primary);
-    font-size: 1em;
-}
-
-.man-content section.Sh p.Pp:has(+ .Bd-indent) i {
-    color: var(--text-secondary);
-    font-style: italic;
-}
-
-/* Indented description blocks */
-.man-content .Bd-indent {
-    margin-left: 2.5rem;
-    margin-bottom: 1.5rem;
-    padding-left: 1rem;
-    border-left: 2px solid var(--border-color);
-    color: var(--text-primary);
-}
-
-/* Add spacing between nested paragraphs in descriptions */
-.man-content .Bd-indent > p.Pp {
-    margin-top: 0.75rem;
-    margin-bottom: 0.75rem;
-}
-
-.man-content .Bd-indent > p.Pp:first-child {
-    margin-top: 0;
-}
-
 .man-content pre {
     background-color: var(--bg-primary);
     border: 1px solid var(--border-color);
@@ -256,16 +215,6 @@ margin-left: 1rem;
     .man-content .Bl-dash {
         padding-left: 1rem;
     }
-
-    .man-content section.Sh p.Pp:has(+ .Bd-indent) {
-        font-size: 1em;
-        padding: 0.4rem 0.5rem;
-    }
-
-    .man-content .Bd-indent {
-        margin-left: 1.5rem;
-        padding-left: 0.75rem;
-    }
 }

 @media (max-width: 480px) {