Compare commits
10 Commits
main
...
add-feedba
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c5651d6926 | ||
|
|
70414a552d | ||
|
|
1c6fa0e98c | ||
|
|
68b9310862 | ||
|
|
890d7fc8f9 | ||
|
|
907d92bb16 | ||
|
|
ffc0d11bbb | ||
|
|
fc2f024d60 | ||
|
|
47db0185c5 | ||
|
|
f474c238dc |
41
.github/workflows/build.yml
vendored
41
.github/workflows/build.yml
vendored
@@ -25,26 +25,39 @@ on:
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: rockylinux:9
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build Docker image
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
docker build -t rocky-man:latest .
|
||||
dnf install -y \
|
||||
python3.11 \
|
||||
python3.11-pip \
|
||||
mandoc \
|
||||
rpm-build \
|
||||
dnf-plugins-core \
|
||||
git
|
||||
|
||||
- name: Create output directories
|
||||
- name: Install UV
|
||||
run: |
|
||||
mkdir -p ./html ./tmp
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build man pages in container
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v "$(pwd)/html:/data/html" \
|
||||
-v "$(pwd)/tmp:/data/tmp" \
|
||||
rocky-man:latest \
|
||||
--versions ${{ github.event.inputs.versions || '8.10 9.6 10.0' }} \
|
||||
uv pip install --system -e .
|
||||
|
||||
- name: Build man pages
|
||||
run: |
|
||||
python3.11 -m rocky_man.main \
|
||||
--versions ${{ github.event.inputs.versions || '8.10 9.5' }} \
|
||||
--output-dir ./html \
|
||||
--download-dir ./tmp/downloads \
|
||||
--extract-dir ./tmp/extracts \
|
||||
--verbose
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
@@ -55,3 +68,11 @@ jobs:
|
||||
name: rocky-man-pages
|
||||
path: html/
|
||||
retention-days: 30
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
if: github.ref == 'refs/heads/main'
|
||||
uses: peaceiris/actions-gh-pages@v3
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./html
|
||||
force_orphan: true
|
||||
|
||||
Binary file not shown.
@@ -18,7 +18,7 @@ RUN dnf install -y epel-release \
|
||||
WORKDIR /app
|
||||
|
||||
# Copy project files
|
||||
COPY pyproject.toml README.md LICENSE ./
|
||||
COPY pyproject.toml README.md LICENSE THIRD-PARTY-LICENSES.md ./
|
||||
COPY src ./src
|
||||
COPY templates ./templates
|
||||
|
||||
|
||||
154
Jenkinsfile
vendored
154
Jenkinsfile
vendored
@@ -1,154 +0,0 @@
|
||||
// Jenkinsfile for Rocky Man
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
yaml """
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
labels:
|
||||
jenkins: agent
|
||||
spec:
|
||||
containers:
|
||||
- name: docker
|
||||
image: docker:24-dind
|
||||
securityContext:
|
||||
privileged: true
|
||||
volumeMounts:
|
||||
- name: docker-sock
|
||||
mountPath: /var/run
|
||||
command:
|
||||
- dockerd-entrypoint.sh
|
||||
- name: docker-cli
|
||||
image: docker:24-cli
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
volumeMounts:
|
||||
- name: docker-sock
|
||||
mountPath: /var/run
|
||||
- name: b2
|
||||
image: backblazeit/b2:latest
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
volumes:
|
||||
- name: docker-sock
|
||||
emptyDir: {}
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
||||
parameters {
|
||||
string(
|
||||
name: 'VERSIONS',
|
||||
defaultValue: '8.10 9.7 10.1',
|
||||
description: 'Rocky Linux versions to build (space-separated)'
|
||||
)
|
||||
string(
|
||||
name: 'B2_BUCKET_NAME',
|
||||
defaultValue: 'rockyman',
|
||||
description: 'B2 bucket name for uploads'
|
||||
)
|
||||
string(
|
||||
name: 'EXISTING_VERSIONS',
|
||||
defaultValue: '',
|
||||
description: 'Existing versions already built (space-separated)'
|
||||
)
|
||||
string(
|
||||
name: 'PARALLEL_DOWNLOADS',
|
||||
defaultValue: '5',
|
||||
description: 'Number of parallel downloads'
|
||||
)
|
||||
string(
|
||||
name: 'PARALLEL_CONVERSIONS',
|
||||
defaultValue: '10',
|
||||
description: 'Number of parallel conversions'
|
||||
)
|
||||
}
|
||||
|
||||
options {
|
||||
buildDiscarder(logRotator(numToKeepStr: '10'))
|
||||
timeout(time: 2, unit: 'HOURS')
|
||||
}
|
||||
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
steps {
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
|
||||
stage('Build Docker Image') {
|
||||
steps {
|
||||
container('docker-cli') {
|
||||
sh '''
|
||||
docker build -t rocky-man:${BUILD_NUMBER} .
|
||||
docker tag rocky-man:${BUILD_NUMBER} rocky-man:latest
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Build Man Pages') {
|
||||
steps {
|
||||
container('docker-cli') {
|
||||
sh '''
|
||||
# Create output directories
|
||||
mkdir -p ./html ./tmp
|
||||
|
||||
# Run the container to build man pages
|
||||
docker run --rm \
|
||||
-v "$(pwd)/html:/data/html" \
|
||||
-v "$(pwd)/tmp:/data/tmp" \
|
||||
rocky-man:${BUILD_NUMBER} \
|
||||
--versions ${VERSIONS} \
|
||||
--parallel-downloads ${PARALLEL_DOWNLOADS} \
|
||||
--parallel-conversions ${PARALLEL_CONVERSIONS} \
|
||||
--existing-versions ${EXISTING_VERSIONS}
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Upload to B2') {
|
||||
when {
|
||||
expression { return params.B2_BUCKET_NAME != "" }
|
||||
}
|
||||
steps {
|
||||
container('docker-cli') {
|
||||
withCredentials([
|
||||
string(credentialsId: 'b2-app-id', variable: 'B2_APPLICATION_KEY_ID'),
|
||||
string(credentialsId: 'b2-app-key', variable: 'B2_APPLICATION_KEY')
|
||||
]) {
|
||||
sh '''
|
||||
docker run --rm \
|
||||
-v "$(pwd)/html:/workspace/html" \
|
||||
-e B2_APPLICATION_KEY \
|
||||
-e B2_APPLICATION_KEY_ID \
|
||||
backblazeit/b2:latest \
|
||||
b2v4 sync --compare-versions size /workspace/html/ "b2://${B2_BUCKET_NAME}/"
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
post {
|
||||
success {
|
||||
echo 'Build completed and uploaded to B2!'
|
||||
}
|
||||
failure {
|
||||
echo 'Build failed!'
|
||||
}
|
||||
cleanup {
|
||||
container('docker-cli') {
|
||||
sh '''
|
||||
docker rmi rocky-man:${BUILD_NUMBER} || true
|
||||
docker rmi rocky-man:latest || true
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Ctrl IQ, Inc.
|
||||
Copyright (c) 2024 Stephen Simpson
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
278
README.md
278
README.md
@@ -1,108 +1,133 @@
|
||||
# 🚀 Rocky Man 🚀
|
||||
# Rocky Man 📚
|
||||
|
||||
**Rocky Man** is a tool for generating searchable HTML documentation from Rocky Linux man pages across BaseOS and AppStream repositories for Rocky Linux 8, 9, and 10.
|
||||
|
||||
## Features
|
||||
|
||||
- Uses filelists.xml to pre-filter packages with man pages
|
||||
- Processes packages from BaseOS and AppStream repositories
|
||||
- Runs in containers on x86_64, aarch64, and arm64 architectures
|
||||
- Configurable cleanup of temporary files
|
||||
- Concurrent downloads and conversions
|
||||
- Supports Rocky Linux 8, 9, and 10
|
||||
- **Fast & Efficient**: Uses filelists.xml to pre-filter packages with man pages
|
||||
- **Complete Coverage**: All packages from BaseOS and AppStream repositories
|
||||
- **Container Ready**: Works on x86_64, aarch64, arm64, etc.
|
||||
- **Smart Cleanup**: Automatic cleanup of temporary files (configurable)
|
||||
- **Parallel Processing**: Concurrent downloads and conversions for maximum speed
|
||||
- **Multi-version**: Support for Rocky Linux 8, 9, and 10 simultaneously
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Podman
|
||||
### Podman (Recommended)
|
||||
|
||||
```bash
|
||||
# Build the image
|
||||
podman build -t rocky-man .
|
||||
|
||||
# Generate man pages for Rocky Linux 9.6 (using defaults, no custom args)
|
||||
podman run --rm -v $(pwd)/html:/data/html:Z rocky-man
|
||||
|
||||
# Generate for specific versions (requires explicit paths)
|
||||
podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
|
||||
--versions 8.10 9.6 10.0 --output-dir /app/html
|
||||
|
||||
# With verbose logging
|
||||
podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
|
||||
--versions 9.6 --output-dir /app/html --verbose
|
||||
|
||||
# Keep downloaded RPMs (mount the download directory)
|
||||
podman run --rm -it \
|
||||
-v $(pwd)/html:/app/html:Z \
|
||||
-v $(pwd)/downloads:/app/tmp/downloads:Z \
|
||||
rocky-man --versions 9.6 --keep-rpms \
|
||||
--output-dir /app/html --download-dir /app/tmp/downloads --verbose
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build the image
|
||||
docker build -t rocky-man .
|
||||
|
||||
# Generate for specific versions
|
||||
podman run --rm -v $(pwd)/html:/data/html:Z rocky-man \
|
||||
--versions 8.10 9.6 10.0
|
||||
# Generate man pages (using defaults, no custom args)
|
||||
docker run --rm -v $(pwd)/html:/data/html rocky-man
|
||||
|
||||
# Keep downloaded RPMs for multiple builds
|
||||
podman run --rm -it \
|
||||
-v $(pwd)/html:/data/html:Z \
|
||||
-v $(pwd)/downloads:/data/tmp/downloads:Z \
|
||||
rocky-man --versions 9.6 --keep-rpms --verbose
|
||||
```
|
||||
# Generate for specific versions (requires explicit paths)
|
||||
docker run --rm -v $(pwd)/html:/app/html rocky-man \
|
||||
--versions 9.6 --output-dir /app/html
|
||||
|
||||
### View the HTML Locally
|
||||
# Interactive mode for debugging
|
||||
docker run --rm -it -v $(pwd)/html:/app/html rocky-man \
|
||||
--versions 9.6 --output-dir /app/html --verbose
|
||||
|
||||
Start a local web server to browse the generated documentation:
|
||||
|
||||
```bash
|
||||
python3 -m http.server -d ./html
|
||||
```
|
||||
|
||||
Then open [http://127.0.0.1:8000](http://127.0.0.1:8000) in your browser.
|
||||
|
||||
To use a different port:
|
||||
|
||||
```bash
|
||||
python3 -m http.server 8080 -d ./html
|
||||
# Keep downloaded RPMs (mount the download directory)
|
||||
docker run --rm -it \
|
||||
-v $(pwd)/html:/app/html \
|
||||
-v $(pwd)/downloads:/app/tmp/downloads \
|
||||
rocky-man --versions 9.6 --keep-rpms \
|
||||
--output-dir /app/html --download-dir /app/tmp/downloads --verbose
|
||||
```
|
||||
|
||||
### Directory Structure in Container
|
||||
|
||||
The container uses the following paths:
|
||||
The container uses different paths depending on whether you pass custom arguments:
|
||||
|
||||
**Without custom arguments** (using Dockerfile CMD defaults):
|
||||
- `/data/html` - Generated HTML output
|
||||
- `/data/tmp/downloads` - Downloaded RPM files
|
||||
- `/data/tmp/extracts` - Extracted man page files
|
||||
|
||||
These paths are used by default and can be overridden with command-line arguments if needed.
|
||||
**With custom arguments** (argparse defaults from working directory `/app`):
|
||||
- `/app/html` - Generated HTML output
|
||||
- `/app/tmp/downloads` - Downloaded RPM files
|
||||
- `/app/tmp/extracts` - Extracted man page files
|
||||
|
||||
**Important**: When passing custom arguments, the container's CMD is overridden and the code falls back to relative paths (`./html` = `/app/html`). You must explicitly specify `--output-dir /app/html --download-dir /app/tmp/downloads` to match your volume mounts. Without this, files are written inside the container and lost when it stops (especially with `--rm`).
|
||||
|
||||
### Local Development
|
||||
|
||||
**Important**: Rocky Man requires Rocky Linux because it uses the system's native `python3-dnf` module to interact with DNF repositories. This module cannot be installed via pip and must come from the Rocky Linux system packages.
|
||||
#### Prerequisites
|
||||
|
||||
#### Option 1: Run in a Rocky Linux Container (Recommended)
|
||||
- Python 3.9+
|
||||
- pip (Python package manager)
|
||||
- mandoc (man page converter)
|
||||
- Rocky Linux system or container (for DNF)
|
||||
|
||||
#### Installation
|
||||
|
||||
```bash
|
||||
# Start a Rocky Linux container with your project mounted
|
||||
podman run --rm -it -v $(pwd):/workspace:Z rockylinux/rockylinux:9 /bin/bash
|
||||
|
||||
# Inside the container, navigate to the project
|
||||
cd /workspace
|
||||
|
||||
# Install epel-release for mandoc
|
||||
dnf install -y epel-release
|
||||
|
||||
# Install system dependencies
|
||||
# On Rocky Linux, install system dependencies
|
||||
dnf install -y python3 python3-pip python3-dnf mandoc rpm-build dnf-plugins-core
|
||||
|
||||
# Install Python dependencies
|
||||
pip3 install -e .
|
||||
|
||||
# Run the tool
|
||||
python3 -m rocky_man.main --versions 9.6 --output-dir ./html/
|
||||
```
|
||||
|
||||
#### Option 2: On a Native Rocky Linux System
|
||||
#### Usage
|
||||
|
||||
```bash
|
||||
# Install epel-release for mandoc
|
||||
dnf install -y epel-release
|
||||
# Generate man pages for Rocky 9.6
|
||||
python -m rocky_man.main --versions 9.6
|
||||
|
||||
# Install system dependencies
|
||||
dnf install -y python3 python3-pip python3-dnf mandoc rpm-build dnf-plugins-core
|
||||
# Generate for multiple versions (default)
|
||||
python -m rocky_man.main --versions 8.10 9.6 10.0
|
||||
|
||||
# Install Python dependencies
|
||||
pip3 install -e .
|
||||
# Custom output directory
|
||||
python -m rocky_man.main --output-dir /var/www/html/man --versions 9.6
|
||||
|
||||
# Run the tool
|
||||
python3 -m rocky_man.main --versions 9.6 --output-dir ./html/
|
||||
# Keep downloaded RPMs for debugging
|
||||
python -m rocky_man.main --keep-rpms --verbose
|
||||
|
||||
# Adjust parallelism for faster processing
|
||||
python -m rocky_man.main --parallel-downloads 10 --parallel-conversions 20
|
||||
|
||||
# Use a different mirror
|
||||
python -m rocky_man.main --mirror https://mirrors.example.com/
|
||||
|
||||
# Only BaseOS (faster)
|
||||
python -m rocky_man.main --repo-types BaseOS --versions 9.6
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
Rocky Man is organized into components:
|
||||
Rocky Man is organized into clean, modular components:
|
||||
|
||||
```text
|
||||
```
|
||||
rocky-man/
|
||||
├── src/rocky_man/
|
||||
│ ├── models/ # Data models (Package, ManFile)
|
||||
@@ -118,28 +143,22 @@ rocky-man/
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Package Discovery** - Parses repository metadata (`repodata/repomd.xml` and `filelists.xml.gz`) to identify packages containing files in `/usr/share/man/` directories
|
||||
2. **Package Download** - Downloads identified RPM packages using DNF, with configurable parallel downloads (default: 5)
|
||||
3. **Man Page Extraction** - Extracts man page files from RPMs using `rpm2cpio`, filtering by section and language based on configuration
|
||||
4. **HTML Conversion** - Converts troff-formatted man pages to HTML using mandoc, with parallel processing (default: 10 workers)
|
||||
5. **Cross-Reference Linking** - Parses converted HTML to add hyperlinks between man page references (e.g., `bash(1)` becomes clickable)
|
||||
6. **Index Generation** - Creates search indexes (JSON/gzipped) and navigation pages using Jinja2 templates
|
||||
7. **Cleanup** - Removes temporary files (RPMs and extracted content) unless `--keep-rpms` or `--keep-extracts` is specified
|
||||
1. **Package Discovery** - Parse repository `filelists.xml` to identify packages with man pages
|
||||
2. **Smart Download** - Download only packages containing man pages with parallel downloads
|
||||
3. **Extraction** - Extract man page files from RPM packages
|
||||
4. **Conversion** - Convert troff format to HTML using mandoc
|
||||
5. **Web Generation** - Wrap HTML in templates and generate search index
|
||||
6. **Cleanup** - Automatically remove temporary files (configurable)
|
||||
|
||||
## Command Line Options
|
||||
|
||||
```bash
|
||||
usage: main.py [-h] [--versions VERSIONS [VERSIONS ...]]
|
||||
[--repo-types REPO_TYPES [REPO_TYPES ...]]
|
||||
[--output-dir OUTPUT_DIR] [--download-dir DOWNLOAD_DIR]
|
||||
[--extract-dir EXTRACT_DIR] [--keep-rpms] [--keep-extracts]
|
||||
[--parallel-downloads PARALLEL_DOWNLOADS]
|
||||
[--parallel-conversions PARALLEL_CONVERSIONS] [--mirror MIRROR]
|
||||
[--vault] [--existing-versions [VERSION ...]]
|
||||
[--template-dir TEMPLATE_DIR] [-v]
|
||||
[--skip-sections [SKIP_SECTIONS ...]]
|
||||
[--skip-packages [SKIP_PACKAGES ...]] [--skip-languages]
|
||||
[--keep-languages] [--allow-all-sections]
|
||||
```
|
||||
usage: rocky-man [-h] [--versions VERSIONS [VERSIONS ...]]
|
||||
[--repo-types REPO_TYPES [REPO_TYPES ...]]
|
||||
[--output-dir OUTPUT_DIR] [--download-dir DOWNLOAD_DIR]
|
||||
[--extract-dir EXTRACT_DIR] [--keep-rpms] [--keep-extracts]
|
||||
[--parallel-downloads N] [--parallel-conversions N]
|
||||
[--mirror URL] [--template-dir DIR] [-v]
|
||||
|
||||
Generate HTML documentation for Rocky Linux man pages
|
||||
|
||||
@@ -150,11 +169,11 @@ optional arguments:
|
||||
--repo-types REPO_TYPES [REPO_TYPES ...]
|
||||
Repository types to process (default: BaseOS AppStream)
|
||||
--output-dir OUTPUT_DIR
|
||||
Output directory for HTML files (default: /data/html)
|
||||
Output directory for HTML files (default: ./html)
|
||||
--download-dir DOWNLOAD_DIR
|
||||
Directory for downloading packages (default: /data/tmp/downloads)
|
||||
Directory for downloading packages (default: ./tmp/downloads)
|
||||
--extract-dir EXTRACT_DIR
|
||||
Directory for extracting man pages (default: /data/tmp/extracts)
|
||||
Directory for extracting man pages (default: ./tmp/extracts)
|
||||
--keep-rpms Keep downloaded RPM files after processing
|
||||
--keep-extracts Keep extracted man files after processing
|
||||
--parallel-downloads PARALLEL_DOWNLOADS
|
||||
@@ -177,11 +196,80 @@ optional arguments:
|
||||
--allow-all-sections Include all man sections (overrides --skip-sections)
|
||||
```
|
||||
|
||||
## Attribution
|
||||
## Troubleshooting
|
||||
|
||||
The man pages displayed in this documentation are sourced from Rocky Linux distribution packages. All man page content is copyrighted by their respective authors and distributed under the licenses specified within each man page.
|
||||
### DNF Errors
|
||||
|
||||
This tool generates HTML documentation from man pages contained in Rocky Linux packages but does not modify the content of the man pages themselves.
|
||||
**Problem**: `dnf` module not found or repository errors
|
||||
|
||||
**Solution**: Ensure you're running on Rocky Linux or in a Rocky Linux container:
|
||||
|
||||
```bash
|
||||
# Run in Rocky Linux container
|
||||
podman run --rm -it -v $(pwd):/app rockylinux:9 /bin/bash
|
||||
cd /app
|
||||
|
||||
# Install dependencies
|
||||
dnf install -y python3 python3-dnf mandoc rpm-build dnf-plugins-core
|
||||
|
||||
# Run the script
|
||||
python3 -m rocky_man.main --versions 9.6
|
||||
```
|
||||
|
||||
### Mandoc Not Found
|
||||
|
||||
**Problem**: `mandoc: command not found`
|
||||
|
||||
**Solution**: Install mandoc:
|
||||
|
||||
```bash
|
||||
dnf install -y mandoc
|
||||
```
|
||||
|
||||
### Permission Errors in Container
|
||||
|
||||
**Problem**: Cannot write to mounted volume
|
||||
|
||||
**Solution**: Use the `:Z` flag with podman for SELinux contexts:
|
||||
|
||||
```bash
|
||||
podman run --rm -v $(pwd)/html:/data/html:Z rocky-man
|
||||
```
|
||||
|
||||
For Docker, ensure the volume path is absolute:
|
||||
|
||||
```bash
|
||||
docker run --rm -v "$(pwd)/html":/data/html rocky-man
|
||||
```
|
||||
|
||||
### Out of Memory
|
||||
|
||||
**Problem**: Process killed due to memory
|
||||
|
||||
**Solution**: Reduce parallelism:
|
||||
|
||||
```bash
|
||||
python -m rocky_man.main --parallel-downloads 2 --parallel-conversions 5
|
||||
```
|
||||
|
||||
### Slow Downloads
|
||||
|
||||
**Problem**: Downloads are very slow
|
||||
|
||||
**Solution**: Use a closer mirror:
|
||||
|
||||
```bash
|
||||
# Find mirrors at: https://mirrors.rockylinux.org/mirrormanager/mirrors
|
||||
python -m rocky_man.main --mirror https://mirror.example.com/rocky/
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Use closer mirrors** - Significant speed improvement for downloads
|
||||
2. **Increase parallelism** - If you have bandwidth: `--parallel-downloads 15`
|
||||
3. **Process one repo at a time** - Use `--repo-types BaseOS` first, then `--repo-types AppStream`
|
||||
4. **Keep RPMs for re-runs** - Use `--keep-rpms` if testing
|
||||
5. **Run in container** - More consistent performance
|
||||
|
||||
## License
|
||||
|
||||
@@ -189,16 +277,20 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
||||
|
||||
### Third-Party Software
|
||||
|
||||
This project uses several open source components.
|
||||
|
||||
Key dependencies include:
|
||||
|
||||
- **mandoc** - Man page converter (ISC License)
|
||||
- **python3-dnf** - DNF package manager Python bindings (GPL-2.0-or-later)
|
||||
- **Fuse.js** - Client-side search (Apache 2.0)
|
||||
- **Python packages**: requests, rpmfile, Jinja2, lxml, zstandard
|
||||
- **Fonts**: Red Hat Display, Red Hat Text, JetBrains Mono (SIL OFL)
|
||||
This project uses several open source components. See [THIRD-PARTY-LICENSES.md](THIRD-PARTY-LICENSES.md) for complete license information and attributions.
|
||||
|
||||
### Trademark Notice
|
||||
|
||||
Rocky Linux is a trademark of the Rocky Enterprise Software Foundation (RESF). This project is not officially affiliated with or endorsed by RESF. All trademarks are the property of their respective owners. This project complies with RESF's trademark usage guidelines.
|
||||
Rocky Linux™ is a trademark of the Rocky Enterprise Software Foundation (RESF). This project is not officially affiliated with or endorsed by RESF. All trademarks are the property of their respective owners. This project complies with RESF's trademark usage guidelines.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions welcome! Please:
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
||||
3. Make your changes with proper documentation
|
||||
4. Test thoroughly
|
||||
5. Commit with clear messages (`git commit -m 'feat: add amazing feature'`)
|
||||
6. Push to your branch (`git push origin feature/amazing-feature`)
|
||||
7. Open a Pull Request
|
||||
|
||||
@@ -9,11 +9,11 @@ authors = [
|
||||
]
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"requests>=2.32.0",
|
||||
"rpmfile>=2.1.0",
|
||||
"requests>=2.31.0",
|
||||
"rpmfile>=2.0.0",
|
||||
"jinja2>=3.1.0",
|
||||
"lxml>=6.0.0",
|
||||
"zstandard>=0.25.0",
|
||||
"lxml>=5.0.0",
|
||||
"zstandard>=0.18.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -43,13 +43,18 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
|
||||
all_man_files = []
|
||||
|
||||
# Process each repository type
|
||||
for repo_type in config.repo_types:
|
||||
logger.info(f"Processing {repo_type} repository")
|
||||
|
||||
# Use first available architecture (man pages are arch-independent)
|
||||
arch = config.architectures[0]
|
||||
|
||||
# Create cache dir for this repo
|
||||
cache_dir = config.download_dir / f".cache/{version}/{repo_type}"
|
||||
|
||||
try:
|
||||
# Initialize repository manager
|
||||
repo_manager = RepoManager(
|
||||
config=config,
|
||||
version=version,
|
||||
@@ -59,6 +64,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
download_dir=version_download_dir,
|
||||
)
|
||||
|
||||
# List packages (with man pages only)
|
||||
packages = repo_manager.list_packages(with_manpages_only=True)
|
||||
|
||||
if not packages:
|
||||
@@ -67,6 +73,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
|
||||
logger.info(f"Found {len(packages)} packages with man pages in {repo_type}")
|
||||
|
||||
# Filter out packages that should be skipped
|
||||
if config.skip_packages:
|
||||
original_count = len(packages)
|
||||
packages = [
|
||||
@@ -79,11 +86,13 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
)
|
||||
logger.info(f"Processing {len(packages)} packages")
|
||||
|
||||
# Download packages
|
||||
logger.info("Downloading packages...")
|
||||
downloaded = repo_manager.download_packages(
|
||||
packages, max_workers=config.parallel_downloads
|
||||
)
|
||||
|
||||
# Extract man pages
|
||||
logger.info("Extracting man pages...")
|
||||
extractor = ManPageExtractor(
|
||||
version_extract_dir,
|
||||
@@ -96,6 +105,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
|
||||
logger.info(f"Extracted {len(man_files)} man pages")
|
||||
|
||||
# Read content for each man file
|
||||
logger.info("Reading man page content...")
|
||||
man_files_with_content = []
|
||||
for man_file in man_files:
|
||||
@@ -103,6 +113,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
if content:
|
||||
man_files_with_content.append((man_file, content))
|
||||
|
||||
# Convert to HTML
|
||||
logger.info("Converting man pages to HTML...")
|
||||
converter = ManPageConverter(version_output_dir)
|
||||
converted = converter.convert_many(
|
||||
@@ -111,6 +122,7 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
|
||||
all_man_files.extend(converted)
|
||||
|
||||
# Cleanup if requested
|
||||
if not config.keep_rpms:
|
||||
logger.info("Cleaning up downloaded packages...")
|
||||
for package in downloaded:
|
||||
@@ -129,21 +141,30 @@ def process_version(config: Config, version: str, template_dir: Path) -> bool:
|
||||
logger.error(f"No man pages were successfully processed for version {version}")
|
||||
return False
|
||||
|
||||
# Generate web pages
|
||||
logger.info("Generating web pages...")
|
||||
web_gen = WebGenerator(template_dir, config.output_dir)
|
||||
|
||||
# Generate search index
|
||||
search_index = web_gen.generate_search_index(all_man_files, version)
|
||||
web_gen.save_search_index(search_index, version)
|
||||
|
||||
# Generate index page
|
||||
web_gen.generate_index(version, search_index)
|
||||
|
||||
# Generate packages index page
|
||||
web_gen.generate_packages_index(version, search_index)
|
||||
|
||||
# Set HTML paths for all man files
|
||||
for man_file in all_man_files:
|
||||
if not man_file.html_path:
|
||||
man_file.html_path = web_gen._get_manpage_path(man_file, version)
|
||||
|
||||
# Link cross-references between man pages
|
||||
logger.info("Linking cross-references...")
|
||||
converter.link_cross_references(all_man_files, version)
|
||||
|
||||
# Wrap man pages in templates
|
||||
logger.info("Generating man page HTML...")
|
||||
for man_file in all_man_files:
|
||||
web_gen.generate_manpage_html(man_file, version)
|
||||
@@ -177,22 +198,22 @@ def main():
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=Path("/data/html"),
|
||||
help="Output directory for HTML files (default: /data/html)",
|
||||
default=Path("./html"),
|
||||
help="Output directory for HTML files (default: ./html)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--download-dir",
|
||||
type=Path,
|
||||
default=Path("/data/tmp/downloads"),
|
||||
help="Directory for downloading packages (default: /data/tmp/downloads)",
|
||||
default=Path("./tmp/downloads"),
|
||||
help="Directory for downloading packages (default: ./tmp/downloads)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--extract-dir",
|
||||
type=Path,
|
||||
default=Path("/data/tmp/extracts"),
|
||||
help="Directory for extracting man pages (default: /data/tmp/extracts)",
|
||||
default=Path("./tmp/extracts"),
|
||||
help="Directory for extracting man pages (default: ./tmp/extracts)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -286,17 +307,21 @@ def main():
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging
|
||||
setup_logging(args.verbose)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
skip_languages = True
|
||||
# Handle filtering options
|
||||
skip_languages = True # default
|
||||
if args.keep_languages:
|
||||
skip_languages = False
|
||||
elif args.skip_languages is not None:
|
||||
skip_languages = args.skip_languages
|
||||
|
||||
# Determine content directory
|
||||
content_dir = "vault/rocky" if args.vault else "pub/rocky"
|
||||
|
||||
# Create configuration
|
||||
config = Config(
|
||||
base_url=args.mirror,
|
||||
content_dir=content_dir,
|
||||
@@ -315,6 +340,7 @@ def main():
|
||||
allow_all_sections=args.allow_all_sections,
|
||||
)
|
||||
|
||||
# Get existing versions from scan and argument
|
||||
scanned_versions = [
|
||||
d.name
|
||||
for d in config.output_dir.iterdir()
|
||||
@@ -322,6 +348,7 @@ def main():
|
||||
]
|
||||
arg_versions = args.existing_versions or []
|
||||
|
||||
# Sort versions numerically by (major, minor)
|
||||
def version_key(v):
|
||||
try:
|
||||
major, minor = v.split(".")
|
||||
@@ -338,6 +365,7 @@ def main():
|
||||
logger.info(f"Repositories: {', '.join(config.repo_types)}")
|
||||
logger.info(f"Output directory: {config.output_dir}")
|
||||
|
||||
# Log filtering configuration
|
||||
if config.skip_sections:
|
||||
logger.info(f"Skipping man sections: {', '.join(config.skip_sections)}")
|
||||
else:
|
||||
@@ -351,6 +379,7 @@ def main():
|
||||
else:
|
||||
logger.info("Including all languages")
|
||||
|
||||
# Process each version
|
||||
processed_versions = []
|
||||
for version in config.versions:
|
||||
try:
|
||||
@@ -363,13 +392,11 @@ def main():
|
||||
logger.error("No versions were successfully processed")
|
||||
return 1
|
||||
|
||||
# Generate root index
|
||||
logger.info("Generating root index page...")
|
||||
web_gen = WebGenerator(args.template_dir, config.output_dir)
|
||||
web_gen.generate_root_index(all_versions)
|
||||
|
||||
logger.info("Generating 404 page...")
|
||||
web_gen.generate_404_page()
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("Processing complete!")
|
||||
logger.info(f"Generated documentation for: {', '.join(processed_versions)}")
|
||||
|
||||
@@ -35,22 +35,35 @@ class ManFile:
|
||||
self._parse_path()
|
||||
|
||||
def _parse_path(self):
|
||||
"""Extract section, name, and language from the file path."""
|
||||
"""Extract section, name, and language from the file path.
|
||||
|
||||
Example paths:
|
||||
/usr/share/man/man1/bash.1.gz
|
||||
/usr/share/man/es/man1/bash.1.gz
|
||||
/usr/share/man/man3/printf.3.gz
|
||||
"""
|
||||
parts = self.file_path.parts
|
||||
filename = self.file_path.name
|
||||
|
||||
# Remove .gz extension if present
|
||||
if filename.endswith('.gz'):
|
||||
filename = filename[:-3]
|
||||
|
||||
# Extract section from parent directory (e.g., 'man1', 'man3p', 'man3pm')
|
||||
for part in reversed(parts):
|
||||
if part.startswith('man') and len(part) > 3:
|
||||
# Check if it starts with 'man' followed by a digit
|
||||
if part[3].isdigit():
|
||||
self.section = part[3:]
|
||||
break
|
||||
|
||||
# Extract section from filename if not found yet (e.g., 'foo.3pm' -> section '3pm')
|
||||
# and extract name
|
||||
name_parts = filename.split('.')
|
||||
if len(name_parts) >= 2:
|
||||
# Try to identify section from last part
|
||||
potential_section = name_parts[-1]
|
||||
# Section is typically digit optionally followed by letters (1, 3p, 3pm, etc.)
|
||||
if potential_section and potential_section[0].isdigit():
|
||||
if not self.section:
|
||||
self.section = potential_section
|
||||
@@ -60,10 +73,14 @@ class ManFile:
|
||||
else:
|
||||
self.name = name_parts[0]
|
||||
|
||||
# Check for language subdirectory
|
||||
# Pattern: /usr/share/man/<lang>/man<section>/
|
||||
for i, part in enumerate(parts):
|
||||
if part == 'man' and i + 1 < len(parts):
|
||||
next_part = parts[i + 1]
|
||||
# If next part is not 'man<digit>', it's a language code
|
||||
if not (next_part.startswith('man') and next_part[3:].isdigit()):
|
||||
# Common language codes are 2-5 chars (en, es, pt_BR, etc.)
|
||||
if len(next_part) <= 5:
|
||||
self.language = next_part
|
||||
break
|
||||
@@ -76,12 +93,14 @@ class ManFile:
|
||||
@property
|
||||
def html_filename(self) -> str:
|
||||
"""Get the HTML filename for this man page."""
|
||||
# Clean name for filesystem safety
|
||||
safe_name = self._clean_filename(self.name)
|
||||
suffix = f".{self.language}" if self.language else ""
|
||||
return f"{safe_name}.{self.section}{suffix}.html"
|
||||
|
||||
def _clean_filename(self, name: str) -> str:
|
||||
"""Clean filename for filesystem safety."""
|
||||
# Replace problematic characters
|
||||
name = name.replace('/', '_')
|
||||
name = name.replace(':', '_')
|
||||
name = re.sub(r'\.\.', '__', name)
|
||||
@@ -89,13 +108,19 @@ class ManFile:
|
||||
|
||||
@property
|
||||
def uri_path(self) -> str:
|
||||
"""Get the URI path for this man page (relative to version root)."""
|
||||
"""Get the URI path for this man page (relative to version root).
|
||||
|
||||
Returns path like: 'bash/man1/bash.1.html'
|
||||
"""
|
||||
if not self.html_path:
|
||||
return ""
|
||||
# Get path relative to the version directory
|
||||
# Assuming structure: html/<version>/<package>/<section>/<file>.html
|
||||
parts = self.html_path.parts
|
||||
try:
|
||||
# Find the version part (e.g., '9.5') and return everything after it
|
||||
for i, part in enumerate(parts):
|
||||
if re.match(r'\d+\.\d+', part):
|
||||
if re.match(r'\d+\.\d+', part): # Version pattern
|
||||
return '/'.join(parts[i+1:])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
@@ -38,11 +38,15 @@ class ManPageConverter:
|
||||
def _check_mandoc() -> bool:
|
||||
"""Check if mandoc is available."""
|
||||
try:
|
||||
# Run mandoc with no arguments - it will show usage and exit
|
||||
# We just want to verify the command exists, not that it succeeds
|
||||
subprocess.run(["mandoc"], capture_output=True, timeout=5)
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
# mandoc command not found
|
||||
return False
|
||||
except Exception:
|
||||
# Other errors (timeout, etc) - but mandoc exists
|
||||
return True
|
||||
|
||||
def convert(self, man_file: ManFile, content: str) -> bool:
|
||||
@@ -56,20 +60,26 @@ class ManPageConverter:
|
||||
True if conversion successful
|
||||
"""
|
||||
try:
|
||||
# Run mandoc to convert to HTML
|
||||
html = self._run_mandoc(content)
|
||||
if not html:
|
||||
logger.warning(f"mandoc produced no output for {man_file.display_name}")
|
||||
return False
|
||||
|
||||
# Clean up HTML
|
||||
html = self._clean_html(html)
|
||||
|
||||
# Check if output indicates this is a symlink/redirect
|
||||
# Check if mandoc output indicates this is a symlink/redirect
|
||||
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
|
||||
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
|
||||
# or: <div class="manual-text">See the file man1/builtin.1.</div>
|
||||
symlink_match = re.search(
|
||||
r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
if not symlink_match:
|
||||
# Try simpler pattern without "See the file" or period
|
||||
symlink_match = re.search(
|
||||
r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
|
||||
html,
|
||||
@@ -84,9 +94,14 @@ class ManPageConverter:
|
||||
)
|
||||
html = self._generate_redirect_html({"name": name, "section": section})
|
||||
|
||||
# Store in ManFile object
|
||||
man_file.html_content = html
|
||||
|
||||
# Determine output path
|
||||
output_path = self._get_output_path(man_file)
|
||||
man_file.html_path = output_path
|
||||
|
||||
# Save HTML file
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
@@ -113,11 +128,13 @@ class ManPageConverter:
|
||||
converted = []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all conversion tasks
|
||||
future_to_manfile = {
|
||||
executor.submit(self.convert, man_file, content): man_file
|
||||
for man_file, content in man_files
|
||||
}
|
||||
|
||||
# Collect results
|
||||
for future in as_completed(future_to_manfile):
|
||||
man_file = future_to_manfile[future]
|
||||
try:
|
||||
@@ -149,6 +166,7 @@ class ManPageConverter:
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr.decode("utf-8", errors="replace")
|
||||
logger.warning(f"mandoc returned error: {stderr}")
|
||||
# Sometimes mandoc returns non-zero but still produces output
|
||||
if result.stdout:
|
||||
return result.stdout.decode("utf-8", errors="replace")
|
||||
return None
|
||||
@@ -171,27 +189,15 @@ class ManPageConverter:
|
||||
Returns:
|
||||
Cleaned HTML
|
||||
"""
|
||||
# Fix empty header cells
|
||||
# Remove empty parentheses in header cells
|
||||
html = re.sub(
|
||||
r'<td class="head-(ltitle|rtitle)">\(\)</td>',
|
||||
r'<td class="head-\1"></td>',
|
||||
html,
|
||||
r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
|
||||
)
|
||||
html = re.sub(
|
||||
r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
|
||||
)
|
||||
|
||||
# Remove empty <p class="Pp"></p> tags (from .sp directives in troff)
|
||||
html = re.sub(r'<p class="Pp">\s*</p>', '', html)
|
||||
|
||||
# Clean up trailing whitespace and br tags in pre blocks
|
||||
# Match: <pre>...</pre> and clean trailing <br/> followed by whitespace
|
||||
def clean_pre_block(match):
|
||||
content = match.group(1)
|
||||
# Remove trailing <br/> tags and whitespace before closing </pre>
|
||||
content = re.sub(r'<br\s*/>\s*$', '', content)
|
||||
content = re.sub(r'\s+$', '', content)
|
||||
return f'<pre>{content}</pre>'
|
||||
|
||||
html = re.sub(r'<pre>(.*?)</pre>', clean_pre_block, html, flags=re.DOTALL)
|
||||
|
||||
# Strip leading/trailing whitespace
|
||||
html = html.strip()
|
||||
|
||||
return html
|
||||
@@ -207,8 +213,12 @@ class ManPageConverter:
|
||||
"""
|
||||
name = target_info["name"]
|
||||
section = target_info["section"]
|
||||
|
||||
# Generate the relative path to the target man page
|
||||
# Symlinks are in the same package, just different file names
|
||||
target_filename = f"{name}.{section}.html"
|
||||
|
||||
# Generate simple redirect HTML with a working hyperlink
|
||||
html = f'''<div class="symlink-notice" style="padding: 2rem; text-align: center; background-color: var(--bg-tertiary); border-radius: 8px; border: 1px solid var(--border-color);">
|
||||
<p style="font-size: 1.2rem; margin-bottom: 1.5rem; color: var(--text-primary);">
|
||||
This is an alias for <b>{name}</b>({section}).
|
||||
@@ -220,26 +230,35 @@ class ManPageConverter:
|
||||
return html
|
||||
|
||||
def link_cross_references(self, man_files: List[ManFile], version: str) -> None:
|
||||
"""Add hyperlinks to cross-references in man pages.
|
||||
"""Add hyperlinks to cross-references in SEE ALSO sections.
|
||||
|
||||
Goes through all converted HTML files and converts man page references
|
||||
like pty(4) into working hyperlinks.
|
||||
|
||||
Args:
|
||||
man_files: List of all converted ManFile objects
|
||||
version: Rocky Linux version
|
||||
"""
|
||||
# Build lookup index: (name, section) -> relative_path
|
||||
lookup = {}
|
||||
for mf in man_files:
|
||||
key = (mf.name.lower(), str(mf.section))
|
||||
if key not in lookup:
|
||||
# Store the relative path from the version root
|
||||
lookup[key] = f"{mf.package_name}/man{mf.section}/{mf.html_filename}"
|
||||
|
||||
logger.info(f"Linking cross-references across {len(man_files)} man pages...")
|
||||
|
||||
# Process each man page HTML content
|
||||
for man_file in man_files:
|
||||
if not man_file.html_content:
|
||||
continue
|
||||
|
||||
try:
|
||||
html = man_file.html_content
|
||||
|
||||
# Find and replace man page references
|
||||
# Mandoc outputs references as: <b>name</b>(section)
|
||||
# Pattern matches both <b>name</b>(section) and plain name(section)
|
||||
pattern = (
|
||||
r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"
|
||||
)
|
||||
@@ -247,25 +266,42 @@ class ManPageConverter:
|
||||
def replace_reference(match):
|
||||
full_match = match.group(0)
|
||||
|
||||
# Skip if already inside an <a> tag
|
||||
# Check if this match is already inside an <a> tag
|
||||
# Look back up to 500 chars for context
|
||||
before_text = html[max(0, match.start() - 500) : match.start()]
|
||||
|
||||
# Find the last <a and last </a> before this match
|
||||
last_open = before_text.rfind("<a ")
|
||||
last_close = before_text.rfind("</a>")
|
||||
|
||||
# If the last <a> is after the last </a>, we're inside a link
|
||||
if last_open > last_close:
|
||||
return full_match
|
||||
|
||||
name = (match.group(1) or match.group(3)).lower()
|
||||
section = match.group(2) or match.group(4)
|
||||
if match.group(1): # <b>name</b>(section) format
|
||||
name = match.group(1).lower()
|
||||
section = match.group(2)
|
||||
else: # plain name(section) format
|
||||
name = match.group(3).lower()
|
||||
section = match.group(4)
|
||||
|
||||
# Look up the referenced man page
|
||||
key = (name, section)
|
||||
if key in lookup:
|
||||
# Calculate relative path from current file to target
|
||||
target_path = lookup[key]
|
||||
# File structure: output_dir/version/package_name/manN/file.html
|
||||
# Need to go up 3 levels to reach output root, then down to version/target
|
||||
# Current: version/package_name/manN/file.html
|
||||
# Target: version/other_package/manM/file.html
|
||||
rel_path = f"../../../{version}/{target_path}"
|
||||
return f'<a href="{rel_path}">{full_match}</a>'
|
||||
|
||||
return full_match
|
||||
|
||||
updated_html = re.sub(pattern, replace_reference, html)
|
||||
|
||||
# Update the content if something changed
|
||||
if updated_html != html:
|
||||
man_file.html_content = updated_html
|
||||
|
||||
@@ -277,7 +313,23 @@ class ManPageConverter:
|
||||
logger.info("Cross-reference linking complete")
|
||||
|
||||
def _get_output_path(self, man_file: ManFile) -> Path:
|
||||
"""Determine output path for HTML file."""
|
||||
"""Determine output path for HTML file.
|
||||
|
||||
Structure: output_dir/<package>/<section>/<name>.<section>[.<lang>].html
|
||||
|
||||
Args:
|
||||
man_file: ManFile object
|
||||
|
||||
Returns:
|
||||
Path for HTML output
|
||||
"""
|
||||
# Package directory
|
||||
pkg_dir = self.output_dir / man_file.package_name
|
||||
|
||||
# Section directory (man1, man2, etc.)
|
||||
section_dir = pkg_dir / f"man{man_file.section}"
|
||||
return section_dir / man_file.html_filename
|
||||
|
||||
# HTML filename
|
||||
filename = man_file.html_filename
|
||||
|
||||
return section_dir / filename
|
||||
|
||||
@@ -48,6 +48,7 @@ class ManPageExtractor:
|
||||
logger.warning(f"Package file not found: {package.name}")
|
||||
return []
|
||||
|
||||
# Create extraction directory for this package
|
||||
pkg_extract_dir = self.extract_dir / package.name
|
||||
pkg_extract_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -58,39 +59,33 @@ class ManPageExtractor:
|
||||
|
||||
with rpmfile.open(package.download_path) as rpm:
|
||||
for member in rpm.getmembers():
|
||||
# Check if this is a man page file
|
||||
if not self._is_manpage(member.name):
|
||||
continue
|
||||
|
||||
# Sanitize path to prevent path traversal attacks
|
||||
safe_name = member.name.lstrip('/')
|
||||
extract_path = pkg_extract_dir / safe_name
|
||||
|
||||
# Resolve to absolute path and verify it's within the extraction directory
|
||||
real_extract_path = extract_path.resolve()
|
||||
real_pkg_extract_dir = pkg_extract_dir.resolve()
|
||||
|
||||
if not real_extract_path.is_relative_to(real_pkg_extract_dir):
|
||||
logger.warning(f"Skipping file with path traversal attempt: {member.name}")
|
||||
continue
|
||||
|
||||
# Create ManFile object
|
||||
extract_path = pkg_extract_dir / member.name.lstrip('/')
|
||||
man_file = ManFile(
|
||||
file_path=real_extract_path,
|
||||
file_path=extract_path,
|
||||
package_name=package.name
|
||||
)
|
||||
|
||||
# Apply section filtering
|
||||
if self.skip_sections and man_file.section in self.skip_sections:
|
||||
logger.debug(f"Skipping {man_file.display_name} (section {man_file.section})")
|
||||
continue
|
||||
|
||||
# Apply language filtering
|
||||
if self.skip_languages and man_file.language and man_file.language != 'en':
|
||||
logger.debug(f"Skipping {man_file.display_name} (language {man_file.language})")
|
||||
continue
|
||||
|
||||
real_extract_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Extract the file
|
||||
extract_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
content = rpm.extractfile(member).read()
|
||||
with open(real_extract_path, 'wb') as f:
|
||||
with open(extract_path, 'wb') as f:
|
||||
f.write(content)
|
||||
|
||||
man_file.content = content
|
||||
@@ -123,11 +118,13 @@ class ManPageExtractor:
|
||||
all_man_files = []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all extraction tasks
|
||||
future_to_pkg = {
|
||||
executor.submit(self.extract_from_package, pkg): pkg
|
||||
for pkg in packages
|
||||
}
|
||||
|
||||
# Collect results
|
||||
for future in as_completed(future_to_pkg):
|
||||
pkg = future_to_pkg[future]
|
||||
try:
|
||||
@@ -153,15 +150,27 @@ class ManPageExtractor:
|
||||
return ""
|
||||
|
||||
try:
|
||||
# Try reading as gzipped file first
|
||||
if man_file.file_path.suffix == '.gz':
|
||||
try:
|
||||
with gzip.open(man_file.file_path, 'rb') as f:
|
||||
return f.read().decode('utf-8', errors='replace')
|
||||
except gzip.BadGzipFile:
|
||||
pass
|
||||
with gzip.open(man_file.file_path, 'rb') as f:
|
||||
content = f.read()
|
||||
else:
|
||||
# Read as plain text
|
||||
with open(man_file.file_path, 'rb') as f:
|
||||
content = f.read()
|
||||
|
||||
with open(man_file.file_path, 'rb') as f:
|
||||
return f.read().decode('utf-8', errors='replace')
|
||||
# Decode with error handling
|
||||
return content.decode('utf-8', errors='replace')
|
||||
|
||||
except gzip.BadGzipFile:
|
||||
# Not a gzip file, try reading as plain text
|
||||
try:
|
||||
with open(man_file.file_path, 'rb') as f:
|
||||
content = f.read()
|
||||
return content.decode('utf-8', errors='replace')
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading {man_file.file_path}: {e}")
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading {man_file.file_path}: {e}")
|
||||
@@ -169,19 +178,37 @@ class ManPageExtractor:
|
||||
|
||||
@staticmethod
|
||||
def _is_manpage(path: str) -> bool:
|
||||
"""Check if a file path is a man page."""
|
||||
"""Check if a file path is a man page.
|
||||
|
||||
Args:
|
||||
path: File path to check
|
||||
|
||||
Returns:
|
||||
True if this looks like a man page file
|
||||
"""
|
||||
# Must contain /man/ in path
|
||||
if '/man/' not in path:
|
||||
return False
|
||||
|
||||
# Should be in /usr/share/man/ or /usr/man/
|
||||
if not ('/share/man/' in path or path.startswith('/usr/man/')):
|
||||
return False
|
||||
|
||||
# Common man page patterns
|
||||
# - /usr/share/man/man1/foo.1.gz
|
||||
# - /usr/share/man/es/man1/foo.1.gz
|
||||
# - /usr/share/man/man3/printf.3.gz
|
||||
|
||||
parts = path.split('/')
|
||||
return any(
|
||||
|
||||
# Check for man<digit> directory
|
||||
has_man_section = any(
|
||||
part.startswith('man') and len(part) > 3 and part[3].isdigit()
|
||||
for part in parts
|
||||
)
|
||||
|
||||
return has_man_section
|
||||
|
||||
def cleanup_extracts(self, package: Package):
|
||||
"""Clean up extracted files for a package.
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import gzip
|
||||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from typing import Set
|
||||
from typing import Set, Dict
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
@@ -38,16 +38,19 @@ class ContentsParser:
|
||||
"""
|
||||
logger.info(f"Fetching filelists for {self.repo_url}")
|
||||
|
||||
# Download and parse repomd.xml to find filelists location
|
||||
filelists_path = self._get_filelists_path()
|
||||
if not filelists_path:
|
||||
logger.warning("Could not find filelists in repository metadata")
|
||||
return set()
|
||||
|
||||
# Download filelists.xml
|
||||
filelists_file = self._download_filelists(filelists_path)
|
||||
if not filelists_file:
|
||||
logger.warning("Could not download filelists")
|
||||
return set()
|
||||
|
||||
# Parse filelists to find packages with man pages
|
||||
packages = self._parse_filelists(filelists_file)
|
||||
logger.info(f"Found {len(packages)} packages with man pages")
|
||||
|
||||
@@ -65,7 +68,11 @@ class ContentsParser:
|
||||
response = requests.get(repomd_url, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse XML
|
||||
root = ET.fromstring(response.content)
|
||||
|
||||
# Find filelists entry
|
||||
# XML structure: <repomd><data type="filelists"><location href="..."/></data></repomd>
|
||||
ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
|
||||
|
||||
for data in root.findall('repo:data', ns):
|
||||
@@ -74,7 +81,7 @@ class ContentsParser:
|
||||
if location is not None:
|
||||
return location.get('href')
|
||||
|
||||
# Fallback without namespace
|
||||
# Fallback: try without namespace
|
||||
for data in root.findall('data'):
|
||||
if data.get('type') == 'filelists':
|
||||
location = data.find('location')
|
||||
@@ -98,6 +105,7 @@ class ContentsParser:
|
||||
url = urljoin(self.repo_url, relative_path)
|
||||
cache_file = self.cache_dir / relative_path.split('/')[-1]
|
||||
|
||||
# Return cached file if it exists
|
||||
if cache_file.exists():
|
||||
logger.debug(f"Using cached filelists: {cache_file}")
|
||||
return cache_file
|
||||
@@ -130,26 +138,36 @@ class ContentsParser:
|
||||
packages = set()
|
||||
|
||||
try:
|
||||
# Open gzipped XML file
|
||||
with gzip.open(filelists_path, 'rb') as f:
|
||||
# Use iterparse for memory efficiency (files can be large)
|
||||
context = ET.iterparse(f, events=('start', 'end'))
|
||||
|
||||
current_package = None
|
||||
has_manpage = False
|
||||
|
||||
for event, elem in context:
|
||||
if event == 'start' and elem.tag.endswith('package'):
|
||||
current_package = elem.get('name')
|
||||
has_manpage = False
|
||||
if event == 'start':
|
||||
if elem.tag.endswith('package'):
|
||||
# Get package name from 'name' attribute
|
||||
current_package = elem.get('name')
|
||||
has_manpage = False
|
||||
|
||||
elif event == 'end':
|
||||
if elem.tag.endswith('file'):
|
||||
# Check if file path contains /man/
|
||||
file_path = elem.text
|
||||
if file_path and self._is_manpage_path(file_path):
|
||||
has_manpage = True
|
||||
if file_path and '/man/' in file_path:
|
||||
# Could be /usr/share/man/ or /usr/man/
|
||||
if '/share/man/' in file_path or file_path.startswith('/usr/man/'):
|
||||
has_manpage = True
|
||||
|
||||
elif elem.tag.endswith('package'):
|
||||
# End of package entry
|
||||
if has_manpage and current_package:
|
||||
packages.add(current_package)
|
||||
|
||||
# Clear element to free memory
|
||||
elem.clear()
|
||||
current_package = None
|
||||
has_manpage = False
|
||||
@@ -159,16 +177,45 @@ class ContentsParser:
|
||||
|
||||
return packages
|
||||
|
||||
@staticmethod
|
||||
def _is_manpage_path(file_path: str) -> bool:
|
||||
"""Check if a file path is a man page location.
|
||||
def get_package_man_files(self, filelists_path: Path) -> Dict[str, list]:
|
||||
"""Get detailed list of man files for each package.
|
||||
|
||||
Args:
|
||||
file_path: File path to check
|
||||
filelists_path: Path to filelists.xml.gz file
|
||||
|
||||
Returns:
|
||||
True if path is in a standard man page directory
|
||||
Dict mapping package name to list of man page paths
|
||||
"""
|
||||
return '/man/' in file_path and (
|
||||
'/share/man/' in file_path or file_path.startswith('/usr/man/')
|
||||
)
|
||||
packages = {}
|
||||
|
||||
try:
|
||||
with gzip.open(filelists_path, 'rb') as f:
|
||||
context = ET.iterparse(f, events=('start', 'end'))
|
||||
|
||||
current_package = None
|
||||
current_files = []
|
||||
|
||||
for event, elem in context:
|
||||
if event == 'start':
|
||||
if elem.tag.endswith('package'):
|
||||
current_package = elem.get('name')
|
||||
current_files = []
|
||||
|
||||
elif event == 'end':
|
||||
if elem.tag.endswith('file'):
|
||||
file_path = elem.text
|
||||
if file_path and '/share/man/' in file_path:
|
||||
current_files.append(file_path)
|
||||
|
||||
elif elem.tag.endswith('package'):
|
||||
if current_files and current_package:
|
||||
packages[current_package] = current_files
|
||||
|
||||
elem.clear()
|
||||
current_package = None
|
||||
current_files = []
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing filelists: {e}")
|
||||
|
||||
return packages
|
||||
|
||||
@@ -52,6 +52,7 @@ class RepoManager:
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.download_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize DNF
|
||||
self.base = dnf.Base()
|
||||
self.base.conf.debuglevel = 0
|
||||
self.base.conf.errorlevel = 0
|
||||
@@ -66,23 +67,28 @@ class RepoManager:
|
||||
repo = dnf.repo.Repo(repo_id, self.base.conf)
|
||||
repo.baseurl = [self.repo_url]
|
||||
repo.enabled = True
|
||||
repo.gpgcheck = False
|
||||
repo.gpgcheck = False # We verify checksums separately
|
||||
|
||||
self.base.repos.add(repo)
|
||||
logger.info(f"Configured repository: {repo_id} at {self.repo_url}")
|
||||
|
||||
# Fill the sack (package database)
|
||||
self.base.fill_sack(load_system_repo=False, load_available_repos=True)
|
||||
logger.info("Repository metadata loaded")
|
||||
|
||||
def discover_packages_with_manpages(self) -> Set[str]:
|
||||
"""Discover which packages contain man pages using filelists.
|
||||
|
||||
This is the key optimization - we parse repository metadata
|
||||
to identify packages with man pages before downloading anything.
|
||||
|
||||
Returns:
|
||||
Set of package names that contain man pages
|
||||
"""
|
||||
if self.packages_with_manpages is not None:
|
||||
return self.packages_with_manpages
|
||||
|
||||
# Try pub first, then vault if it fails
|
||||
content_dirs = ["pub/rocky", "vault/rocky"]
|
||||
for content_dir in content_dirs:
|
||||
original_content_dir = self.config.content_dir
|
||||
@@ -93,9 +99,9 @@ class RepoManager:
|
||||
)
|
||||
parser = ContentsParser(repo_url, self.cache_dir)
|
||||
packages = parser.get_packages_with_manpages()
|
||||
if packages:
|
||||
if packages: # Only use if it has man pages
|
||||
self.packages_with_manpages = packages
|
||||
self.repo_url = repo_url
|
||||
self.repo_url = repo_url # Set for later use
|
||||
logger.info(f"Using repository: {repo_url}")
|
||||
break
|
||||
else:
|
||||
@@ -124,29 +130,39 @@ class RepoManager:
|
||||
f"Querying packages from {self.repo_type} ({self.version}/{self.arch})"
|
||||
)
|
||||
|
||||
# Get packages with man pages if filtering
|
||||
manpage_packages = None
|
||||
if with_manpages_only:
|
||||
manpage_packages = self.discover_packages_with_manpages()
|
||||
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
|
||||
|
||||
# Configure DNF repo now that we have the correct repo_url
|
||||
self._configure_repo()
|
||||
|
||||
packages = []
|
||||
|
||||
# Query all available packages
|
||||
query = self.base.sack.query().available()
|
||||
|
||||
# For each package name, get only one arch (prefer noarch, then our target arch)
|
||||
seen_names = set()
|
||||
|
||||
for pkg in query:
|
||||
pkg_name = pkg.name
|
||||
|
||||
# Skip if we've already added this package
|
||||
if pkg_name in seen_names:
|
||||
continue
|
||||
|
||||
# Skip if filtering and package doesn't have man pages
|
||||
if manpage_packages and pkg_name not in manpage_packages:
|
||||
continue
|
||||
|
||||
# Get repo information
|
||||
repo = pkg.repo
|
||||
baseurl = repo.baseurl[0] if repo and repo.baseurl else self.repo_url
|
||||
chksum_type, chksum_value = pkg.chksum if pkg.chksum else ("sha256", "")
|
||||
|
||||
# Create Package object
|
||||
package = Package(
|
||||
name=pkg_name,
|
||||
version=pkg.version,
|
||||
@@ -155,16 +171,16 @@ class RepoManager:
|
||||
repo_type=self.repo_type,
|
||||
location=pkg.location,
|
||||
baseurl=baseurl,
|
||||
checksum=chksum_value,
|
||||
checksum_type=chksum_type,
|
||||
has_manpages=bool(manpage_packages),
|
||||
checksum=pkg.chksum[1] if pkg.chksum else "", # chksum is (type, value)
|
||||
checksum_type=pkg.chksum[0] if pkg.chksum else "sha256",
|
||||
has_manpages=True if manpage_packages else False,
|
||||
)
|
||||
|
||||
packages.append(package)
|
||||
seen_names.add(pkg_name)
|
||||
|
||||
logger.info(f"Found {len(packages)} packages to process")
|
||||
return sorted(packages)
|
||||
return sorted(packages) # Sort by name for consistent ordering
|
||||
|
||||
def download_package(self, package: Package) -> bool:
|
||||
"""Download a single package.
|
||||
@@ -178,6 +194,7 @@ class RepoManager:
|
||||
download_path = self.download_dir / package.filename
|
||||
package.download_path = download_path
|
||||
|
||||
# Skip if already downloaded
|
||||
if download_path.exists():
|
||||
logger.debug(f"Package already downloaded: {package.filename}")
|
||||
return True
|
||||
@@ -187,6 +204,7 @@ class RepoManager:
|
||||
response = requests.get(package.download_url, timeout=300, stream=True)
|
||||
response.raise_for_status()
|
||||
|
||||
# Download with progress (optional: could add progress bar here)
|
||||
with open(download_path, "wb") as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
@@ -197,6 +215,7 @@ class RepoManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading {package.filename}: {e}")
|
||||
# Clean up partial download
|
||||
if download_path.exists():
|
||||
download_path.unlink()
|
||||
return False
|
||||
@@ -216,10 +235,12 @@ class RepoManager:
|
||||
downloaded = []
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
# Submit all download tasks
|
||||
future_to_pkg = {
|
||||
executor.submit(self.download_package, pkg): pkg for pkg in packages
|
||||
}
|
||||
|
||||
# Process completed downloads
|
||||
for future in as_completed(future_to_pkg):
|
||||
pkg = future_to_pkg[future]
|
||||
try:
|
||||
|
||||
@@ -24,26 +24,31 @@ class Config:
|
||||
parallel_conversions: Number of parallel HTML conversions
|
||||
"""
|
||||
|
||||
# Repository configuration
|
||||
base_url: str = "http://dl.rockylinux.org/"
|
||||
content_dir: str = "pub/rocky"
|
||||
versions: List[str] = None
|
||||
architectures: List[str] = None
|
||||
repo_types: List[str] = None
|
||||
|
||||
# Directory configuration
|
||||
download_dir: Path = Path("/data/tmp/downloads")
|
||||
extract_dir: Path = Path("/data/tmp/extracts")
|
||||
output_dir: Path = Path("/data/html")
|
||||
|
||||
# Cleanup options
|
||||
keep_rpms: bool = False
|
||||
keep_extracts: bool = False
|
||||
|
||||
# Performance options
|
||||
parallel_downloads: int = 5
|
||||
parallel_conversions: int = 10
|
||||
|
||||
# Filtering options
|
||||
skip_sections: List[str] = None
|
||||
skip_packages: List[str] = None
|
||||
skip_languages: bool = True
|
||||
allow_all_sections: bool = False
|
||||
skip_languages: bool = True # Skip non-English languages by default
|
||||
allow_all_sections: bool = False # Override skip_sections if True
|
||||
|
||||
def __post_init__(self):
|
||||
"""Set defaults and ensure directories exist."""
|
||||
@@ -51,16 +56,20 @@ class Config:
|
||||
self.versions = ["8.10", "9.6", "10.0"]
|
||||
|
||||
if self.architectures is None:
|
||||
# Man pages are arch-independent, so we just need one
|
||||
# We prefer x86_64 as it's most common, fallback to others
|
||||
self.architectures = ["x86_64", "aarch64", "ppc64le", "s390x"]
|
||||
|
||||
if self.repo_types is None:
|
||||
self.repo_types = ["BaseOS", "AppStream"]
|
||||
|
||||
# Set default skip sections (man3 library APIs)
|
||||
if self.skip_sections is None and not self.allow_all_sections:
|
||||
self.skip_sections = ["3", "3p", "3pm"]
|
||||
elif self.allow_all_sections:
|
||||
self.skip_sections = []
|
||||
|
||||
# Set default skip packages (high-volume API docs)
|
||||
if self.skip_packages is None:
|
||||
self.skip_packages = [
|
||||
"lapack",
|
||||
@@ -68,6 +77,7 @@ class Config:
|
||||
"gl-manpages",
|
||||
]
|
||||
|
||||
# Ensure all paths are Path objects
|
||||
self.download_dir = Path(self.download_dir)
|
||||
self.extract_dir = Path(self.extract_dir)
|
||||
self.output_dir = Path(self.output_dir)
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
@@ -34,6 +33,7 @@ class WebGenerator:
|
||||
self.output_dir = Path(output_dir)
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Setup Jinja2 environment
|
||||
self.env = Environment(
|
||||
loader=FileSystemLoader(str(self.template_dir)),
|
||||
autoescape=select_autoescape(["html", "xml"]),
|
||||
@@ -66,6 +66,7 @@ class WebGenerator:
|
||||
content=man_file.html_content,
|
||||
)
|
||||
|
||||
# Ensure output path is set
|
||||
if not man_file.html_path:
|
||||
man_file.html_path = self._get_manpage_path(man_file, version)
|
||||
|
||||
@@ -126,18 +127,24 @@ class WebGenerator:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
packages_by_letter = defaultdict(list)
|
||||
# Group packages by first letter
|
||||
packages_by_letter = {}
|
||||
|
||||
for pkg_name, pages in search_data.items():
|
||||
first_char = pkg_name[0].upper()
|
||||
if not first_char.isalpha():
|
||||
first_char = "other"
|
||||
|
||||
if first_char not in packages_by_letter:
|
||||
packages_by_letter[first_char] = []
|
||||
|
||||
packages_by_letter[first_char].append(
|
||||
{"name": pkg_name, "count": len(pages)}
|
||||
)
|
||||
|
||||
for packages in packages_by_letter.values():
|
||||
packages.sort(key=lambda x: x["name"])
|
||||
# Sort packages within each letter
|
||||
for letter in packages_by_letter:
|
||||
packages_by_letter[letter].sort(key=lambda x: x["name"])
|
||||
|
||||
template = self.env.get_template("packages.html")
|
||||
|
||||
@@ -181,6 +188,7 @@ class WebGenerator:
|
||||
if pkg_name not in index:
|
||||
index[pkg_name] = {}
|
||||
|
||||
# Create entry for this man page
|
||||
entry = {
|
||||
"name": man_file.name,
|
||||
"section": man_file.section,
|
||||
@@ -190,6 +198,7 @@ class WebGenerator:
|
||||
"full_name": f"{man_file.package_name} - {man_file.display_name}",
|
||||
}
|
||||
|
||||
# Use display name as key (handles duplicates with different sections)
|
||||
key = man_file.display_name
|
||||
if man_file.language:
|
||||
key = f"{key}.{man_file.language}"
|
||||
@@ -214,11 +223,15 @@ class WebGenerator:
|
||||
|
||||
json_path = version_dir / "search.json"
|
||||
gz_path = version_dir / "search.json.gz"
|
||||
|
||||
# Sort for consistency
|
||||
sorted_index = {k: index[k] for k in sorted(index)}
|
||||
|
||||
# Save plain JSON
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(sorted_index, f, indent=2)
|
||||
|
||||
# Save gzipped JSON
|
||||
with gzip.open(gz_path, "wt", encoding="utf-8") as f:
|
||||
json.dump(sorted_index, f)
|
||||
|
||||
@@ -257,18 +270,21 @@ class WebGenerator:
|
||||
try:
|
||||
template = self.env.get_template("root.html")
|
||||
|
||||
major_to_minors = defaultdict(list)
|
||||
# Group versions by major version
|
||||
major_to_minors = {}
|
||||
for v in versions:
|
||||
try:
|
||||
major, minor = v.split(".")
|
||||
major_to_minors[major].append(minor)
|
||||
major_to_minors.setdefault(major, []).append(minor)
|
||||
except ValueError:
|
||||
continue
|
||||
continue # Skip invalid versions
|
||||
|
||||
# Sort majors ascending, minors descending within each major
|
||||
sorted_majors = sorted(major_to_minors, key=int)
|
||||
max_minors = max((len(major_to_minors[m]) for m in sorted_majors), default=0)
|
||||
max_minors = max(len(major_to_minors[major]) for major in sorted_majors)
|
||||
num_columns = len(sorted_majors)
|
||||
|
||||
# Create rows of versions for side-by-side display
|
||||
version_rows = []
|
||||
for minor_idx in range(max_minors):
|
||||
row = []
|
||||
@@ -277,7 +293,7 @@ class WebGenerator:
|
||||
if minor_idx < len(minors_list):
|
||||
row.append((major, minors_list[minor_idx]))
|
||||
else:
|
||||
row.append(None)
|
||||
row.append(None) # Empty cell placeholder
|
||||
version_rows.append(row)
|
||||
|
||||
html = template.render(
|
||||
@@ -295,28 +311,3 @@ class WebGenerator:
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating root index: {e}")
|
||||
return False
|
||||
|
||||
def generate_404_page(self) -> bool:
|
||||
"""Generate 404 error page.
|
||||
|
||||
Returns:
|
||||
True if successful
|
||||
"""
|
||||
try:
|
||||
template = self.env.get_template("404.html")
|
||||
|
||||
html = template.render(
|
||||
title="404 - Page Not Found"
|
||||
)
|
||||
|
||||
error_path = self.output_dir / "404.html"
|
||||
|
||||
with open(error_path, "w", encoding="utf-8") as f:
|
||||
f.write(html)
|
||||
|
||||
logger.info("Generated 404 page")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating 404 page: {e}")
|
||||
return False
|
||||
|
||||
@@ -1,137 +0,0 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block header_title %}Rocky Linux Man Pages{% endblock %}
|
||||
{% block header_subtitle %}Man page documentation for Rocky Linux packages{% endblock %}
|
||||
|
||||
{% block extra_css %}
|
||||
.error-container {
|
||||
text-align: center;
|
||||
padding: 4rem 2rem;
|
||||
}
|
||||
|
||||
.error-code {
|
||||
font-size: 8rem;
|
||||
font-weight: 700;
|
||||
color: var(--accent-primary);
|
||||
line-height: 1;
|
||||
margin-bottom: 1rem;
|
||||
font-family: "JetBrains Mono", monospace;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
font-size: 1.5rem;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.error-description {
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 2rem;
|
||||
max-width: 600px;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
.suggestions {
|
||||
max-width: 600px;
|
||||
margin: 2rem auto;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.suggestions h3 {
|
||||
color: var(--text-primary);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.suggestions ul {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.suggestions li {
|
||||
margin-bottom: 0.75rem;
|
||||
padding-left: 1.5rem;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.suggestions li::before {
|
||||
content: "→";
|
||||
position: absolute;
|
||||
left: 0;
|
||||
color: var(--accent-primary);
|
||||
}
|
||||
|
||||
.back-button {
|
||||
display: inline-block;
|
||||
padding: 0.75rem 1.5rem;
|
||||
background: var(--accent-primary);
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
border-radius: 6px;
|
||||
font-weight: 500;
|
||||
transition: all 0.2s;
|
||||
margin-top: 2rem;
|
||||
}
|
||||
|
||||
.back-button:hover {
|
||||
background: var(--accent-secondary);
|
||||
transform: translateY(-2px);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.error-code {
|
||||
font-size: 5rem;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
.error-container {
|
||||
padding: 3rem 1rem;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.error-code {
|
||||
font-size: 4rem;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.error-container {
|
||||
padding: 2rem 1rem;
|
||||
}
|
||||
|
||||
.suggestions {
|
||||
padding: 0 1rem;
|
||||
}
|
||||
}
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="content">
|
||||
<div class="error-container">
|
||||
<div class="error-code">404</div>
|
||||
<div class="error-message">Page Not Found</div>
|
||||
<div class="error-description">
|
||||
The page you're looking for doesn't exist or may have been moved.
|
||||
</div>
|
||||
|
||||
<div class="suggestions">
|
||||
<h3>Suggestions:</h3>
|
||||
<ul>
|
||||
<li>Check the URL for typos</li>
|
||||
<li>Return to the <a href="/">home page</a> and navigate from there</li>
|
||||
<li>Use the search feature on the version index page</li>
|
||||
<li>The man page may be in a different version of Rocky Linux</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<a href="/" class="back-button">Go to Home Page</a>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -112,47 +112,6 @@ font-size: 0.9em;
|
||||
color: var(--success);
|
||||
}
|
||||
|
||||
/* OPTIONS section specific styling */
|
||||
/* Style paragraphs that contain option flags (b tags followed by i tags or immediately followed by Bd-indent) */
|
||||
.man-content section.Sh p.Pp:has(+ .Bd-indent) {
|
||||
font-weight: 600;
|
||||
font-size: 1.05em;
|
||||
margin-top: 1.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: linear-gradient(90deg, var(--bg-tertiary) 0%, transparent 100%);
|
||||
border-left: 3px solid var(--accent-primary);
|
||||
}
|
||||
|
||||
.man-content section.Sh p.Pp:has(+ .Bd-indent) b {
|
||||
color: var(--accent-primary);
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
.man-content section.Sh p.Pp:has(+ .Bd-indent) i {
|
||||
color: var(--text-secondary);
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* Indented description blocks */
|
||||
.man-content .Bd-indent {
|
||||
margin-left: 2.5rem;
|
||||
margin-bottom: 1.5rem;
|
||||
padding-left: 1rem;
|
||||
border-left: 2px solid var(--border-color);
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
/* Add spacing between nested paragraphs in descriptions */
|
||||
.man-content .Bd-indent > p.Pp {
|
||||
margin-top: 0.75rem;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.man-content .Bd-indent > p.Pp:first-child {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
.man-content pre {
|
||||
background-color: var(--bg-primary);
|
||||
border: 1px solid var(--border-color);
|
||||
@@ -256,16 +215,6 @@ margin-left: 1rem;
|
||||
.man-content .Bl-dash {
|
||||
padding-left: 1rem;
|
||||
}
|
||||
|
||||
.man-content section.Sh p.Pp:has(+ .Bd-indent) {
|
||||
font-size: 1em;
|
||||
padding: 0.4rem 0.5rem;
|
||||
}
|
||||
|
||||
.man-content .Bd-indent {
|
||||
margin-left: 1.5rem;
|
||||
padding-left: 0.75rem;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
|
||||
Reference in New Issue
Block a user