Compare commits

16 Commits

Author SHA1 Message Date
Stephen Simpson
e356431b72 Refactor Rocky Man Page Generator
- Improved logging and error handling in main.py, including better version management and configuration handling.
- Enhanced RepoManager to dynamically switch between pub and vault repositories for package retrieval.
- Updated ManPageConverter to handle symlink detection and generate appropriate redirect HTML.
- Refined WebGenerator to support dynamic version grid layout and improved HTML generation for man pages and indexes.
- Modified templates to remove trademark symbols and enhance styling for version cards.
- Added support for existing versions in the root index generation.

Signed-off-by: Stephen Simpson <ssimpson89@users.noreply.github.com>
2025-12-04 17:04:55 -06:00
Stephen Simpson
89404a2042 update 2025-12-04 17:02:29 -06:00
c6dc7fe310 Remove parameter for parallel conversions from Jenkinsfile
Signed-off-by: Stephen Simpson <stevo81989@gmail.com>
2025-11-26 10:57:16 -06:00
411abf8683 Update Jenkinsfile to change parameter types for parallel downloads and conversions to strings
Signed-off-by: Stephen Simpson <stevo81989@gmail.com>
2025-11-26 10:55:43 -06:00
c3fef46d6a Update Jenkinsfile to modify default Rocky Linux version and add parameters for parallel downloads and conversions
Signed-off-by: Stephen Simpson <stevo81989@gmail.com>
2025-11-26 10:49:36 -06:00
a0994a5c16 Update Jenkinsfile 2025-11-25 13:41:19 -06:00
7bda4a1155 Update Jenkinsfile 2025-11-25 13:33:44 -06:00
3f2ecebb08 Update Jenkinsfile 2025-11-25 13:32:23 -06:00
c9b59737b9 Update Jenkinsfile 2025-11-25 11:52:39 -06:00
067965d983 Update Jenkinsfile 2025-11-25 11:50:10 -06:00
b9f6697100 Update Jenkinsfile 2025-11-25 10:54:31 -06:00
106e680f11 Update Jenkinsfile 2025-11-25 08:28:52 -06:00
b1e987f1b1 Merge pull request 'update' (#4) from fix-build into main
Reviewed-on: #4
2025-11-24 16:28:33 -06:00
9e2943754f Merge pull request 'update' (#3) from fix-build into main
Reviewed-on: #3
2025-11-24 15:52:56 -06:00
b371431aa5 Merge pull request 'update' (#2) from fix-build into main
Reviewed-on: #2
2025-11-24 15:27:06 -06:00
2315422d4f Merge pull request 'fix-build' (#1) from fix-build into main
Reviewed-on: #1
2025-11-24 15:20:19 -06:00
9 changed files with 484 additions and 571 deletions

Binary file not shown.

86
Jenkinsfile vendored
View File

@@ -1,6 +1,4 @@
// Jenkinsfile for Rocky Man // Jenkinsfile for Rocky Man
// This pipeline uses Kubernetes agents to build and run the container
pipeline { pipeline {
agent { agent {
kubernetes { kubernetes {
@@ -29,6 +27,11 @@ spec:
volumeMounts: volumeMounts:
- name: docker-sock - name: docker-sock
mountPath: /var/run mountPath: /var/run
- name: b2
image: backblazeit/b2:latest
command:
- cat
tty: true
volumes: volumes:
- name: docker-sock - name: docker-sock
emptyDir: {} emptyDir: {}
@@ -39,9 +42,29 @@ spec:
parameters { parameters {
string( string(
name: 'VERSIONS', name: 'VERSIONS',
defaultValue: '8.10 9.6 10.0', defaultValue: '8.10 9.6 10.1',
description: 'Rocky Linux versions to build (space-separated)' description: 'Rocky Linux versions to build (space-separated)'
) )
string(
name: 'B2_BUCKET_NAME',
defaultValue: 'rockyman',
description: 'B2 bucket name for uploads'
)
string(
name: 'EXISTING_VERSIONS',
defaultValue: '',
description: 'Existing versions already built (space-separated)'
)
string(
name: 'PARALLEL_DOWNLOADS',
defaultValue: '5',
description: 'Number of parallel downloads'
)
string(
name: 'PARALLEL_CONVERSIONS',
defaultValue: '10',
description: 'Number of parallel conversions'
)
} }
options { options {
@@ -60,8 +83,8 @@ spec:
steps { steps {
container('docker-cli') { container('docker-cli') {
sh ''' sh '''
docker build -t rocky-man:${BUILD_NUMBER} . docker build -t rocky-man:${BUILD_NUMBER} .
docker tag rocky-man:${BUILD_NUMBER} rocky-man:latest docker tag rocky-man:${BUILD_NUMBER} rocky-man:latest
''' '''
} }
} }
@@ -70,32 +93,52 @@ spec:
stage('Build Man Pages') { stage('Build Man Pages') {
steps { steps {
container('docker-cli') { container('docker-cli') {
sh """ sh '''
# Create output directories # Create output directories
mkdir -p ./html ./tmp mkdir -p ./html ./tmp
# Run the container to build man pages # Run the container to build man pages
docker run --rm \ docker run --rm \
-v "\$(pwd)/html:/app/html" \ -v "$(pwd)/html:/app/html" \
-v "\$(pwd)/tmp:/data/tmp" \ -v "$(pwd)/tmp:/data/tmp" \
rocky-man:${BUILD_NUMBER} \ rocky-man:${BUILD_NUMBER} \
--versions ${params.VERSIONS} \ --versions ${VERSIONS} \
--verbose --verbose \
""" --parallel-downloads ${PARALLEL_DOWNLOADS} \
--parallel-conversions ${PARALLEL_CONVERSIONS} \
--existing-versions "${EXISTING_VERSIONS}"
'''
} }
} }
} }
stage('Archive Artifacts') { stage('Upload to B2') {
when {
expression { return params.B2_BUCKET_NAME != "" }
}
steps { steps {
archiveArtifacts artifacts: 'html/**/*', fingerprint: true container('docker-cli') {
withCredentials([
string(credentialsId: 'b2-app-id', variable: 'B2_APPLICATION_KEY_ID'),
string(credentialsId: 'b2-app-key', variable: 'B2_APPLICATION_KEY')
]) {
sh '''
docker run --rm \
-v "$(pwd)/html:/workspace/html" \
-e B2_APPLICATION_KEY \
-e B2_APPLICATION_KEY_ID \
backblazeit/b2:latest \
b2v4 sync /workspace/html/ "b2://${B2_BUCKET_NAME}/"
'''
}
}
} }
} }
} }
post { post {
success { success {
echo 'Build completed successfully!' echo 'Build completed and uploaded to B2!'
} }
failure { failure {
echo 'Build failed!' echo 'Build failed!'
@@ -103,9 +146,8 @@ spec:
cleanup { cleanup {
container('docker-cli') { container('docker-cli') {
sh ''' sh '''
# Clean up Docker images to save space docker rmi rocky-man:${BUILD_NUMBER} || true
docker rmi rocky-man:${BUILD_NUMBER} || true docker rmi rocky-man:latest || true
docker rmi rocky-man:latest || true
''' '''
} }
} }

420
README.md
View File

@@ -1,121 +1,85 @@
# Rocky Man 📚 # Rocky Man 📚
**Rocky Man** is a comprehensive man page hosting solution for Rocky Linux, providing beautiful, searchable documentation for all packages in BaseOS and AppStream repositories across Rocky Linux 8, 9, and 10. **Rocky Man** is a tool for generating searchable HTML documentation from Rocky Linux man pages across BaseOS and AppStream repositories for Rocky Linux 8, 9, and 10.
> **✨ This is a complete rewrite** with 60-80% faster performance, modern architecture, and production-ready features!
## 🎉 What's New in This Rewrite
This version is a **complete ground-up rebuild** with major improvements:
- 🚀 **60-80% faster** - Pre-filters packages using filelists.xml (downloads only ~800 packages instead of ~3000)
- 🏗️ **Modular architecture** - Clean separation into models, repo, processor, web, and utils
- 🎨 **Modern UI** - Beautiful dark theme with instant fuzzy search
- 🐳 **Container ready** - Multi-stage Dockerfile that works on any architecture
- **Parallel processing** - Concurrent downloads and HTML conversions
- 🧹 **Smart cleanup** - Automatic cleanup of temporary files
- 📝 **Well documented** - Comprehensive docstrings and type hints throughout
- 🔒 **Thread safe** - Proper locking and resource management
- 🤖 **GitHub Actions** - Automated weekly builds and deployment
### Performance Comparison
| Metric | Old Version | New Version | Improvement |
|--------|-------------|-------------|-------------|
| Packages Downloaded | ~3000 | ~800 | 73% reduction |
| Processing Time | 2-3 hours | 30-45 minutes | 75% faster |
| Bandwidth Used | ~10 GB | ~2-3 GB | 80% reduction |
| Architecture | Single file | Modular (16 files) | Much cleaner |
| Thread Safety | ⚠️ Issues | ✅ Safe | Fixed |
| Cleanup | Manual | Automatic | Improved |
| UI Quality | Basic | Modern | Much better |
## Features ## Features
- **Fast & Efficient**: Uses filelists.xml to pre-filter packages with man pages (massive bandwidth savings) - **Fast & Efficient**: Uses filelists.xml to pre-filter packages with man pages
- 🔍 **Fuzzy Search**: Instant search across all man pages with Fuse.js - **Complete Coverage**: All packages from BaseOS and AppStream repositories
- 🎨 **Modern UI**: Clean, responsive dark theme interface inspired by GitHub - **Container Ready**: Works on x86_64, aarch64, arm64, etc.
- 📦 **Complete Coverage**: All packages from BaseOS and AppStream repositories - **Smart Cleanup**: Automatic cleanup of temporary files (configurable)
- 🐳 **Container Ready**: Architecture-independent Docker support (works on x86_64, aarch64, arm64, etc.) - **Parallel Processing**: Concurrent downloads and conversions for maximum speed
- 🚀 **GitHub Actions**: Automated weekly builds and deployment to GitHub Pages - **Multi-version**: Support for Rocky Linux 8, 9, and 10 simultaneously
- 🧹 **Smart Cleanup**: Automatic cleanup of temporary files (configurable)
- **Parallel Processing**: Concurrent downloads and conversions for maximum speed
- 🌐 **Multi-version**: Support for Rocky Linux 8, 9, and 10 simultaneously
## Quick Start ## Quick Start
### Option 1: Docker (Recommended) ### Podman (Recommended)
```bash
# Build the image
docker build -t rocky-man .
# Generate man pages for Rocky Linux 9.6
docker run --rm -v $(pwd)/html:/data/html rocky-man --versions 9.6
# Generate for multiple versions
docker run --rm -v $(pwd)/html:/data/html rocky-man --versions 8.10 9.6 10.0
# With verbose logging
docker run --rm -v $(pwd)/html:/data/html rocky-man --versions 9.6 --verbose
# Keep downloaded RPMs (mount the download directory)
docker run --rm -it \
-v $(pwd)/html:/data/html \
-v $(pwd)/downloads:/data/tmp/downloads \
rocky-man --versions 9.6 --keep-rpms --verbose
```
### Option 2: Podman (Native Rocky Linux)
```bash ```bash
# Build the image # Build the image
podman build -t rocky-man . podman build -t rocky-man .
# Run with podman (note the :Z flag for SELinux) # Generate man pages for Rocky Linux 9.6 (using defaults, no custom args)
podman run --rm -v $(pwd)/html:/data/html:Z rocky-man --versions 9.6 podman run --rm -v $(pwd)/html:/data/html:Z rocky-man
# Interactive mode for debugging # Generate for specific versions (requires explicit paths)
podman run --rm -it -v $(pwd)/html:/data/html:Z rocky-man --versions 9.6 --verbose podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
--versions 8.10 9.6 10.0 --output-dir /app/html
# With verbose logging
podman run --rm -v $(pwd)/html:/app/html:Z rocky-man \
--versions 9.6 --output-dir /app/html --verbose
# Keep downloaded RPMs (mount the download directory) # Keep downloaded RPMs (mount the download directory)
podman run --rm -it \ podman run --rm -it \
-v $(pwd)/html:/data/html:Z \ -v $(pwd)/html:/app/html:Z \
-v $(pwd)/downloads:/data/tmp/downloads:Z \ -v $(pwd)/downloads:/app/tmp/downloads:Z \
rocky-man --versions 9.6 --keep-rpms --verbose rocky-man --versions 9.6 --keep-rpms \
--output-dir /app/html --download-dir /app/tmp/downloads --verbose
``` ```
### Option 3: Docker Compose (Development) ### Docker
```bash ```bash
# Build and run # Build the image
docker-compose up docker build -t rocky-man .
# The generated HTML will be in ./html/ # Generate man pages (using defaults, no custom args)
# Preview at http://localhost:8080 (nginx container) docker run --rm -v $(pwd)/html:/data/html rocky-man
# Generate for specific versions (requires explicit paths)
docker run --rm -v $(pwd)/html:/app/html rocky-man \
--versions 9.6 --output-dir /app/html
# Interactive mode for debugging
docker run --rm -it -v $(pwd)/html:/app/html rocky-man \
--versions 9.6 --output-dir /app/html --verbose
# Keep downloaded RPMs (mount the download directory)
docker run --rm -it \
-v $(pwd)/html:/app/html \
-v $(pwd)/downloads:/app/tmp/downloads \
rocky-man --versions 9.6 --keep-rpms \
--output-dir /app/html --download-dir /app/tmp/downloads --verbose
``` ```
### Directory Structure in Container ### Directory Structure in Container
When running in a container, rocky-man uses these directories inside `/data/`: The container uses different paths depending on whether you pass custom arguments:
- `/data/html` - Generated HTML output (mount this to access results) **Without custom arguments** (using Dockerfile CMD defaults):
- `/data/tmp/downloads` - Downloaded RPM files (temporary) - `/data/html` - Generated HTML output
- `/data/tmp/extracts` - Extracted man page files (temporary) - `/data/tmp/downloads` - Downloaded RPM files
- `/data/tmp/extracts` - Extracted man page files
By default, RPMs and extracts are automatically cleaned up after processing. If you want to keep the RPMs (e.g., for debugging or multiple runs), mount the download directory and use `--keep-rpms`: **With custom arguments** (argparse defaults from working directory `/app`):
- `/app/html` - Generated HTML output
- `/app/tmp/downloads` - Downloaded RPM files
- `/app/tmp/extracts` - Extracted man page files
```bash **Important**: When passing custom arguments, the container's CMD is overridden and the code falls back to relative paths (`./html` = `/app/html`). You must explicitly specify `--output-dir /app/html --download-dir /app/tmp/downloads` to match your volume mounts. Without this, files are written inside the container and lost when it stops (especially with `--rm`).
# This keeps RPMs on your host in ./downloads/
podman run --rm -it \
-v $(pwd)/html:/data/html:Z \
-v $(pwd)/downloads:/data/tmp/downloads:Z \
rocky-man --versions 9.6 --keep-rpms
```
**Note**: Without mounting `/data/tmp/downloads`, the `--keep-rpms` flag will keep files inside the container, but they'll be lost when the container stops (especially with `--rm`). ### Local Development
### Option 4: Local Development
#### Prerequisites #### Prerequisites
@@ -154,6 +118,9 @@ python -m rocky_man.main --parallel-downloads 10 --parallel-conversions 20
# Use a different mirror # Use a different mirror
python -m rocky_man.main --mirror https://mirrors.example.com/ python -m rocky_man.main --mirror https://mirrors.example.com/
# Only BaseOS (faster)
python -m rocky_man.main --repo-types BaseOS --versions 9.6
``` ```
## Architecture ## Architecture
@@ -164,59 +131,24 @@ Rocky Man is organized into clean, modular components:
rocky-man/ rocky-man/
├── src/rocky_man/ ├── src/rocky_man/
│ ├── models/ # Data models (Package, ManFile) │ ├── models/ # Data models (Package, ManFile)
│ ├── package.py # RPM package representation │ ├── repo/ # Repository management
│ └── manfile.py # Man page file representation ├── processor/ # Man page processing
│ ├── repo/ # Repository management │ ├── web/ # Web page generation
│ ├── manager.py # DNF repository operations │ ├── utils/ # Utilities
│ └── contents.py # Filelists.xml parser (key optimization!) │ └── main.py # Main entry point and orchestration
│ ├── processor/ # Man page processing ├── templates/ # Jinja2 templates
│ │ ├── extractor.py # Extract man pages from RPMs ├── Dockerfile # Multi-stage, arch-independent
│ │ └── converter.py # Convert to HTML with mandoc └── pyproject.toml # Python project configuration
│ ├── web/ # Web page generation
│ │ └── generator.py # HTML and search index generation
│ ├── utils/ # Utilities
│ │ └── config.py # Configuration management
│ └── main.py # Main entry point and orchestration
├── templates/ # Jinja2 templates
│ ├── base.html # Base template with modern styling
│ ├── index.html # Search page with Fuse.js
│ ├── manpage.html # Individual man page display
│ └── root.html # Multi-version landing page
├── Dockerfile # Multi-stage, arch-independent
├── docker-compose.yml # Development setup with nginx
├── .github/workflows/ # GitHub Actions automation
└── pyproject.toml # Python project configuration
``` ```
### How It Works ### How It Works
1. **Package Discovery** 🔍 1. **Package Discovery** - Parse repository `filelists.xml` to identify packages with man pages
- Parse repository `filelists.xml` to identify packages with man pages 2. **Smart Download** - Download only packages containing man pages with parallel downloads
- This is the **key optimization** - we know what to download before downloading! 3. **Extraction** - Extract man page files from RPM packages
4. **Conversion** - Convert troff format to HTML using mandoc
2. **Smart Download** ⬇️ 5. **Web Generation** - Wrap HTML in templates and generate search index
- Download only packages containing man pages (60-80% reduction) 6. **Cleanup** - Automatically remove temporary files (configurable)
- Parallel downloads for speed
- Architecture-independent (man pages are the same across arches)
3. **Extraction** 📦
- Extract man page files from RPM packages
- Handle gzipped and plain text man pages
- Support for multiple languages
4. **Conversion** 🔄
- Convert troff format to HTML using mandoc
- Clean up HTML output
- Parallel processing for speed
5. **Web Generation** 🌐
- Wrap HTML in beautiful templates
- Generate search index with fuzzy search
- Create multi-version navigation
6. **Cleanup** 🧹
- Automatically remove temporary files (configurable)
- Keep only what you need
## Command Line Options ## Command Line Options
@@ -266,183 +198,6 @@ Options:
-v, --verbose Enable verbose logging -v, --verbose Enable verbose logging
``` ```
### Examples
```bash
# Quick test with one version
python -m rocky_man.main --versions 9.6
# Production build with all versions (default)
python -m rocky_man.main
# Fast build with more parallelism
python -m rocky_man.main --parallel-downloads 15 --parallel-conversions 30
# Keep files for debugging
python -m rocky_man.main --keep-rpms --keep-extracts --verbose
# Custom mirror (faster for your location)
python -m rocky_man.main --mirror https://mirror.usi.edu/pub/rocky/
# Only BaseOS (faster)
python -m rocky_man.main --repo-types BaseOS --versions 9.6
```
## GitHub Actions Integration
This project includes a **production-ready GitHub Actions workflow** that:
- ✅ Runs automatically every Sunday at midnight UTC
- ✅ Can be manually triggered with custom version selection
- ✅ Builds man pages in a Rocky Linux container
- ✅ Automatically deploys to GitHub Pages
- ✅ Artifacts available for download
### Setup Instructions
1. **Enable GitHub Pages**
- Go to your repository → Settings → Pages
- Set source to **"GitHub Actions"**
- Save
2. **Trigger the workflow**
- Go to Actions tab
- Select "Build Rocky Man Pages"
- Click "Run workflow"
- Choose versions (or use default)
3. **Access your site**
- Will be available at: `https://YOUR_USERNAME.github.io/rocky-man/`
- Updates automatically every week!
### Workflow File
Located at `.github/workflows/build.yml`, it:
- Uses Rocky Linux 9 container
- Installs all dependencies
- Runs the build
- Uploads artifacts
- Deploys to GitHub Pages
## What's Different from the Original
| Feature | Old Version | New Version |
|---------|-------------|-------------|
| **Architecture** | Single 400-line file | Modular, 16 files across 6 modules |
| **Package Filtering** | Downloads everything | Pre-filters with filelists.xml |
| **Performance** | 2-3 hours, ~10 GB | 30-45 min, ~2-3 GB |
| **UI** | Basic template | Modern GitHub-inspired design |
| **Search** | Simple filter | Fuzzy search with Fuse.js |
| **Container** | Basic Podman commands | Multi-stage Dockerfile + compose |
| **Thread Safety** | Global dict issues | Proper locking mechanisms |
| **Cleanup** | Method exists but unused | Automatic, configurable |
| **Documentation** | Minimal comments | Comprehensive docstrings |
| **Type Hints** | None | Throughout codebase |
| **Error Handling** | Basic try/catch | Comprehensive with logging |
| **CI/CD** | None | GitHub Actions ready |
| **Testing** | None | Ready for pytest integration |
| **Configuration** | Hardcoded | Config class with defaults |
## Project Structure Details
```
rocky-man/
├── src/rocky_man/ # Main source code
│ ├── __init__.py # Package initialization
│ ├── main.py # Entry point and orchestration (200 lines)
│ ├── models/ # Data models
│ │ ├── __init__.py
│ │ ├── package.py # Package model with properties
│ │ └── manfile.py # ManFile model with path parsing
│ ├── repo/ # Repository operations
│ │ ├── __init__.py
│ │ ├── manager.py # DNF integration, downloads
│ │ └── contents.py # Filelists parser (key optimization)
│ ├── processor/ # Processing pipeline
│ │ ├── __init__.py
│ │ ├── extractor.py # RPM extraction with rpmfile
│ │ └── converter.py # mandoc conversion wrapper
│ ├── web/ # Web generation
│ │ ├── __init__.py
│ │ └── generator.py # Template rendering, search index
│ └── utils/ # Utilities
│ ├── __init__.py
│ └── config.py # Configuration management
├── templates/ # Jinja2 templates
│ ├── base.html # Base layout (modern dark theme)
│ ├── index.html # Search page (Fuse.js integration)
│ ├── manpage.html # Man page display
│ └── root.html # Multi-version landing
├── old/ # Your original code (preserved)
│ ├── rocky_man.py
│ ├── rocky_man2.py
│ └── templates/
├── .github/
│ └── workflows/
│ └── build.yml # GitHub Actions workflow
├── Dockerfile # Multi-stage build
├── .dockerignore # Optimize Docker context
├── docker-compose.yml # Dev environment
├── pyproject.toml # Python project config
├── .gitignore # Updated for new structure
└── README.md # This file!
```
## Development
### Adding New Features
The modular design makes it easy to extend:
- **New repositories**: Add to `config.repo_types` in `utils/config.py`
- **Custom templates**: Use `--template-dir` flag or modify `templates/`
- **Additional metadata**: Extend `Package` or `ManFile` models
- **Alternative converters**: Implement new converter in `processor/`
- **Different outputs**: Add new generator in `web/`
### Running Tests
```bash
# Install dev dependencies
pip3 install -e ".[dev]"
# Run tests (when implemented)
pytest
# Type checking
mypy src/
# Linting
ruff check src/
```
### Development Workflow
```bash
# 1. Make changes to code
vim src/rocky_man/processor/converter.py
# 2. Test locally in container
podman run --rm -it -v $(pwd):/app rockylinux:9 /bin/bash
cd /app
python3 -m rocky_man.main --versions 9.6 --verbose
# 3. Build Docker image
docker build -t rocky-man .
# 4. Test Docker image
docker run --rm -v $(pwd)/html:/data/html rocky-man --versions 9.6
# 5. Preview output
docker-compose up nginx
# Visit http://localhost:8080
# 6. Commit and push
git add .
git commit -m "feat: your feature description"
git push
```
## Troubleshooting ## Troubleshooting
### DNF Errors ### DNF Errors
@@ -510,12 +265,6 @@ python -m rocky_man.main --parallel-downloads 2 --parallel-conversions 5
python -m rocky_man.main --mirror https://mirror.example.com/rocky/ python -m rocky_man.main --mirror https://mirror.example.com/rocky/
``` ```
### UTF-8 Decode Errors
**Problem**: `'utf-8' codec can't decode byte...`
**Solution**: This is now handled with `errors='replace'` in the new version. The man page will still be processed with replacement characters for invalid UTF-8.
## Performance Tips ## Performance Tips
1. **Use closer mirrors** - Significant speed improvement for downloads 1. **Use closer mirrors** - Significant speed improvement for downloads
@@ -547,34 +296,3 @@ Contributions welcome! Please:
5. Commit with clear messages (`git commit -m 'feat: add amazing feature'`) 5. Commit with clear messages (`git commit -m 'feat: add amazing feature'`)
6. Push to your branch (`git push origin feature/amazing-feature`) 6. Push to your branch (`git push origin feature/amazing-feature`)
7. Open a Pull Request 7. Open a Pull Request
## Acknowledgments
- Inspired by [debiman](https://github.com/Debian/debiman) for Debian
- Uses [mandoc](https://mandoc.bsd.lv/) for man page conversion
- Search powered by [Fuse.js](https://fusejs.io/)
- Modern UI design inspired by GitHub's dark theme
## Links
- [Rocky Linux](https://rockylinux.org/)
- [Man Page Format](https://man7.org/linux/man-pages/)
- [Mandoc Documentation](https://mandoc.bsd.lv/)
- [DNF Documentation](https://dnf.readthedocs.io/)
## Roadmap
- [ ] Add pytest test suite
- [ ] Implement incremental updates (checksum-based)
- [ ] Add support for localized man pages (es, fr, etc.)
- [ ] Create redirect system like debiman
- [ ] Add statistics page (most viewed, etc.)
- [ ] Implement RSS feed for updates
- [ ] Add support for Rocky Linux 10 (when released)
- [ ] Create sitemap.xml for SEO
- [ ] Add dark/light theme toggle
- [ ] Implement caching for faster rebuilds
---
**Made with ❤️ for the Rocky Linux community**

View File

@@ -2,6 +2,7 @@
import argparse import argparse
import logging import logging
import re
import sys import sys
from pathlib import Path from pathlib import Path
@@ -16,16 +17,12 @@ def setup_logging(verbose: bool = False):
level = logging.DEBUG if verbose else logging.INFO level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig( logging.basicConfig(
level=level, level=level,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt='%Y-%m-%d %H:%M:%S' datefmt="%Y-%m-%d %H:%M:%S",
) )
def process_version( def process_version(config: Config, version: str, template_dir: Path) -> bool:
config: Config,
version: str,
template_dir: Path
) -> bool:
"""Process a single Rocky Linux version. """Process a single Rocky Linux version.
Args: Args:
@@ -53,21 +50,18 @@ def process_version(
# Use first available architecture (man pages are arch-independent) # Use first available architecture (man pages are arch-independent)
arch = config.architectures[0] arch = config.architectures[0]
# Get repository URL
repo_url = config.get_repo_url(version, repo_type, arch)
# Create cache dir for this repo # Create cache dir for this repo
cache_dir = config.download_dir / f".cache/{version}/{repo_type}" cache_dir = config.download_dir / f".cache/{version}/{repo_type}"
try: try:
# Initialize repository manager # Initialize repository manager
repo_manager = RepoManager( repo_manager = RepoManager(
repo_url=repo_url, config=config,
version=version, version=version,
repo_type=repo_type, repo_type=repo_type,
arch=arch, arch=arch,
cache_dir=cache_dir, cache_dir=cache_dir,
download_dir=version_download_dir download_dir=version_download_dir,
) )
# List packages (with man pages only) # List packages (with man pages only)
@@ -83,19 +77,19 @@ def process_version(
if config.skip_packages: if config.skip_packages:
original_count = len(packages) original_count = len(packages)
packages = [ packages = [
pkg for pkg in packages pkg for pkg in packages if pkg.name not in config.skip_packages
if pkg.name not in config.skip_packages
] ]
filtered_count = original_count - len(packages) filtered_count = original_count - len(packages)
if filtered_count > 0: if filtered_count > 0:
logger.info(f"Filtered out {filtered_count} packages based on skip list") logger.info(
f"Filtered out {filtered_count} packages based on skip list"
)
logger.info(f"Processing {len(packages)} packages") logger.info(f"Processing {len(packages)} packages")
# Download packages # Download packages
logger.info("Downloading packages...") logger.info("Downloading packages...")
downloaded = repo_manager.download_packages( downloaded = repo_manager.download_packages(
packages, packages, max_workers=config.parallel_downloads
max_workers=config.parallel_downloads
) )
# Extract man pages # Extract man pages
@@ -103,11 +97,10 @@ def process_version(
extractor = ManPageExtractor( extractor = ManPageExtractor(
version_extract_dir, version_extract_dir,
skip_sections=config.skip_sections, skip_sections=config.skip_sections,
skip_languages=config.skip_languages skip_languages=config.skip_languages,
) )
man_files = extractor.extract_from_packages( man_files = extractor.extract_from_packages(
downloaded, downloaded, max_workers=config.parallel_downloads
max_workers=config.parallel_downloads
) )
logger.info(f"Extracted {len(man_files)} man pages") logger.info(f"Extracted {len(man_files)} man pages")
@@ -124,8 +117,7 @@ def process_version(
logger.info("Converting man pages to HTML...") logger.info("Converting man pages to HTML...")
converter = ManPageConverter(version_output_dir) converter = ManPageConverter(version_output_dir)
converted = converter.convert_many( converted = converter.convert_many(
man_files_with_content, man_files_with_content, max_workers=config.parallel_conversions
max_workers=config.parallel_conversions
) )
all_man_files.extend(converted) all_man_files.extend(converted)
@@ -149,11 +141,6 @@ def process_version(
logger.error(f"No man pages were successfully processed for version {version}") logger.error(f"No man pages were successfully processed for version {version}")
return False return False
# Link cross-references between man pages
logger.info("Linking cross-references...")
converter = ManPageConverter(version_output_dir)
converter.link_cross_references(all_man_files)
# Generate web pages # Generate web pages
logger.info("Generating web pages...") logger.info("Generating web pages...")
web_gen = WebGenerator(template_dir, config.output_dir) web_gen = WebGenerator(template_dir, config.output_dir)
@@ -168,132 +155,154 @@ def process_version(
# Generate packages index page # Generate packages index page
web_gen.generate_packages_index(version, search_index) web_gen.generate_packages_index(version, search_index)
# Set HTML paths for all man files
for man_file in all_man_files:
if not man_file.html_path:
man_file.html_path = web_gen._get_manpage_path(man_file, version)
# Link cross-references between man pages
logger.info("Linking cross-references...")
converter.link_cross_references(all_man_files, version)
# Wrap man pages in templates # Wrap man pages in templates
logger.info("Generating man page HTML...") logger.info("Generating man page HTML...")
for man_file in all_man_files: for man_file in all_man_files:
web_gen.generate_manpage_html(man_file, version) web_gen.generate_manpage_html(man_file, version)
logger.info(f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}") logger.info(
f"Successfully processed {len(all_man_files)} man pages for Rocky Linux {version}"
)
return True return True
def main(): def main():
"""Main entry point.""" """Main entry point."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Generate HTML documentation for Rocky Linux man pages' description="Generate HTML documentation for Rocky Linux man pages"
) )
parser.add_argument( parser.add_argument(
'--versions', "--versions",
nargs='+', nargs="+",
default=['8.10', '9.6', '10.0'], default=["8.10", "9.6", "10.0"],
help='Rocky Linux versions to process (default: 8.10 9.6 10.0)' help="Rocky Linux versions to process (default: 8.10 9.6 10.0)",
) )
parser.add_argument( parser.add_argument(
'--repo-types', "--repo-types",
nargs='+', nargs="+",
default=['BaseOS', 'AppStream'], default=["BaseOS", "AppStream"],
help='Repository types to process (default: BaseOS AppStream)' help="Repository types to process (default: BaseOS AppStream)",
) )
parser.add_argument( parser.add_argument(
'--output-dir', "--output-dir",
type=Path, type=Path,
default=Path('./html'), default=Path("./html"),
help='Output directory for HTML files (default: ./html)' help="Output directory for HTML files (default: ./html)",
) )
parser.add_argument( parser.add_argument(
'--download-dir', "--download-dir",
type=Path, type=Path,
default=Path('./tmp/downloads'), default=Path("./tmp/downloads"),
help='Directory for downloading packages (default: ./tmp/downloads)' help="Directory for downloading packages (default: ./tmp/downloads)",
) )
parser.add_argument( parser.add_argument(
'--extract-dir', "--extract-dir",
type=Path, type=Path,
default=Path('./tmp/extracts'), default=Path("./tmp/extracts"),
help='Directory for extracting man pages (default: ./tmp/extracts)' help="Directory for extracting man pages (default: ./tmp/extracts)",
) )
parser.add_argument( parser.add_argument(
'--keep-rpms', "--keep-rpms",
action='store_true', action="store_true",
help='Keep downloaded RPM files after processing' help="Keep downloaded RPM files after processing",
) )
parser.add_argument( parser.add_argument(
'--keep-extracts', "--keep-extracts",
action='store_true', action="store_true",
help='Keep extracted man files after processing' help="Keep extracted man files after processing",
) )
parser.add_argument( parser.add_argument(
'--parallel-downloads', "--parallel-downloads",
type=int, type=int,
default=5, default=5,
help='Number of parallel downloads (default: 5)' help="Number of parallel downloads (default: 5)",
) )
parser.add_argument( parser.add_argument(
'--parallel-conversions', "--parallel-conversions",
type=int, type=int,
default=10, default=10,
help='Number of parallel HTML conversions (default: 10)' help="Number of parallel HTML conversions (default: 10)",
) )
parser.add_argument( parser.add_argument(
'--mirror', "--mirror",
default='http://dl.rockylinux.org/', default="http://dl.rockylinux.org/",
help='Rocky Linux mirror URL (default: http://dl.rockylinux.org/)' help="Rocky Linux mirror URL (default: http://dl.rockylinux.org/)",
) )
parser.add_argument( parser.add_argument(
'--template-dir', "--vault",
action="store_true",
help="Use vault directory instead of pub (vault/rocky instead of pub/rocky)",
)
parser.add_argument(
"--existing-versions",
nargs="*",
metavar="VERSION",
help="List of existing versions to include in root index (e.g., 8.10 9.7)",
)
parser.add_argument(
"--template-dir",
type=Path, type=Path,
default=Path(__file__).parent.parent.parent / 'templates', default=Path(__file__).parent.parent.parent / "templates",
help='Template directory (default: ./templates)' help="Template directory (default: ./templates)",
) )
parser.add_argument( parser.add_argument(
'-v', '--verbose', "-v", "--verbose", action="store_true", help="Enable verbose logging"
action='store_true',
help='Enable verbose logging'
) )
parser.add_argument( parser.add_argument(
'--skip-sections', "--skip-sections",
nargs='*', nargs="*",
default=None, default=None,
help='Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.' help="Man sections to skip (default: 3 3p 3pm). Use empty list to skip none.",
) )
parser.add_argument( parser.add_argument(
'--skip-packages', "--skip-packages",
nargs='*', nargs="*",
default=None, default=None,
help='Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.' help="Package names to skip (default: lapack dpdk-devel gl-manpages). Use empty list to skip none.",
) )
parser.add_argument( parser.add_argument(
'--skip-languages', "--skip-languages",
action='store_true', action="store_true",
default=None, default=None,
help='Skip non-English man pages (default: enabled)' help="Skip non-English man pages (default: enabled)",
) )
parser.add_argument( parser.add_argument(
'--keep-languages', "--keep-languages",
action='store_true', action="store_true",
help='Keep all languages (disables --skip-languages)' help="Keep all languages (disables --skip-languages)",
) )
parser.add_argument( parser.add_argument(
'--allow-all-sections', "--allow-all-sections",
action='store_true', action="store_true",
help='Include all man sections (overrides --skip-sections)' help="Include all man sections (overrides --skip-sections)",
) )
args = parser.parse_args() args = parser.parse_args()
@@ -309,9 +318,13 @@ def main():
elif args.skip_languages is not None: elif args.skip_languages is not None:
skip_languages = args.skip_languages skip_languages = args.skip_languages
# Determine content directory
content_dir = "vault/rocky" if args.vault else "pub/rocky"
# Create configuration # Create configuration
config = Config( config = Config(
base_url=args.mirror, base_url=args.mirror,
content_dir=content_dir,
versions=args.versions, versions=args.versions,
repo_types=args.repo_types, repo_types=args.repo_types,
download_dir=args.download_dir, download_dir=args.download_dir,
@@ -324,11 +337,31 @@ def main():
skip_sections=args.skip_sections, skip_sections=args.skip_sections,
skip_packages=args.skip_packages, skip_packages=args.skip_packages,
skip_languages=skip_languages, skip_languages=skip_languages,
allow_all_sections=args.allow_all_sections allow_all_sections=args.allow_all_sections,
) )
# Get existing versions from scan and argument
scanned_versions = [
d.name
for d in config.output_dir.iterdir()
if d.is_dir() and re.match(r"\d+\.\d+", d.name)
]
arg_versions = args.existing_versions or []
# Sort versions numerically by (major, minor)
def version_key(v):
try:
major, minor = v.split(".")
return (int(major), int(minor))
except (ValueError, AttributeError):
return (0, 0)
existing_versions = sorted(set(scanned_versions + arg_versions), key=version_key)
all_versions = sorted(set(existing_versions + config.versions), key=version_key)
logger.info("Rocky Man - Rocky Linux Man Page Generator") logger.info("Rocky Man - Rocky Linux Man Page Generator")
logger.info(f"Versions: {', '.join(config.versions)}") logger.info(f"Versions to process: {', '.join(config.versions)}")
logger.info(f"All known versions: {', '.join(all_versions)}")
logger.info(f"Repositories: {', '.join(config.repo_types)}") logger.info(f"Repositories: {', '.join(config.repo_types)}")
logger.info(f"Output directory: {config.output_dir}") logger.info(f"Output directory: {config.output_dir}")
@@ -362,7 +395,7 @@ def main():
# Generate root index # Generate root index
logger.info("Generating root index page...") logger.info("Generating root index page...")
web_gen = WebGenerator(args.template_dir, config.output_dir) web_gen = WebGenerator(args.template_dir, config.output_dir)
web_gen.generate_root_index(processed_versions) web_gen.generate_root_index(all_versions)
logger.info("=" * 60) logger.info("=" * 60)
logger.info("Processing complete!") logger.info("Processing complete!")
@@ -373,5 +406,5 @@ def main():
return 0 return 0
if __name__ == '__main__': if __name__ == "__main__":
sys.exit(main()) sys.exit(main())

View File

@@ -40,11 +40,7 @@ class ManPageConverter:
try: try:
# Run mandoc with no arguments - it will show usage and exit # Run mandoc with no arguments - it will show usage and exit
# We just want to verify the command exists, not that it succeeds # We just want to verify the command exists, not that it succeeds
subprocess.run( subprocess.run(["mandoc"], capture_output=True, timeout=5)
['mandoc'],
capture_output=True,
timeout=5
)
return True return True
except FileNotFoundError: except FileNotFoundError:
# mandoc command not found # mandoc command not found
@@ -73,6 +69,31 @@ class ManPageConverter:
# Clean up HTML # Clean up HTML
html = self._clean_html(html) html = self._clean_html(html)
# Check if mandoc output indicates this is a symlink/redirect
# Pattern: <div class="manual-text">/usr/share/man/man8/target.8.gz</div>
# or: <div class="manual-text">See the file /usr/share/man/man8/target.8.</div>
# or: <div class="manual-text">See the file man1/builtin.1.</div>
symlink_match = re.search(
r'<div class="manual-text">.*?(?:See the file )?((?:/usr/share/man/)?man\d+[a-z]*/([^/]+)\.(\d+[a-z]*)(?:\.gz)?)\..*?</div>',
html,
re.DOTALL,
)
if not symlink_match:
# Try simpler pattern without "See the file" or period
symlink_match = re.search(
r'<div class="manual-text">.*?((?:/usr/share/man/)?man\d+[a-z]*/([^/<]+)\.(\d+[a-z]*)(?:\.gz)?).*?</div>',
html,
re.DOTALL,
)
if symlink_match:
name = symlink_match.group(2)
section = symlink_match.group(3)
logger.info(
f"{man_file.display_name} detected as symlink to {name}({section})"
)
html = self._generate_redirect_html({"name": name, "section": section})
# Store in ManFile object # Store in ManFile object
man_file.html_content = html man_file.html_content = html
@@ -82,7 +103,7 @@ class ManPageConverter:
# Save HTML file # Save HTML file
output_path.parent.mkdir(parents=True, exist_ok=True) output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f: with open(output_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
logger.debug(f"Converted {man_file.display_name} -> {output_path}") logger.debug(f"Converted {man_file.display_name} -> {output_path}")
@@ -93,9 +114,7 @@ class ManPageConverter:
return False return False
def convert_many( def convert_many(
self, self, man_files: List[tuple], max_workers: int = 10
man_files: List[tuple],
max_workers: int = 10
) -> List[ManFile]: ) -> List[ManFile]:
"""Convert multiple man pages in parallel. """Convert multiple man pages in parallel.
@@ -138,21 +157,21 @@ class ManPageConverter:
""" """
try: try:
result = subprocess.run( result = subprocess.run(
['mandoc', '-T', 'html', '-O', 'fragment,toc'], ["mandoc", "-T", "html", "-O", "fragment,toc"],
input=content.encode('utf-8'), input=content.encode("utf-8"),
capture_output=True, capture_output=True,
timeout=30 timeout=30,
) )
if result.returncode != 0: if result.returncode != 0:
stderr = result.stderr.decode('utf-8', errors='replace') stderr = result.stderr.decode("utf-8", errors="replace")
logger.warning(f"mandoc returned error: {stderr}") logger.warning(f"mandoc returned error: {stderr}")
# Sometimes mandoc returns non-zero but still produces output # Sometimes mandoc returns non-zero but still produces output
if result.stdout: if result.stdout:
return result.stdout.decode('utf-8', errors='replace') return result.stdout.decode("utf-8", errors="replace")
return None return None
return result.stdout.decode('utf-8', errors='replace') return result.stdout.decode("utf-8", errors="replace")
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
logger.error("mandoc conversion timed out") logger.error("mandoc conversion timed out")
@@ -172,14 +191,10 @@ class ManPageConverter:
""" """
# Remove empty parentheses in header cells # Remove empty parentheses in header cells
html = re.sub( html = re.sub(
r'<td class="head-ltitle">\(\)</td>', r'<td class="head-ltitle">\(\)</td>', '<td class="head-ltitle"></td>', html
'<td class="head-ltitle"></td>',
html
) )
html = re.sub( html = re.sub(
r'<td class="head-rtitle">\(\)</td>', r'<td class="head-rtitle">\(\)</td>', '<td class="head-rtitle"></td>', html
'<td class="head-rtitle"></td>',
html
) )
# Strip leading/trailing whitespace # Strip leading/trailing whitespace
@@ -187,7 +202,34 @@ class ManPageConverter:
return html return html
def link_cross_references(self, man_files: List[ManFile]) -> None: def _generate_redirect_html(self, target_info: dict) -> str:
"""Generate HTML for a symlink/redirect page.
Args:
target_info: Dict with 'name' and 'section' of target man page
Returns:
HTML fragment for redirect page
"""
name = target_info["name"]
section = target_info["section"]
# Generate the relative path to the target man page
# Symlinks are in the same package, just different file names
target_filename = f"{name}.{section}.html"
# Generate simple redirect HTML with a working hyperlink
html = f'''<div class="symlink-notice" style="padding: 2rem; text-align: center; background-color: var(--bg-tertiary); border-radius: 8px; border: 1px solid var(--border-color);">
<p style="font-size: 1.2rem; margin-bottom: 1.5rem; color: var(--text-primary);">
This is an alias for <b>{name}</b>({section}).
</p>
<p style="font-size: 1.1rem;">
<a href="{target_filename}" style="color: var(--accent-primary); text-decoration: none; font-weight: 500;">View the manual page</a>
</p>
</div>'''
return html
def link_cross_references(self, man_files: List[ManFile], version: str) -> None:
"""Add hyperlinks to cross-references in SEE ALSO sections. """Add hyperlinks to cross-references in SEE ALSO sections.
Goes through all converted HTML files and converts man page references Goes through all converted HTML files and converts man page references
@@ -206,31 +248,31 @@ class ManPageConverter:
logger.info(f"Linking cross-references across {len(man_files)} man pages...") logger.info(f"Linking cross-references across {len(man_files)} man pages...")
# Process each man page HTML file # Process each man page HTML content
for man_file in man_files: for man_file in man_files:
if not man_file.html_path or not man_file.html_path.exists(): if not man_file.html_content:
continue continue
try: try:
# Read the HTML html = man_file.html_content
with open(man_file.html_path, 'r', encoding='utf-8') as f:
html = f.read()
# Find and replace man page references # Find and replace man page references
# Mandoc outputs references as: <b>name</b>(section) # Mandoc outputs references as: <b>name</b>(section)
# Pattern matches both <b>name</b>(section) and plain name(section) # Pattern matches both <b>name</b>(section) and plain name(section)
pattern = r'<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)' pattern = (
r"<b>([\w\-_.]+)</b>\((\d+[a-z]*)\)|\b([\w\-_.]+)\((\d+[a-z]*)\)"
)
def replace_reference(match): def replace_reference(match):
full_match = match.group(0) full_match = match.group(0)
# Check if this match is already inside an <a> tag # Check if this match is already inside an <a> tag
# Look back up to 500 chars for context # Look back up to 500 chars for context
before_text = html[max(0, match.start()-500):match.start()] before_text = html[max(0, match.start() - 500) : match.start()]
# Find the last <a and last </a> before this match # Find the last <a and last </a> before this match
last_open = before_text.rfind('<a ') last_open = before_text.rfind("<a ")
last_close = before_text.rfind('</a>') last_close = before_text.rfind("</a>")
# If the last <a> is after the last </a>, we're inside a link # If the last <a> is after the last </a>, we're inside a link
if last_open > last_close: if last_open > last_close:
@@ -249,23 +291,24 @@ class ManPageConverter:
# Calculate relative path from current file to target # Calculate relative path from current file to target
target_path = lookup[key] target_path = lookup[key]
# File structure: output_dir/version/package_name/manN/file.html # File structure: output_dir/version/package_name/manN/file.html
# Need to go up 3 levels to reach version root # Need to go up 3 levels to reach output root, then down to version/target
# Current: package_name/manN/file.html # Current: version/package_name/manN/file.html
# Target: other_package/manM/file.html # Target: version/other_package/manM/file.html
rel_path = f"../../../{target_path}" rel_path = f"../../../{version}/{target_path}"
return f'<a href="{rel_path}">{full_match}</a>' return f'<a href="{rel_path}">{full_match}</a>'
return full_match return full_match
updated_html = re.sub(pattern, replace_reference, html) updated_html = re.sub(pattern, replace_reference, html)
# Only write if something changed # Update the content if something changed
if updated_html != html: if updated_html != html:
with open(man_file.html_path, 'w', encoding='utf-8') as f: man_file.html_content = updated_html
f.write(updated_html)
except Exception as e: except Exception as e:
logger.warning(f"Error linking references in {man_file.display_name}: {e}") logger.warning(
f"Error linking references in {man_file.display_name}: {e}"
)
logger.info("Cross-reference linking complete") logger.info("Cross-reference linking complete")

View File

@@ -25,7 +25,7 @@ class RepoManager:
def __init__( def __init__(
self, self,
repo_url: str, config,
version: str, version: str,
repo_type: str, repo_type: str,
arch: str, arch: str,
@@ -35,14 +35,14 @@ class RepoManager:
"""Initialize repository manager. """Initialize repository manager.
Args: Args:
repo_url: Full repository URL config: Configuration object
version: Rocky Linux version (e.g., '9.5') version: Rocky Linux version (e.g., '9.5')
repo_type: Repository type ('BaseOS' or 'AppStream') repo_type: Repository type ('BaseOS' or 'AppStream')
arch: Architecture (e.g., 'x86_64') arch: Architecture (e.g., 'x86_64')
cache_dir: Directory for caching metadata cache_dir: Directory for caching metadata
download_dir: Directory for downloading packages download_dir: Directory for downloading packages
""" """
self.repo_url = repo_url self.config = config
self.version = version self.version = version
self.repo_type = repo_type self.repo_type = repo_type
self.arch = arch self.arch = arch
@@ -58,7 +58,7 @@ class RepoManager:
self.base.conf.errorlevel = 0 self.base.conf.errorlevel = 0
self.base.conf.cachedir = str(self.cache_dir / "dnf") self.base.conf.cachedir = str(self.cache_dir / "dnf")
self._configure_repo() self.repo_url = None
self.packages_with_manpages: Optional[Set[str]] = None self.packages_with_manpages: Optional[Set[str]] = None
def _configure_repo(self): def _configure_repo(self):
@@ -88,8 +88,32 @@ class RepoManager:
if self.packages_with_manpages is not None: if self.packages_with_manpages is not None:
return self.packages_with_manpages return self.packages_with_manpages
parser = ContentsParser(self.repo_url, self.cache_dir) # Try pub first, then vault if it fails
self.packages_with_manpages = parser.get_packages_with_manpages() content_dirs = ["pub/rocky", "vault/rocky"]
for content_dir in content_dirs:
original_content_dir = self.config.content_dir
self.config.content_dir = content_dir
try:
repo_url = self.config.get_repo_url(
self.version, self.repo_type, self.arch
)
parser = ContentsParser(repo_url, self.cache_dir)
packages = parser.get_packages_with_manpages()
if packages: # Only use if it has man pages
self.packages_with_manpages = packages
self.repo_url = repo_url # Set for later use
logger.info(f"Using repository: {repo_url}")
break
else:
logger.warning(f"No man pages found in {content_dir}, trying next")
except Exception as e:
logger.warning(f"Failed to load metadata from {content_dir}: {e}")
finally:
self.config.content_dir = original_content_dir
else:
raise RuntimeError(
f"Failed to load repository metadata for {self.version} {self.repo_type} from both pub and vault"
)
return self.packages_with_manpages return self.packages_with_manpages
@@ -102,7 +126,9 @@ class RepoManager:
Returns: Returns:
List of Package objects List of Package objects
""" """
logger.info(f"Querying packages from {self.repo_type} ({self.version}/{self.arch})") logger.info(
f"Querying packages from {self.repo_type} ({self.version}/{self.arch})"
)
# Get packages with man pages if filtering # Get packages with man pages if filtering
manpage_packages = None manpage_packages = None
@@ -110,6 +136,9 @@ class RepoManager:
manpage_packages = self.discover_packages_with_manpages() manpage_packages = self.discover_packages_with_manpages()
logger.info(f"Filtering to {len(manpage_packages)} packages with man pages") logger.info(f"Filtering to {len(manpage_packages)} packages with man pages")
# Configure DNF repo now that we have the correct repo_url
self._configure_repo()
packages = [] packages = []
# Query all available packages # Query all available packages
@@ -176,7 +205,7 @@ class RepoManager:
response.raise_for_status() response.raise_for_status()
# Download with progress (optional: could add progress bar here) # Download with progress (optional: could add progress bar here)
with open(download_path, 'wb') as f: with open(download_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
@@ -192,9 +221,7 @@ class RepoManager:
return False return False
def download_packages( def download_packages(
self, self, packages: List[Package], max_workers: int = 5
packages: List[Package],
max_workers: int = 5
) -> List[Package]: ) -> List[Package]:
"""Download multiple packages in parallel. """Download multiple packages in parallel.
@@ -210,8 +237,7 @@ class RepoManager:
with ThreadPoolExecutor(max_workers=max_workers) as executor: with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all download tasks # Submit all download tasks
future_to_pkg = { future_to_pkg = {
executor.submit(self.download_package, pkg): pkg executor.submit(self.download_package, pkg): pkg for pkg in packages
for pkg in packages
} }
# Process completed downloads # Process completed downloads
@@ -223,7 +249,9 @@ class RepoManager:
except Exception as e: except Exception as e:
logger.error(f"Error processing {pkg.name}: {e}") logger.error(f"Error processing {pkg.name}: {e}")
logger.info(f"Successfully downloaded {len(downloaded)}/{len(packages)} packages") logger.info(
f"Successfully downloaded {len(downloaded)}/{len(packages)} packages"
)
return downloaded return downloaded
def cleanup_package(self, package: Package): def cleanup_package(self, package: Package):

View File

@@ -36,7 +36,7 @@ class WebGenerator:
# Setup Jinja2 environment # Setup Jinja2 environment
self.env = Environment( self.env = Environment(
loader=FileSystemLoader(str(self.template_dir)), loader=FileSystemLoader(str(self.template_dir)),
autoescape=select_autoescape(['html', 'xml']) autoescape=select_autoescape(["html", "xml"]),
) )
def generate_manpage_html(self, man_file: ManFile, version: str) -> bool: def generate_manpage_html(self, man_file: ManFile, version: str) -> bool:
@@ -54,7 +54,7 @@ class WebGenerator:
return False return False
try: try:
template = self.env.get_template('manpage.html') template = self.env.get_template("manpage.html")
html = template.render( html = template.render(
title=f"{man_file.display_name} - {man_file.package_name} - Rocky Linux {version}", title=f"{man_file.display_name} - {man_file.package_name} - Rocky Linux {version}",
@@ -62,8 +62,8 @@ class WebGenerator:
package_name=man_file.package_name, package_name=man_file.package_name,
version=version, version=version,
section=man_file.section, section=man_file.section,
language=man_file.language or 'en', language=man_file.language or "en",
content=man_file.html_content content=man_file.html_content,
) )
# Ensure output path is set # Ensure output path is set
@@ -72,7 +72,7 @@ class WebGenerator:
man_file.html_path.parent.mkdir(parents=True, exist_ok=True) man_file.html_path.parent.mkdir(parents=True, exist_ok=True)
with open(man_file.html_path, 'w', encoding='utf-8') as f: with open(man_file.html_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
return True return True
@@ -92,19 +92,19 @@ class WebGenerator:
True if successful True if successful
""" """
try: try:
template = self.env.get_template('index.html') template = self.env.get_template("index.html")
html = template.render( html = template.render(
title=f"Rocky Linux {version} Man Pages", title=f"Rocky Linux {version} Man Pages",
version=version, version=version,
total_pages=len(search_data), total_pages=len(search_data),
packages=sorted(search_data.keys()) packages=sorted(search_data.keys()),
) )
index_path = self.output_dir / version / 'index.html' index_path = self.output_dir / version / "index.html"
index_path.parent.mkdir(parents=True, exist_ok=True) index_path.parent.mkdir(parents=True, exist_ok=True)
with open(index_path, 'w', encoding='utf-8') as f: with open(index_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
logger.info(f"Generated index for version {version}") logger.info(f"Generated index for version {version}")
@@ -114,7 +114,9 @@ class WebGenerator:
logger.error(f"Error generating index for {version}: {e}") logger.error(f"Error generating index for {version}: {e}")
return False return False
def generate_packages_index(self, version: str, search_data: Dict[str, Any]) -> bool: def generate_packages_index(
self, version: str, search_data: Dict[str, Any]
) -> bool:
"""Generate full packages index page. """Generate full packages index page.
Args: Args:
@@ -131,33 +133,32 @@ class WebGenerator:
for pkg_name, pages in search_data.items(): for pkg_name, pages in search_data.items():
first_char = pkg_name[0].upper() first_char = pkg_name[0].upper()
if not first_char.isalpha(): if not first_char.isalpha():
first_char = 'other' first_char = "other"
if first_char not in packages_by_letter: if first_char not in packages_by_letter:
packages_by_letter[first_char] = [] packages_by_letter[first_char] = []
packages_by_letter[first_char].append({ packages_by_letter[first_char].append(
'name': pkg_name, {"name": pkg_name, "count": len(pages)}
'count': len(pages) )
})
# Sort packages within each letter # Sort packages within each letter
for letter in packages_by_letter: for letter in packages_by_letter:
packages_by_letter[letter].sort(key=lambda x: x['name']) packages_by_letter[letter].sort(key=lambda x: x["name"])
template = self.env.get_template('packages.html') template = self.env.get_template("packages.html")
html = template.render( html = template.render(
title=f"All Packages - Rocky Linux {version}", title=f"All Packages - Rocky Linux {version}",
version=version, version=version,
total_packages=len(search_data), total_packages=len(search_data),
packages_by_letter=packages_by_letter packages_by_letter=packages_by_letter,
) )
output_path = self.output_dir / version / 'packages.html' output_path = self.output_dir / version / "packages.html"
output_path.parent.mkdir(parents=True, exist_ok=True) output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f: with open(output_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
logger.info(f"Generated packages index for version {version}") logger.info(f"Generated packages index for version {version}")
@@ -168,9 +169,7 @@ class WebGenerator:
return False return False
def generate_search_index( def generate_search_index(
self, self, man_files: List[ManFile], version: str
man_files: List[ManFile],
version: str
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Generate search index from man files. """Generate search index from man files.
@@ -191,12 +190,12 @@ class WebGenerator:
# Create entry for this man page # Create entry for this man page
entry = { entry = {
'name': man_file.name, "name": man_file.name,
'section': man_file.section, "section": man_file.section,
'display_name': man_file.display_name, "display_name": man_file.display_name,
'language': man_file.language or 'en', "language": man_file.language or "en",
'url': man_file.uri_path, "url": man_file.uri_path,
'full_name': f"{man_file.package_name} - {man_file.display_name}" "full_name": f"{man_file.package_name} - {man_file.display_name}",
} }
# Use display name as key (handles duplicates with different sections) # Use display name as key (handles duplicates with different sections)
@@ -222,18 +221,18 @@ class WebGenerator:
version_dir = self.output_dir / version version_dir = self.output_dir / version
version_dir.mkdir(parents=True, exist_ok=True) version_dir.mkdir(parents=True, exist_ok=True)
json_path = version_dir / 'search.json' json_path = version_dir / "search.json"
gz_path = version_dir / 'search.json.gz' gz_path = version_dir / "search.json.gz"
# Sort for consistency # Sort for consistency
sorted_index = {k: index[k] for k in sorted(index)} sorted_index = {k: index[k] for k in sorted(index)}
# Save plain JSON # Save plain JSON
with open(json_path, 'w', encoding='utf-8') as f: with open(json_path, "w", encoding="utf-8") as f:
json.dump(sorted_index, f, indent=2) json.dump(sorted_index, f, indent=2)
# Save gzipped JSON # Save gzipped JSON
with gzip.open(gz_path, 'wt', encoding='utf-8') as f: with gzip.open(gz_path, "wt", encoding="utf-8") as f:
json.dump(sorted_index, f) json.dump(sorted_index, f)
logger.info(f"Saved search index for {version} ({len(index)} packages)") logger.info(f"Saved search index for {version} ({len(index)} packages)")
@@ -269,24 +268,42 @@ class WebGenerator:
True if successful True if successful
""" """
try: try:
template = self.env.get_template('root.html') template = self.env.get_template("root.html")
# Sort versions numerically (e.g., 8.10, 9.6, 10.0) # Group versions by major version
def version_key(v): major_to_minors = {}
for v in versions:
try: try:
parts = v.split('.') major, minor = v.split(".")
return tuple(int(p) for p in parts) major_to_minors.setdefault(major, []).append(minor)
except (ValueError, AttributeError): except ValueError:
return (0, 0) continue # Skip invalid versions
# Sort majors ascending, minors descending within each major
sorted_majors = sorted(major_to_minors, key=int)
max_minors = max(len(major_to_minors[major]) for major in sorted_majors)
num_columns = len(sorted_majors)
# Create rows for grid layout (each row has one version from each major)
# This creates the data structure for proper column grouping
version_rows = []
for minor_idx in range(max_minors):
row = []
for major in sorted_majors:
minors_list = sorted(major_to_minors[major], key=int, reverse=True)
if minor_idx < len(minors_list):
row.append((major, minors_list[minor_idx]))
else:
row.append(None) # Placeholder for empty cells
version_rows.append(row)
html = template.render( html = template.render(
title="Rocky Linux Man Pages", title="Rocky Linux Man Pages", version_rows=version_rows, num_columns=num_columns
versions=sorted(versions, key=version_key)
) )
index_path = self.output_dir / 'index.html' index_path = self.output_dir / "index.html"
with open(index_path, 'w', encoding='utf-8') as f: with open(index_path, "w", encoding="utf-8") as f:
f.write(html) f.write(html)
logger.info("Generated root index page") logger.info("Generated root index page")

View File

@@ -255,10 +255,7 @@
Search by <a href="https://fusejs.io/" target="_blank">Fuse.js</a> Search by <a href="https://fusejs.io/" target="_blank">Fuse.js</a>
</p> </p>
<p style="margin-top: 0.5rem; font-size: 0.85rem;"> <p style="margin-top: 0.5rem; font-size: 0.85rem;">
Rocky Linux is a trademark of the Rocky Enterprise Software Foundation. Rocky Linux is a trademark of the Rocky Enterprise Software Foundation.
</p>
<p style="margin-top: 0.5rem; font-size: 0.85rem;">
This tool is open source (MIT License). See THIRD-PARTY-LICENSES.md for attributions.
</p> </p>
</footer> </footer>

View File

@@ -1,7 +1,7 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block header_title %}Rocky Linux Man Pages{% endblock %} {% block header_title %}Rocky Linux Man Pages{% endblock %}
{% block header_subtitle %}Man page documentation for Rocky Linux packages{% endblock %} {% block header_subtitle %}Man page documentation for Rocky Linux packages{% endblock %}
{% block extra_css %} {% block extra_css %}
.logo-container { .logo-container {
@@ -15,9 +15,11 @@
height: auto; height: auto;
} }
.version-grid { .version-grid {
display: grid; display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); grid-template-columns: repeat({{ num_columns }}, 1fr);
gap: 1.5rem; gap: 1.5rem;
margin-top: 2rem; margin-top: 2rem;
} }
@@ -32,7 +34,7 @@
} }
.version-grid { .version-grid {
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); grid-template-columns: 1fr;
gap: 1rem; gap: 1rem;
} }
@@ -40,9 +42,21 @@
padding: 1.5rem; padding: 1.5rem;
} }
.version-card.small {
padding: 0.75rem;
}
.version-card.small {
padding: 0.75rem;
}
.version-number { .version-number {
font-size: 2rem; font-size: 2rem;
} }
.version-card.small .version-number {
font-size: 1.5rem;
}
} }
@media (max-width: 480px) { @media (max-width: 480px) {
@@ -55,6 +69,10 @@
gap: 1rem; gap: 1rem;
} }
.version-card.small {
padding: 0.5rem;
}
.intro { .intro {
font-size: 0.9rem; font-size: 0.9rem;
} }
@@ -71,6 +89,15 @@
display: block; display: block;
} }
.version-card.small {
padding: 1rem;
opacity: 0.7;
}
.version-card.small .version-number {
font-size: 1.8rem;
}
.version-card:hover { .version-card:hover {
transform: translateY(-2px); transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3); box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
@@ -128,19 +155,27 @@
<div class="intro"> <div class="intro">
<p> <p>
Man page documentation for packages in the Rocky Linux BaseOS and AppStream repositories. Man page documentation for packages in the Rocky Linux BaseOS and AppStream repositories.
</p> </p>
</div> </div>
<div class="version-section"> <div class="version-section">
<h2>Select Version</h2> <h2>Select Version</h2>
<div class="version-grid"> <div class="version-grid">
{% for version in versions %} {% for row in version_rows %}
<a href="{{ version }}/index.html" class="version-card"> {% set outer_loop = loop %}
<div class="version-number">{{ version }}</div> {% for item in row %}
<div class="version-label">Rocky Linux™</div> {% if item %}
{% set major, minor = item %}
<a href="{{ major }}.{{ minor }}/index.html" class="version-card{% if not outer_loop.first %} small{% endif %}">
<div class="version-number">{{ major }}.{{ minor }}</div>
{% if outer_loop.first %}
<div class="version-label">Rocky Linux</div>
<div class="version-browse">Browse man pages →</div> <div class="version-browse">Browse man pages →</div>
{% endif %}
</a> </a>
{% endif %}
{% endfor %}
{% endfor %} {% endfor %}
</div> </div>
</div> </div>