This commit is contained in:
Stephen Simpson
2025-01-04 08:18:27 -06:00
commit 2287678798
16 changed files with 1534 additions and 0 deletions

View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
import sys
import argparse
import re
from bs4 import BeautifulSoup
# Simplified CSS with meaningful class names.
# main() passes this text to HTML_TEMPLATE as the {css} value, so it ends up
# inside the <style> element of every generated page.
FILTERED_CSS = """
/* General Styles */
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
background-color: #0D0A09;
color: white;
}
/* Header Styles */
.header {
background-color: #0FB981;
color: white;
padding: 1rem;
text-align: center;
}
/* Main Content Styles */
.main-content {
margin: 2rem auto;
padding: 1rem;
background-color: #282828;
color: white;
max-width: 800px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
.main-content a {
color: #0FB981;
}
.head-vol {
color: white;
}
/* Responsive Adjustments */
@media (max-width: 600px) {
.main-content {
margin: 1rem;
padding: 0.5rem;
}
}
"""
# Page skeleton that main() fills in with str.format(). Placeholders:
#   {file_name} - used in the <title> and in the header <h1>
#   {rpm_name}  - used in the <title>
#   {css}       - inline stylesheet placed inside <style>
#   {content}   - page body placed inside <main>
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{file_name} - {rpm_name} - Rocky Man Page</title>
<style>
{css}
</style>
</head>
<body>
<header class="header">
<h1>{file_name}</h1>
</header>
<main class="main-content">
{content}
</main>
</body>
</html>
"""
def clean_html(html_content):
    """
    Remove existing <html>, <head>, and <body> tags from the HTML content.

    Only the opening and closing tags themselves are removed; the markup
    between them is kept. Matching is case-insensitive.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The markup without those tags, with surrounding whitespace stripped.
    """
    # The lookahead requires the tag name to end right there, so tags that
    # merely start with one of the names (e.g. <header>) are left alone.
    wrapper_tag = r'</?(?:html|head|body)(?=[\s/>])[^>]*>'
    return re.sub(wrapper_tag, '', html_content, flags=re.IGNORECASE).strip()
def add_see_also_links(html_content):
    """
    Scan the "SEE ALSO" section of a rendered man page and return the markup.

    For every <b> element inside the section that holds the ``SEE_ALSO``
    heading, the element text is combined with the text node that directly
    follows it (for example "ls" + "(1)"). The collected references are
    reported on stderr; stdout is left alone because main() prints the
    finished page there.

    NOTE(review): the references are only collected, not yet rewritten into
    <a> elements. The target URL scheme is not visible from this script.

    Args:
        html_content: HTML fragment to scan.

    Returns:
        The HTML content, unchanged, so the caller can embed it in the page.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    references = []
    for section in soup.find_all('section', class_='Sh'):
        # Only the section that owns the "SEE ALSO" heading is of interest.
        if section.find('h1', id="SEE_ALSO") is None:
            continue
        for b_tag in section.find_all('b'):
            sibling = b_tag.next_sibling
            # The sibling can be missing or be another tag; only a text node
            # (a str subclass in BeautifulSoup) can be stripped and appended.
            suffix = sibling.strip() if isinstance(sibling, str) else ''
            references.append(b_tag.get_text() + suffix)
    if references:
        print(references, file=sys.stderr)
    return html_content
def main():
    """Read an HTML fragment from stdin and print it wrapped in the themed page."""
    arg_parser = argparse.ArgumentParser(description="Wrap HTML content with a consistent theme including nav, left pane, and right pane.")
    arg_parser.add_argument('--rpm_name', type=str, help="RPM Name")
    arg_parser.add_argument('--file_name', type=str, help="File Name")
    options = arg_parser.parse_args()

    # The fragment arrives on stdin: drop its outer document tags, then run
    # the See Also pass over what is left.
    page_body = add_see_also_links(clean_html(sys.stdin.read()))

    # The finished page goes to stdout.
    print(HTML_TEMPLATE.format(
        rpm_name=options.rpm_name,
        css=FILTERED_CSS,
        file_name=options.file_name,
        content=page_body,
    ))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,48 @@
import os
import shlex
import subprocess
from pathlib import Path
# Release identifier embedded in both paths below.
ROCKY_VERSION = "8.10"
# Root of the tree that main() walks for man page files.
MAN_PATH = f"./export/{ROCKY_VERSION}/"
# Root under which process_file() writes the generated HTML files.
HTML_BASE_PATH = f"./html_data2/{ROCKY_VERSION}/"
def process_file(file):
    """
    Convert one compressed man page into a themed HTML file.

    The page is run through ``zcat | mandoc -T html -O fragment |
    apply_template.py`` and the result is written to
    ``HTML_BASE_PATH/<rpm_name>/<man_context>/<page>.html``.

    Args:
        file: Path of the man page. It must sit below MAN_PATH; the first
            path component under MAN_PATH is taken as the RPM name and the
            fifth as the man directory.
            NOTE(review): this assumes the layout
            ``<rpm>/usr/share/man/<man dir>/<page>`` - confirm against the
            extraction step.

    Returns:
        None. Files that do not match the layout, or whose conversion
        pipeline fails, are reported on stdout and skipped.
    """
    file = Path(file)
    # pathlib drops a leading "./", so positions are counted relative to
    # MAN_PATH rather than from the start of the path.
    try:
        rel_parts = file.relative_to(MAN_PATH).parts
    except ValueError:
        print(f"Skipping file outside {MAN_PATH}: {file}")
        return
    if len(rel_parts) < 6:
        print(f"Skipping unexpected path: {file}")
        return
    rpm_name = rel_parts[0]
    man_context = rel_parts[4]

    # Strip a trailing ".gz", then the section suffix (e.g. ".1").
    page_name = file.name
    if page_name.endswith('.gz'):
        page_name = page_name[:-len('.gz')]
    man_filename = page_name.rsplit('.', 1)[0]

    output_folder = Path(HTML_BASE_PATH) / rpm_name / man_context
    output_folder.mkdir(parents=True, exist_ok=True)
    print(man_filename)

    # Every value taken from the file system is quoted before it reaches the
    # shell, so unusual file names cannot alter the command.
    command = (
        f'zcat {shlex.quote(str(file))}'
        ' | mandoc -T html -O fragment 2>/tmp/mandoc_error.log'
        f' | python3 ./apply_template.py --rpm_name {shlex.quote(rpm_name)}'
        f' --file_name {shlex.quote(man_filename)}'
    )
    try:
        html_content = subprocess.check_output(command, shell=True, text=True)
    except subprocess.CalledProcessError:
        print(f"Error processing file: {file}")
        with open('/tmp/mandoc_error.log', 'r') as error_log:
            print(error_log.read())
        return

    if html_content:
        with open(output_folder / f"{man_filename}.html", 'w') as f:
            f.write(html_content)
def main():
    """Run process_file() on every file found beneath MAN_PATH."""
    for dirpath, _, filenames in os.walk(MAN_PATH):
        base = Path(dirpath)
        for filename in filenames:
            process_file(base / filename)


if __name__ == "__main__":
    main()

46
old_scripts/convert_man.sh Executable file
View File

@@ -0,0 +1,46 @@
#! /bin/bash
# Release identifier embedded in the paths below.
ROCKY_VERSION=8.10
# Root of the tree that is searched for man page files.
MAN_PATH=./export/${ROCKY_VERSION}/
# NOTE(review): assigned empty and not referenced anywhere in the visible script.
LOCAL_MAN_PATH=
# Root under which the generated HTML pages are written.
HTML_BASE_PATH=./html_data/${ROCKY_VERSION}/
# Convert one compressed man page into a themed HTML page.
#   $1 - path of the man page; the 4th '/'-separated field is used as the RPM
#        name and the 8th as the man directory
# Writes ${HTML_BASE_PATH}/<rpm name>/<man directory>/<page>.html.
# Prints the page name on stdout, and the mandoc diagnostics if the pipeline fails.
process_file() {
    local file=$1
    local rpm_name
    rpm_name=$(echo "$file" | cut -d'/' -f 4)
    local man_context
    man_context=$(echo "$file" | cut -d'/' -f 8)
    local man_filename
    # Drop a trailing ".gz", then the numeric section suffix (e.g. ".1").
    man_filename=$(echo "$file" | awk -F'/' '{print $NF}' | sed -e 's/\.gz$//' -e 's/\.[0-9]*$//g')
    local output_folder="${HTML_BASE_PATH}/${rpm_name}/${man_context}/"
    echo "$man_filename"
    mkdir -p "${output_folder}"

    # Each invocation gets its own log file: this function runs under
    # parallel, so a fixed path would be overwritten by concurrent jobs.
    local error_log
    error_log=$(mktemp)
    local html_content
    if ! html_content=$(zcat "$file" | mandoc -T html -O fragment 2>"$error_log" | python3 ./apply_template.py --rpm_name "$rpm_name" --file_name "$man_filename"); then
        echo "Error processing file: $file"
        cat "$error_log"
        rm -f "$error_log"
        return
    fi
    rm -f "$error_log"

    # printf writes the HTML verbatim; echo -e would expand backslash
    # sequences that occur in the page text.
    if [ -n "$html_content" ]; then
        printf '%s\n' "$html_content" > "${output_folder}${man_filename}.html"
    fi
}
# Exported so the shells started by parallel can see the function and the output root.
export -f process_file
export HTML_BASE_PATH
# Run process_file once for every regular file found under MAN_PATH.
find "$MAN_PATH" -type f | parallel --will-cite process_file

28
old_scripts/extract_man.sh Executable file
View File

@@ -0,0 +1,28 @@
#! /bin/bash
# Release identifier and the directory that receives the extracted man pages.
ROCKY_VERSION=8.10
MAN_OUTPUT=./export/${ROCKY_VERSION}/

# First argument: directory that holds the RPM files.
DIRECTORY=$1
[ -n "$DIRECTORY" ] || {
    echo "Please provide the directory containing the RPM files"
    exit 1
}

mkdir -p "$MAN_OUTPUT"
# Extract the man pages contained in one RPM.
#   $1 - path of the RPM file
#   $2 - output root; pages are unpacked into <output root>/<package name>/
# Returns 1 when the package name cannot be determined.
extract_man_pages() {
    local rpm=$1
    local man_output=$2
    local man_count
    local rpm_name

    # Number of archive entries whose listing contains "/man/".
    man_count=$(rpm2cpio "$rpm" | cpio -itv --quiet | grep -c "/man/")
    rpm_name=$(rpm -qp --qf "%{NAME}\n" "$rpm")

    # Without a package name the pages would be unpacked straight into the
    # output root, mixed with those of other packages.
    if [ -z "$rpm_name" ]; then
        echo "Could not determine package name for: $rpm" >&2
        return 1
    fi

    # An empty count (listing failed) is treated as zero.
    if [ "${man_count:-0}" -ne 0 ]; then
        mkdir -p "${man_output}/${rpm_name}"
        rpm2cpio "$rpm" | cpio -idmv --quiet -D "${man_output}/${rpm_name}/" '*/man/*'
    fi
}
# Exported so the shells started by parallel can see the function.
export -f extract_man_pages
# Run extract_man_pages for every *.rpm file under DIRECTORY, passing the output root as second argument.
find "$DIRECTORY" -type f -name "*.rpm" | parallel --will-cite -j+0 extract_man_pages {} "$MAN_OUTPUT"

View File

@@ -0,0 +1,95 @@
import os
import json
import gzip
from string import Template
from collections import defaultdict
from fnmatch import fnmatch
from jinja2 import Environment, FileSystemLoader
# Templates are looked up relative to the current working directory.
env = Environment(loader=FileSystemLoader('.'))
# Template rendered by generate_links_html().
template = env.get_template('templates/index.j2')
directory = '/data/html_data' # Change this to your directory path
# Release directory name; generate_sitemap() also cuts each walked path at its first occurrence.
rocky_version = "8.10"
def generate_sitemap(directory):
    """
    Walk *directory* and index every generated man page by package and command.

    Each file path is cut at the first occurrence of ``rocky_version``; the
    remainder is read as ``/<package>/<man_type>/<command>.html``. Index,
    links, list and sitemap files are skipped, as are paths that are too
    short to match that layout.

    Args:
        directory: Root directory to scan.

    Returns:
        Nested mapping ``links[package][command]`` whose values are dicts
        with the keys ``url``, ``man_type``, ``man_type_number`` and
        ``fullname``.
    """
    links = defaultdict(lambda: defaultdict(dict))
    skip_patterns = ['/index.html', '/links.html', '/list.json*', '/sitemap*']
    for root, _, files in os.walk(directory):
        for file in files:
            full_filepath = os.path.join(root, file)
            filepath = full_filepath.split(rocky_version, 1)[-1]
            if any(fnmatch(filepath, pattern) for pattern in skip_patterns):
                continue
            filepath_parts = filepath.split('/')
            # Need at least '', package, man_type and file name.
            if len(filepath_parts) < 4:
                continue
            package_name, man_type, command_file = filepath_parts[1:4]
            # Remove the literal "man" prefix only; str.lstrip would strip any
            # run of the characters m/a/n and turn "mann" into "".
            if man_type.startswith('man'):
                man_type_number = man_type[len('man'):]
            else:
                man_type_number = man_type
            command = command_file.split('.html', 1)[0]
            if filepath.startswith('/'):
                filepath = filepath[1:]
            fullname = f"{package_name} - {command}({man_type_number})"
            links[package_name][command] = {
                "url": filepath,
                "man_type": man_type,
                "man_type_number": man_type_number,
                "fullname": fullname
            }
    return links
def generate_links_html(links):
    """
    Render the links page: one heading per package followed by its commands.

    Args:
        links: Nested mapping ``links[package][command]`` whose values carry
            at least the keys ``url`` and ``man_type_number``.

    Returns:
        The page produced by rendering the module-level ``template`` with the
        generated list as ``main_content``.
    """
    # NOTE(review): package and command names are inserted without HTML escaping.
    fragments = []
    for package_name, commands in links.items():
        fragments.append(f"<h2>{package_name}</h2>")
        fragments.append("<ul>")
        for command, details in commands.items():
            url = details['url']
            man_type_number = details['man_type_number']
            fragments.append(f"<li><a href='{url}'>{command}</a>({man_type_number})</li>")
        fragments.append("</ul>")
    links_html = "".join(fragments)
    data = {
        'title': f"Rocky Man Page - {rocky_version}",
        'header_title': f"Rocky Man Page - {rocky_version}",
        'main_content': f"{links_html}"
    }
    return template.render(data)
def convert_sitemap_to_json(links, minify=False):
    """
    Serialize the sitemap mapping to a JSON string.

    With *minify* set, the output uses the most compact separators; otherwise
    it is indented by four spaces.
    """
    dump_options = {'separators': (',', ':')} if minify else {'indent': 4}
    return json.dumps(links, **dump_options)
if __name__ == "__main__":
    sitemap = generate_sitemap(directory)
    output_root = f"{directory}/{rocky_version}"

    # Listing of every page, rendered through the template.
    with open(f"{output_root}/links.html", "w") as links_file:
        links_file.write(generate_links_html(sitemap))

    # Minified JSON index, written once as plain text...
    with open(f"{output_root}/list.json", "w") as json_file:
        json_file.write(convert_sitemap_to_json(sitemap, minify=True))

    # ...and once gzip-compressed.
    with gzip.open(f"{output_root}/list.json.gz", "wb") as gz_file:
        gz_file.write(convert_sitemap_to_json(sitemap, minify=True).encode('utf-8'))

View File

@@ -0,0 +1,32 @@
from jinja2 import Environment, FileSystemLoader
import os
# Templates are looked up relative to the current working directory.
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('page.j2')

# Values handed to the template.
data = dict(
    title='Rocky Man Page - 8.10',
    header_title='Welcome to Rocky Man Page',
    main_content='<input type="text" id="searchInput" placeholder="Search..."><ul><li>Item 1</li><li>Item 2</li></ul>',
)

# Render the page and write it to stdout.
output = template.render(data)
print(output)

View File

@@ -0,0 +1,54 @@
import os
import json
import argparse
from collections import defaultdict
# Release directory name; create_sitemap() cuts each walked path at its first occurrence.
rocky_version = "8.10"
def create_sitemap(directory):
    """
    Walk *directory* and index every generated man page by package and command.

    Each file path is cut at the first occurrence of ``rocky_version``; the
    remainder is read as ``/<package>/<man_type>/<command>.html``. Paths that
    contain an index, sitemap or list file name are skipped, as are paths
    that are too short to match that layout.

    Args:
        directory: Root directory to scan.

    Returns:
        Nested mapping ``sitemap[package][command]`` whose values are dicts
        with the keys ``url``, ``mantype`` and ``fullname``.
    """
    sitemap = defaultdict(lambda: defaultdict(dict))
    # A path containing any of these names is not a man page.
    excluded_names = ('index.html', 'sitemap.json', 'sitemap.xml', 'list.json')
    for root, _, files in os.walk(directory):
        for file in files:
            full_filepath = os.path.join(root, file)
            filepath = full_filepath.split(rocky_version, 1)[-1]
            if any(name in filepath for name in excluded_names):
                continue
            filepath_parts = filepath.split('/')
            # Need at least '', package, man_type and file name.
            if len(filepath_parts) < 4:
                continue
            package_name, man_type, command_file = filepath_parts[1:4]
            # Remove the literal "man" prefix only; str.lstrip would strip any
            # run of the characters m/a/n and turn "mann" into "".
            if man_type.startswith('man'):
                man_type_number = man_type[len('man'):]
            else:
                man_type_number = man_type
            command = command_file.split('.html', 1)[0]
            if filepath.startswith('/'):
                filepath = filepath[1:]
            fullname = f"{package_name} - {command}({man_type_number})"
            # Add command details to sitemap
            sitemap[package_name][command] = {
                "url": filepath,
                "mantype": man_type,
                "fullname": fullname
            }
    return sitemap
def convert_sitemap_to_json(sitemap, minify=False):
    """
    Serialize the sitemap mapping to a JSON string.

    With *minify* set, the output uses the most compact separators; otherwise
    it is indented by four spaces.
    """
    dump_options = {'separators': (',', ':')} if minify else {'indent': 4}
    return json.dumps(sitemap, **dump_options)
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description='Generate sitemap JSON.')
    cli.add_argument('directory', type=str, help='Directory to scan for HTML files')
    cli.add_argument('--minify', action='store_true', help='Export minified JSON')
    options = cli.parse_args()

    # Build the index and write it to stdout as JSON.
    print(convert_sitemap_to_json(create_sitemap(options.directory), minify=options.minify))

135
old_scripts/index_base.html Normal file
View File

@@ -0,0 +1,135 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<!-- Favicon supplied inline as an SVG data URI (a rocket emoji). -->
<link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 36 36%22><text y=%2232%22 font-size=%2232%22>🚀</text></svg>">
<title>Rocky Man Page - 8.10</title>
<!-- Fuse.js supplies the Fuse class used by the script at the end of the page. NOTE(review): the CDN URL carries no version. -->
<script src="https://cdn.jsdelivr.net/npm/fuse.js/dist/fuse.min.js"></script>
<style>
/* General Styles */
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
background-color: #0D0A09;
color: white;
}
li {
font-size: large;
list-style-type: none;
margin-bottom: 0.5rem;
}
/* Header Styles */
.header {
background-color: #0FB981;
color: white;
padding: 1rem;
text-align: center;
}
/* Main Content Styles */
.main-content {
margin: 2rem auto;
padding: 1rem;
background-color: #282828;
color: white;
max-width: 800px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
.main-content a {
color: #0FB981;
}
.head-vol {
color: white;
}
/* Responsive Adjustments */
@media (max-width: 600px) {
.main-content {
margin: 1rem;
padding: 0.5rem;
}
}
input#searchInput {
width: 98%;
height: 2rem;
padding: 0.5rem;
border-radius: 4px;
border: 1px solid #ccc;
margin-bottom: 1rem;
font-size: 1rem;
outline: none;
transition: border-color 0.3s ease, box-shadow 0.3s ease;
}
input#searchInput:focus {
border-color: #0FB981;
box-shadow: 0 0 8px 0 #0FB981;
}
#searchInputLabel {
display: block;
font-size: larger;
margin-bottom: 1rem;
}
</style>
</head>
<body>
<header class="header">
<h1>Rocky Linux 8.10 - Man Page Listing</h1>
</header>
<main class="main-content">
<!-- The search box starts disabled with a "Loading..." placeholder; the script below enables it once the list has been loaded. -->
<label id="searchInputLabel" for="searchInput">Search:</label>
<input id="searchInput" placeholder="Loading..." oninput="searchItems()" role="search" disabled />
<br />
<!-- searchItems() fills this list with the matching pages. -->
<ul id="results"></ul>
</main>
<script>
let fuse;
let index;

// Download the gzip-compressed page list, decompress it in the browser,
// build the Fuse search index and then enable the search box.
fetch('list.json.gz')
  .then(response => {
    // fetch() only rejects on network failure, so HTTP errors are checked here.
    if (!response.ok) {
      throw new Error(`Failed to load list.json.gz: HTTP ${response.status}`);
    }
    return response.body.pipeThrough(new DecompressionStream('gzip'));
  })
  .then(stream => new Response(stream))
  .then(response => response.json())
  .then(data => {
    // The list is nested two levels deep; Fuse takes a flat array of entries.
    const flattenedData = Object.values(data).flatMap(category => Object.values(category));
    fuse = new Fuse(flattenedData, {
      keys: ['fullname'],
      threshold: 0.2
    });
    index = fuse.getIndex(); // Keep a handle on the generated index
    const searchInput = document.getElementById("searchInput");
    searchInput.placeholder = "";
    searchInput.disabled = false;
  })
  .catch(error => {
    // Leave the box disabled, but say why instead of showing "Loading..." forever.
    console.error(error);
    document.getElementById("searchInput").placeholder = "Failed to load the man page list";
  });
// Run the current query through Fuse and show the matches as a list of links.
function searchItems() {
  const { value: query } = document.getElementById("searchInput");
  const matches = fuse.search(query, { limit: 50 }); // Limit results for performance
  const list = document.getElementById("results");
  list.innerHTML = "";
  for (const { item } of matches) {
    const link = document.createElement("a");
    link.href = item.url;
    link.textContent = item.fullname;
    const entry = document.createElement("li");
    entry.appendChild(link);
    list.appendChild(entry);
  }
}
</script>
</body>
</html>

View File

@@ -0,0 +1,5 @@
beautifulsoup4==4.12.3
Jinja2==3.1.4
MarkupSafe==3.0.2
setuptools==68.2.2
soupsieve==2.6