Init

2025-01-04 08:18:27 -06:00
commit 2287678798
16 changed files with 1534 additions and 0 deletions
--- a/old_scripts/apply_template.py
+++ b/old_scripts/apply_template.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import re
+from bs4 import BeautifulSoup
+
+# Simplified CSS with meaningful class names.
+# main() passes this string to HTML_TEMPLATE as the ``css`` value, so it ends up
+# inside the <style> element of every generated page.
+FILTERED_CSS = """
+/* General Styles */
+body {
+    font-family: Arial, sans-serif;
+    margin: 0;
+    padding: 0;
+    background-color: #0D0A09;
+    color: white;
+}
+
+/* Header Styles */
+.header {
+    background-color: #0FB981;
+    color: white;
+    padding: 1rem;
+    text-align: center;
+}
+
+/* Main Content Styles */
+.main-content {
+    margin: 2rem auto;
+    padding: 1rem;
+    background-color: #282828;
+    color: white;
+    max-width: 800px;
+    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+}
+
+.main-content a {
+    color: #0FB981;
+}
+
+.head-vol {
+    color: white;
+}
+
+/* Responsive Adjustments */
+@media (max-width: 600px) {
+    .main-content {
+        margin: 1rem;
+        padding: 0.5rem;
+    }
+}
+"""
+
+# Page skeleton filled in by main() through str.format().
+# Placeholders: {file_name} (used in <title> and the header <h1>), {rpm_name}
+# (used in <title>), {css} (inline stylesheet) and {content} (page body).
+HTML_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>{file_name} - {rpm_name} - Rocky Man Page</title>
+    <style>
+    {css}
+    </style>
+</head>
+<body>
+    <header class="header">
+        <h1>{file_name}</h1>
+    </header>
+    <main class="main-content">
+        {content}
+    </main>
+</body>
+</html>
+"""
+
+def clean_html(html_content):
+    """
+    Remove the document wrapper tags from an HTML string.
+
+    Opening and closing <html>, <head> and <body> tags are dropped
+    (case-insensitively, with or without attributes). Everything between the
+    tags is kept, and the result is stripped of surrounding whitespace.
+
+    Args:
+        html_content: HTML text to clean.
+
+    Returns:
+        The HTML text without the wrapper tags.
+    """
+    # The lookahead pins the match to the exact tag name. Without it, a
+    # "head" pattern also matches (and deletes) <header ...> and </header>.
+    wrapper_tag = r'</?(?:html|head|body)(?=[\s/>])[^>]*>'
+    html_content = re.sub(wrapper_tag, '', html_content, flags=re.IGNORECASE)
+    return html_content.strip()
+
+def add_see_also_links(html_content):
+    """
+    Inspect the "SEE ALSO" section of the HTML content.
+
+    Hyperlink generation is not implemented yet. The function only collects
+    the references found in the section (each <b> text plus the text node
+    that directly follows it, presumably the "(N)" section suffix) and
+    reports them on stderr for debugging.
+
+    Args:
+        html_content: HTML text to inspect.
+
+    Returns:
+        html_content, unchanged, so the caller always has a page body to render.
+    """
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    for section in soup.find_all('section', class_='Sh'):
+        # Only the section that carries the "SEE ALSO" heading is of interest
+        if section.find('h1', id="SEE_ALSO") is None:
+            continue
+
+        extracted_content = []
+        for b_tag in section.find_all('b'):
+            sibling = b_tag.next_sibling
+            # The sibling may be missing or another tag; only text nodes
+            # (str subclasses) can be stripped and appended.
+            suffix = sibling.strip() if isinstance(sibling, str) else ''
+            extracted_content.append(b_tag.get_text() + suffix)
+
+        # stdout carries the generated page, so diagnostics go to stderr
+        print(extracted_content, file=sys.stderr)
+
+    return html_content
+            
+def main():
+    """
+    Read an HTML fragment from stdin and print it wrapped in the themed page.
+
+    Command-line options:
+        --rpm_name   package name shown in the page title
+        --file_name  man page name shown in the page title and header
+
+    Missing options are rendered as empty strings.
+    """
+    import html
+
+    parser = argparse.ArgumentParser(description="Wrap HTML content with a consistent theme including nav, left pane, and right pane.")
+    parser.add_argument('--rpm_name', type=str, help="RPM Name")
+    parser.add_argument('--file_name', type=str, help="File Name")
+    args = parser.parse_args()
+
+    # Read HTML content from stdin
+    input_html = sys.stdin.read()
+
+    # The names end up inside HTML text, so escape markup characters
+    rpm_name = html.escape(args.rpm_name or '')
+    file_name = html.escape(args.file_name or '')
+
+    # Clean the HTML content
+    cleaned_content = clean_html(input_html)
+
+    # Fall back to the cleaned content if the helper returns nothing, so the
+    # page body is never rendered as the text "None"
+    content_with_links = add_see_also_links(cleaned_content) or cleaned_content
+
+    # Fill the HTML template
+    themed_html = HTML_TEMPLATE.format(
+        rpm_name=rpm_name,
+        css=FILTERED_CSS,
+        file_name=file_name,
+        content=content_with_links
+    )
+
+    # Output the themed HTML to stdout
+    print(themed_html)
+
+if __name__ == "__main__":
+    main()
--- a/old_scripts/convert_man.py
+++ b/old_scripts/convert_man.py
@@ -0,0 +1,48 @@
+import os
+import subprocess
+from pathlib import Path
+
+# Release string used to build the input and output paths below
+ROCKY_VERSION = "8.10"
+# Input tree walked by main()
+MAN_PATH = f"./export/{ROCKY_VERSION}/"
+# Output root; process_file() writes <rpm_name>/<man_context>/<page>.html below it
+HTML_BASE_PATH = f"./html_data2/{ROCKY_VERSION}/"
+
+def process_file(file):
+    """
+    Convert one man page under MAN_PATH to a themed HTML file.
+
+    The page is piped through ``zcat | mandoc | apply_template.py`` and the
+    result is written to HTML_BASE_PATH/<rpm_name>/<man_context>/<name>.html.
+    <rpm_name> is the first directory below MAN_PATH and <man_context> is the
+    directory that directly contains the page (e.g. "man1").
+
+    Args:
+        file: path of the man page; must lie at least two directories below
+            MAN_PATH, otherwise the file is skipped.
+
+    Returns:
+        None. Failures are reported on stdout and the file is skipped.
+    """
+    import shlex
+
+    file = Path(file)
+
+    # Path() drops a leading "./", so fixed indices into file.parts are
+    # unreliable. Derive the components relative to MAN_PATH instead.
+    try:
+        relative_parts = file.relative_to(MAN_PATH).parts
+    except ValueError:
+        relative_parts = ()
+    if len(relative_parts) < 3:
+        print(f"Skipping unexpected path: {file}")
+        return
+
+    rpm_name = relative_parts[0]
+    man_context = file.parent.name
+
+    # Drop only a trailing ".gz", then the section extension ("ls.1" -> "ls")
+    base_name = file.name[:-len('.gz')] if file.name.endswith('.gz') else file.name
+    man_filename = base_name.rsplit('.', 1)[0]
+
+    output_folder = Path(HTML_BASE_PATH) / rpm_name / man_context
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    print(man_filename)
+
+    # Every interpolated value is shell-quoted so that spaces, quotes or "$"
+    # in a file name cannot break or alter the pipeline.
+    command = (
+        f'zcat {shlex.quote(str(file))}'
+        ' | mandoc -T html -O fragment 2>/tmp/mandoc_error.log'
+        f' | python3 ./apply_template.py --rpm_name {shlex.quote(rpm_name)}'
+        f' --file_name {shlex.quote(man_filename)}'
+    )
+
+    try:
+        html_content = subprocess.check_output(command, shell=True, text=True)
+    except subprocess.CalledProcessError:
+        print(f"Error processing file: {file}")
+        with open('/tmp/mandoc_error.log', 'r') as error_log:
+            print(error_log.read())
+        return
+
+    if html_content:
+        with open(output_folder / f"{man_filename}.html", 'w') as f:
+            f.write(html_content)
+
+def main():
+    """Walk MAN_PATH recursively and hand every file found to process_file()."""
+    for dirpath, _subdirs, filenames in os.walk(MAN_PATH):
+        for filename in filenames:
+            process_file(Path(dirpath, filename))
+
+if __name__ == "__main__":
+    main()
--- a/old_scripts/convert_man.sh
+++ b/old_scripts/convert_man.sh
@@ -0,0 +1,46 @@
+#! /bin/bash
+
+# Release string used to build the input and output paths below
+ROCKY_VERSION=8.10
+# Input tree searched by the find command at the end of the script
+MAN_PATH=./export/${ROCKY_VERSION}/
+# Empty and not referenced anywhere else in this script
+LOCAL_MAN_PATH=
+# Output root; process_file writes <rpm_name>/<man_context>/<page>.html below it
+HTML_BASE_PATH=./html_data/${ROCKY_VERSION}/
+
+# Convert one gzipped man page to themed HTML.
+#   $1 - path of the man page as printed by find; fields 4 and 8 of the
+#        "/"-separated path are used as package name and man section directory
+# Writes ${HTML_BASE_PATH}/<rpm_name>/<man_context>/<page>.html and prints the
+# page name on stdout. On failure the mandoc error output is printed instead.
+process_file() {
+    local file=$1
+
+    local rpm_name
+    rpm_name=$(echo "$file" | cut -d'/' -f 4)
+    local man_context
+    man_context=$(echo "$file" | cut -d'/' -f 8)
+    local man_filename
+    # Strip only a trailing ".gz" (an unanchored, unescaped pattern would turn
+    # "pigz.1.gz" into "p"), then the numeric section suffix.
+    man_filename=$(basename -- "$file" | sed -e 's/\.gz$//' -e 's/\.[0-9]*$//')
+
+    local output_folder="${HTML_BASE_PATH}/${rpm_name}/${man_context}/"
+
+    echo "$man_filename"
+
+    mkdir -p "${output_folder}"
+
+    # Jobs run in parallel, so each one needs its own error log
+    local error_log
+    error_log=$(mktemp) || return
+
+    # Try to convert the file and capture any errors
+    local html_content
+    if ! html_content=$(zcat "$file" | mandoc -T html -O fragment 2>"$error_log" | python3 ./apply_template.py --rpm_name "$rpm_name" --file_name "$man_filename"); then
+        echo "Error processing file: $file"
+        cat "$error_log"
+        rm -f "$error_log"
+        return
+    fi
+    rm -f "$error_log"
+
+    # Skip empty output. printf writes the HTML verbatim, whereas "echo -e"
+    # would expand backslash sequences such as "\n" found in the page text.
+    if [ -n "$html_content" ]; then
+        printf '%s\n' "$html_content" > "${output_folder}${man_filename}.html"
+    fi
+}
+
+# Export the function and the variable it reads so that the shells started by
+# parallel can use them
+export -f process_file
+export HTML_BASE_PATH
+
+# Run process_file once for every regular file found below MAN_PATH
+find "$MAN_PATH" -type f | parallel --will-cite process_file
--- a/old_scripts/extract_man.sh
+++ b/old_scripts/extract_man.sh
@@ -0,0 +1,28 @@
+#! /bin/bash
+
+# Release string used to name the output directory
+ROCKY_VERSION=8.10
+# Root directory that receives one sub-directory per package
+MAN_OUTPUT=./export/${ROCKY_VERSION}/
+# First positional argument: directory searched for *.rpm files
+DIRECTORY=$1
+
+# Abort with a hint when no directory was given
+if [ -z "$DIRECTORY" ]; then
+    echo "Please provide the directory containing the RPM files"
+    exit 1
+fi
+
+mkdir -p "$MAN_OUTPUT"
+
+# Extract the man pages contained in one RPM.
+#   $1 - path of the RPM file
+#   $2 - output root; pages are unpacked below <output root>/<package name>/
+# Packages without any "/man/" path are skipped. Returns 1 when the package
+# name cannot be determined.
+extract_man_pages() {
+    local rpm=$1
+    local man_output=$2
+
+    # Keep the working variables local to the function
+    local man_count
+    man_count=$(rpm2cpio "$rpm" | cpio -itv --quiet | grep -c "/man/")
+    local rpm_name
+    rpm_name=$(rpm -qp --qf "%{NAME}\n" "$rpm")
+
+    # Without a package name the archive would be unpacked straight into the
+    # output root and mix the pages of different packages.
+    if [ -z "$rpm_name" ]; then
+        echo "Could not determine package name for: $rpm" >&2
+        return 1
+    fi
+
+    if [ "${man_count:-0}" -ne 0 ]; then
+        mkdir -p "${man_output}/${rpm_name}"
+        rpm2cpio "$rpm" | cpio -idmv --quiet -D "${man_output}/${rpm_name}/" '*/man/*'
+    fi
+}
+
+# Export the function so that the shells started by parallel can call it
+export -f extract_man_pages
+
+# Run extract_man_pages for every *.rpm file below DIRECTORY; {} is replaced
+# by the RPM path and MAN_OUTPUT is passed as the second argument
+find "$DIRECTORY" -type f -name "*.rpm" | parallel --will-cite -j+0 extract_man_pages {} "$MAN_OUTPUT"
--- a/old_scripts/generate_index.py
+++ b/old_scripts/generate_index.py
@@ -0,0 +1,95 @@
+import os
+import json
+import gzip
+from string import Template
+from collections import defaultdict
+from fnmatch import fnmatch
+from jinja2 import Environment, FileSystemLoader
+
+# Templates are looked up relative to '.', and the index template is loaded
+# as soon as this module is executed or imported
+env = Environment(loader=FileSystemLoader('.'))
+template = env.get_template('templates/index.j2')
+
+directory = '/data/html_data'  # Change this to your directory path
+# Release string: used to cut paths in generate_sitemap() and to name the
+# output directory and page titles
+rocky_version = "8.10"
+
+def generate_sitemap(directory, version=None):
+    """
+    Build a nested index of the generated man pages below ``directory``.
+
+    Each file path is cut at the first occurrence of the version string; the
+    remainder is expected to look like ``/<package>/<section dir>/<page>.html``.
+    Index, link, list and sitemap files are skipped, as is any path with fewer
+    than those three components.
+
+    Args:
+        directory: root directory to walk.
+        version: version string to cut paths at; defaults to the module-level
+            ``rocky_version``.
+
+    Returns:
+        Mapping ``{package: {command: {"url", "man_type", "man_type_number",
+        "fullname"}}}``. A command that exists in several sections of one
+        package keeps only the entry visited last.
+    """
+    if version is None:
+        version = rocky_version
+
+    excluded_patterns = ('/index.html', '/links.html', '/list.json*', '/sitemap*')
+    links = defaultdict(lambda: defaultdict(dict))
+
+    for root, _, files in os.walk(directory):
+        for file in files:
+            full_filepath = os.path.join(root, file)
+            filepath = full_filepath.split(version, 1)[-1]
+
+            if any(fnmatch(filepath, pattern) for pattern in excluded_patterns):
+                continue
+
+            filepath_parts = filepath.split('/')
+            # Needs '', package, section directory and file name at least
+            if len(filepath_parts) < 4:
+                continue
+
+            package_name, man_type, command_file = filepath_parts[1:4]
+            # Slice the prefix off: lstrip('man') strips characters, not a
+            # prefix, and would reduce the section directory "mann" to "".
+            man_type_number = man_type[len('man'):] if man_type.startswith('man') else man_type
+            command = command_file.split('.html', 1)[0]
+
+            if filepath.startswith('/'):
+                filepath = filepath[1:]
+
+            links[package_name][command] = {
+                "url": filepath,
+                "man_type": man_type,
+                "man_type_number": man_type_number,
+                "fullname": f"{package_name} - {command}({man_type_number})"
+            }
+
+    return links
+
+def generate_links_html(links):
+    """
+    Render the links page: one heading and one list of man pages per package.
+
+    Args:
+        links: mapping ``{package: {command: {"url": ..., "man_type_number": ...}}}``
+            as produced by generate_sitemap().
+
+    Returns:
+        The HTML produced by the module-level ``template``, rendered with the
+        keys 'title', 'header_title' and 'main_content'.
+    """
+    import html
+
+    fragments = []
+    for package_name, commands in links.items():
+        # Interpolate the package name; the heading used to contain the
+        # literal text "package_name".
+        fragments.append(f"<h2>{html.escape(package_name)}</h2>")
+        fragments.append("<ul>")
+        for command, details in commands.items():
+            # quote=True also escapes the single quote that delimits href
+            url = html.escape(details['url'], quote=True)
+            man_type_number = html.escape(details['man_type_number'])
+            fragments.append(f"<li><a href='{url}'>{html.escape(command)}</a>({man_type_number})</li>")
+        fragments.append("</ul>")
+    links_html = "".join(fragments)
+
+    data = {
+        'title': f"Rocky Man Page - {rocky_version}",
+        'header_title': f"Rocky Man Page - {rocky_version}",
+        'main_content': links_html
+    }
+
+    return template.render(data)
+
+def convert_sitemap_to_json(links, minify=False):
+    """
+    Serialize ``links`` to a JSON string.
+
+    With ``minify`` the output has no whitespace between tokens; otherwise it
+    is indented by four spaces.
+    """
+    dump_options = {'separators': (',', ':')} if minify else {'indent': 4}
+    return json.dumps(links, **dump_options)
+
+if __name__ == "__main__":
+    sitemap = generate_sitemap(directory)
+    output_dir = f"{directory}/{rocky_version}"
+
+    # Links page
+    with open(f"{output_dir}/links.html", "w") as links_file:
+        links_file.write(generate_links_html(sitemap))
+
+    # Minified JSON listing: once as plain text, once gzip-compressed
+    minified_json = convert_sitemap_to_json(sitemap, minify=True)
+
+    with open(f"{output_dir}/list.json", "w") as json_file:
+        json_file.write(minified_json)
+
+    with gzip.open(f"{output_dir}/list.json.gz", "wb") as gzip_file:
+        gzip_file.write(minified_json.encode('utf-8'))
--- a/old_scripts/generate_jinja.py
+++ b/old_scripts/generate_jinja.py
@@ -0,0 +1,32 @@
+from jinja2 import Environment, FileSystemLoader
+import os
+
+# Load page.j2 through a loader rooted at '.'
+env = Environment(loader=FileSystemLoader('.'))
+template = env.get_template('page.j2')
+
+# Values handed to the template
+data = dict(
+    title='Rocky Man Page - 8.10',
+    header_title='Welcome to Rocky Man Page',
+    main_content='<input type="text" id="searchInput" placeholder="Search..."><ul><li>Item 1</li><li>Item 2</li></ul>',
+)
+
+# Render the page and write it to stdout
+output = template.render(**data)
+print(output)
--- a/old_scripts/generate_json.py
+++ b/old_scripts/generate_json.py
@@ -0,0 +1,54 @@
+import os
+import json
+import argparse
+from collections import defaultdict
+
+# Release string; create_sitemap() cuts every file path at its first occurrence
+rocky_version = "8.10"
+
+def create_sitemap(directory, version=None):
+    """
+    Build a nested index of the generated man pages below ``directory``.
+
+    Each file path is cut at the first occurrence of the version string; the
+    remainder is expected to look like ``/<package>/<section dir>/<page>.html``.
+    Paths containing 'index.html', 'sitemap.json', 'sitemap.xml' or 'list.json'
+    are skipped, as is any path with fewer than those three components.
+
+    Args:
+        directory: root directory to walk.
+        version: version string to cut paths at; defaults to the module-level
+            ``rocky_version``.
+
+    Returns:
+        Mapping ``{package: {command: {"url", "mantype", "fullname"}}}``. A
+        command that exists in several sections of one package keeps only the
+        entry visited last.
+    """
+    if version is None:
+        version = rocky_version
+
+    # 'list.json' also covers 'list.json.br' and other suffixed variants
+    excluded_names = ('index.html', 'sitemap.json', 'sitemap.xml', 'list.json')
+    sitemap = defaultdict(lambda: defaultdict(dict))
+
+    for root, _, files in os.walk(directory):
+        for file in files:
+            full_filepath = os.path.join(root, file)
+            filepath = full_filepath.split(version, 1)[-1]
+
+            if any(name in filepath for name in excluded_names):
+                continue
+
+            filepath_parts = filepath.split('/')
+            # Needs '', package, section directory and file name at least;
+            # this skips stray files such as links.html in the version root.
+            if len(filepath_parts) < 4:
+                continue
+
+            package_name, man_type, command_file = filepath_parts[1:4]
+            # Slice the prefix off: lstrip('man') strips characters, not a
+            # prefix, and would reduce the section directory "mann" to "".
+            man_type_number = man_type[len('man'):] if man_type.startswith('man') else man_type
+            command = command_file.split('.html', 1)[0]
+
+            if filepath.startswith('/'):
+                filepath = filepath[1:]
+
+            # Add command details to sitemap
+            sitemap[package_name][command] = {
+                "url": filepath,
+                "mantype": man_type,
+                "fullname": f"{package_name} - {command}({man_type_number})"
+            }
+
+    return sitemap
+
+def convert_sitemap_to_json(sitemap, minify=False):
+    """Serialize ``sitemap`` to JSON: compact when ``minify`` is set, else indented by four spaces."""
+    dump_options = {'separators': (',', ':')} if minify else {'indent': 4}
+    return json.dumps(sitemap, **dump_options)
+
+if __name__ == "__main__":
+    # Command line: <directory> [--minify]
+    cli = argparse.ArgumentParser(description='Generate sitemap JSON.')
+    cli.add_argument('directory', type=str, help='Directory to scan for HTML files')
+    cli.add_argument('--minify', action='store_true', help='Export minified JSON')
+    options = cli.parse_args()
+
+    # Build the index and print it as JSON on stdout
+    print(convert_sitemap_to_json(create_sitemap(options.directory), minify=options.minify))
--- a/old_scripts/index_base.html
+++ b/old_scripts/index_base.html
@@ -0,0 +1,135 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+    <meta charset="utf-8">
+    <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 36 36%22><text y=%2232%22 font-size=%2232%22>🚀</text></svg>">
+    <title>Rocky Man Page - 8.10</title>
+    <script src="https://cdn.jsdelivr.net/npm/fuse.js/dist/fuse.min.js"></script>
+    <style>
+        /* General Styles */
+        body {
+            font-family: Arial, sans-serif;
+            margin: 0;
+            padding: 0;
+            background-color: #0D0A09;
+            color: white;
+        }
+
+        li {
+            font-size: large;
+            list-style-type: none;
+            margin-bottom: 0.5rem;
+        }
+
+        /* Header Styles */
+        .header {
+            background-color: #0FB981;
+            color: white;
+            padding: 1rem;
+            text-align: center;
+        }
+
+        /* Main Content Styles */
+        .main-content {
+            margin: 2rem auto;
+            padding: 1rem;
+            background-color: #282828;
+            color: white;
+            max-width: 800px;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+
+        .main-content a {
+            color: #0FB981;
+        }
+
+        .head-vol {
+            color: white;
+        }
+
+        /* Responsive Adjustments */
+        @media (max-width: 600px) {
+            .main-content {
+                margin: 1rem;
+                padding: 0.5rem;
+            }
+        }
+
+        input#searchInput {
+            width: 98%;
+            height: 2rem;
+            padding: 0.5rem;
+            border-radius: 4px;
+            border: 1px solid #ccc;
+            margin-bottom: 1rem;
+            font-size: 1rem;
+            outline: none;
+            transition: border-color 0.3s ease, box-shadow 0.3s ease;
+        }
+
+        input#searchInput:focus {
+            border-color: #0FB981;
+            box-shadow: 0 0 8px 0 #0FB981;
+        }
+
+        #searchInputLabel {
+            display: block;
+            font-size: larger;
+            margin-bottom: 1rem;
+        }
+    </style>
+</head>
+
+<body>
+    <header class="header">
+        <h1>Rocky Linux 8.10 - Man Page Listing</h1>
+    </header>
+    <main class="main-content">
+        <label id="searchInputLabel" for="searchInput">Search:</label>
+        <input id="searchInput" placeholder="Loading..." oninput="searchItems()" role="search" disabled />
+        <br />
+        <ul id="results"></ul>
+    </main>
+    <script>
+        // Fuse.js search instance; stays undefined until the index has loaded.
+        let fuse;
+
+        const searchBox = document.getElementById("searchInput");
+
+        // Download the gzip-compressed listing, decompress it in the browser
+        // and build the search index from the flattened entries.
+        fetch('list.json.gz')
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error(`Failed to load list.json.gz: HTTP ${response.status}`);
+                }
+                return response.body.pipeThrough(new DecompressionStream('gzip'));
+            })
+            .then(stream => new Response(stream).json())
+            .then(data => {
+                // data is { package: { command: entry } }; only the entries are searched
+                const flattenedData = Object.values(data).flatMap(category => Object.values(category));
+                fuse = new Fuse(flattenedData, {
+                    keys: ['fullname'],
+                    threshold: 0.2
+                });
+                searchBox.placeholder = "";
+                searchBox.disabled = false;
+            })
+            .catch(error => {
+                // Leave the input disabled, but tell the user why
+                console.error(error);
+                searchBox.placeholder = "Search index failed to load";
+            });
+
+        // Called on every input event: replace the result list with the
+        // matches for the current query.
+        function searchItems() {
+            const list = document.getElementById("results");
+            list.innerHTML = "";
+            if (!fuse) {
+                return;
+            }
+            const results = fuse.search(searchBox.value, { limit: 50 }); // Limit results for performance
+            results.forEach(result => {
+                const li = document.createElement("li");
+                const a = document.createElement("a");
+                a.href = result.item.url;
+                a.textContent = result.item.fullname;
+                li.appendChild(a);
+                list.appendChild(li);
+            });
+        }
+    </script>
+
+</body>
+
+</html>
--- a/old_scripts/requirements.txt
+++ b/old_scripts/requirements.txt
@@ -0,0 +1,5 @@
+beautifulsoup4==4.12.3
+Jinja2==3.1.4
+MarkupSafe==3.0.2
+setuptools==68.2.2
+soupsieve==2.6