Module autotoc
autotoc.py
Adds a left side panel with a Table of Contents (TOC) to a given HTML file (for instance, a Jupyter-exported HTML page). The TOC is built from heading tags (h1 up to hN) where N is a configurable maximum level (default: 4, but can go up to 6 if needed).
The output HTML file uses a layout and styling similar to the SFPPy Wiki project.
Usage
python autotoc.py input.html output.html [max_level]
References
- Project: SFPPy Wiki Pages
- Maintained by: INRAE\olivier.vitrac@agroparistech.fr
- Version: Retrieved from ../utils/VERSION.txt (run from SFPPy/utils/)
Expand source code
#!/usr/bin/env python3
"""
autotoc.py

Adds a left side panel with a Table of Contents (TOC) to a given HTML file (for instance,
a Jupyter-exported HTML page). The TOC is built from heading tags (h1 up to hN) where N is a
configurable maximum level (default: 4, but can go up to 6 if needed).
The output HTML file uses a layout and styling similar to the SFPPy Wiki project.

Usage:
    python autotoc.py input.html output.html [max_level]

References:
    - Project: SFPPy Wiki Pages
    - Maintained by: INRAE\\olivier.vitrac@agroparistech.fr
    - Version: Retrieved from ../utils/VERSION.txt (run from SFPPy/utils/)
"""
import os
import re
import sys
import html
from bs4 import BeautifulSoup
# Constants for SFPPy project info
# Title written into the <h1> of the page banner built by add_toc_and_layout().
PROJECT_NAME = "🍏⏩🍎 SFPPy Notebooks"
# Maintainer contact string; the doubled backslash yields a single literal "\".
CONTACT = "INRAE\\olivier.vitrac@agroparistech.fr"
# Repository URL used as the link target of the GitHub badge in `miniheader`.
REPO = "https://github.com/ovitrac/SFPPy"
# Image URL of the GitHub badge shown at the top of the side panel.
BADGE = "https://img.shields.io/badge/GitHub-SFPPy-4CAF50?style=for-the-badge&logo=github"
# Address used for the "mailto:" link in `miniheader`.
# NOTE(review): the domain "agmail.com" looks like a possible typo — confirm.
EMAIL = "olivier.vitrac@agmail.com"
def get_version():
    """
    Return the SFPPy version string read from ``utils/VERSION.txt``.

    The file is looked up in the ``utils`` folder of the parent of the directory
    containing this script. The first line of the form ``version="XX.YY.ZZ"``
    (surrounding whitespace ignored) provides the value.

    Returns:
        The text found between the double quotes.

    Exits:
        Writes a message to stderr and calls ``sys.exit(1)`` when the file is
        missing or contains no matching line.
    """
    parent_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
    version_file = os.path.join(parent_dir, "utils", "VERSION.txt")

    if os.path.isfile(version_file):
        pattern = re.compile(r'^version\s*=\s*"(.*?)"$')
        with open(version_file, "r") as handle:
            for raw_line in handle:
                found = pattern.match(raw_line.strip())
                if found is not None:
                    return found.group(1)
        # File exists but none of its lines carries a version assignment.
        message = f"Error: No valid version string found in {version_file}. Ensure it contains: version=\"XX.YY.ZZ\"\n"
    else:
        message = f"Error: {version_file} not found. Please create a file with content: version=\"XX.YY.ZZ\"\n"

    sys.stderr.write(message)
    sys.exit(1)
# Resolved once at import time; get_version() terminates the process with
# exit status 1 if VERSION.txt is missing or malformed.
VERSION = get_version()
# Human-readable label. Note the "v." prefix here versus "v" (no dot) in the
# `miniheader` markup below.
SFPPy_VERSION = f"SFPPy v.{VERSION}"
# SFPPy infos
# HTML fragment placed at the top of the side panel: a GitHub badge linking to
# REPO, the version label, and a mailto link to EMAIL.
miniheader = f"""
<div style="display: flex; align-items: center; gap: 12px;">
<a href="{REPO}" target="_blank">
<img src="{BADGE}"
alt="GitHub SFPPy" style="border-radius: 8px;">
</a>
<div style="display: flex; align-items: center; font-size: 14px; font-weight: bold;">
<span style="color: #4CAF50;">SFPPy v{VERSION}</span>
<a href="mailto:{EMAIL}" title="E-mail the author: Olivier Vitrac" style="margin-left: 8px; font-size: 20px;">📩</a>
</div>
</div>
"""
def slugify(text, existing_ids):
    """
    Generate a unique, URL-friendly slug for a heading text.

    The text is lower-cased, whitespace runs become "-", and every character
    outside ``[a-z0-9-]`` is dropped. If nothing usable remains (for example a
    heading made only of symbols or non-ASCII characters), the slug falls back
    to ``"section"`` so the heading never gets an empty id.

    Args:
        text: Heading text to convert.
        existing_ids: Set of ids already in use. It is updated in place with
            the returned slug.

    Returns:
        The slug, suffixed with ``-1``, ``-2``, ... if the base is already taken.
    """
    slug = re.sub(r'\s+', '-', text.lower())
    slug = re.sub(r'[^a-z0-9\-]', '', slug)
    # An empty or hyphen-only slug would produce id="" / href="#", i.e. a dead link.
    if not slug.strip("-"):
        slug = "section"

    base = slug
    suffix = 1
    while slug in existing_ids:
        slug = f"{base}-{suffix}"
        suffix += 1
    existing_ids.add(slug)
    return slug
def generate_nested_toc(headers):
    """
    Generate nested TOC HTML from a list of headers.

    Sub-lists are placed inside the ``<li>`` of their parent entry, because a
    ``<ul>`` may only contain ``<li>`` children. When a heading level is skipped
    (e.g. the document starts at h2, or jumps from h1 to h3), a link-less
    wrapper ``<li>`` holds the deeper list.

    Args:
        headers: Iterable of ``(level, text, id)`` tuples in document order.
            ``level`` is the heading depth (1 for h1); values below 1 are
            treated as 1. ``text`` and ``id`` are HTML-escaped here.

    Returns:
        The HTML string, or ``""`` when ``headers`` is empty.
    """
    parts = []
    depth = 0          # number of <ul> elements currently open
    item_open = False  # True while the <li> at the current depth awaits </li>

    for level, text, hid in headers:
        level = max(level, 1)
        if level > depth:
            while depth < level:
                if depth == 0:
                    parts.append("<ul>\n")
                elif item_open:
                    # Nest the sub-list inside the entry that precedes it.
                    parts.append("\n<ul>\n")
                else:
                    # Skipped level: a wrapper item keeps the markup valid.
                    parts.append("<li>\n<ul>\n")
                depth += 1
                item_open = False
        else:
            # Same or shallower level: close the previous entry, then unwind.
            parts.append("</li>\n")
            while depth > level:
                parts.append("</ul>\n</li>\n")
                depth -= 1
        parts.append(f"<li><a href='#{html.escape(hid)}'>{html.escape(text)}</a>")
        item_open = True

    if depth:
        parts.append("</li>\n")
        while depth > 1:
            parts.append("</ul>\n</li>\n")
            depth -= 1
        parts.append("</ul>\n")
    return "".join(parts)
# Ids used by the injected layout; generated heading slugs must not reuse them,
# otherwise a TOC link such as "#main" would target the layout container.
_RESERVED_IDS = frozenset({"toc", "nav", "main", "content", "toggleSidebar"})

# Stylesheet appended to <head>: banner, two-panel flex layout, TOC list and toggle button.
_SIDEBAR_CSS = """
body {
font-family: 'Segoe UI', Arial, sans-serif;
margin: 0;
padding: 0;
background-color: #f9f9f9;
color: #333;
}
header {
background: #4CAF50;
color: #fff;
padding: 10px;
position: relative;
}
header h1 {
margin: 0;
font-size: 1.5em;
padding-left: 50px;
}
#content {
display: flex;
height: calc(100vh - 50px);
}
#nav {
width: 300px;
background: #fff;
border-right: 1px solid #ddd;
padding: 20px;
overflow-y: auto;
box-sizing: border-box;
flex-shrink: 0;
}
#nav.collapsed {
width: 0;
padding: 20px 0;
}
#main {
flex: 1;
padding: 20px;
overflow-y: auto;
box-sizing: border-box;
}
.toggle-btn {
position: absolute;
top: 50%;
left: 10px;
transform: translateY(-50%);
background-color: #4CAF50;
border: none;
color: white;
padding: 10px 12px;
cursor: pointer;
font-size: 1.2em;
border-radius: 4px;
z-index: 1001;
}
.toggle-btn:hover {
background-color: #45a049;
}
#toc ul {
list-style-type: none;
padding-left: 15px;
margin: 0;
}
#toc li {
margin: 5px 0;
}
#toc a {
text-decoration: none;
color: #007BFF;
}
#toc a:hover {
text-decoration: underline;
}
@media screen and (max-width: 768px) {
#nav {
position: absolute;
left: 0;
top: 50px;
height: calc(100% - 50px);
z-index: 1000;
}
}
"""

# Script appended to <body>: toggles the "collapsed" class on #nav and mirrors
# the state on the button (icon and aria-expanded).
_TOGGLE_JS = """
document.addEventListener("DOMContentLoaded", function(){
var toggleButton = document.getElementById("toggleSidebar");
var nav = document.getElementById("nav");
toggleButton.addEventListener("click", function(){
nav.classList.toggle("collapsed");
if(nav.classList.contains("collapsed")){
toggleButton.innerHTML = '<kbd>☰</kbd>';
toggleButton.setAttribute('aria-expanded', 'false');
} else {
toggleButton.innerHTML = '<kbd>✕</kbd>';
toggleButton.setAttribute('aria-expanded', 'true');
}
});
});
"""


def _collect_headers(soup, max_level):
    """Return ``[(level, text, id), ...]`` for h1..h{max_level}, assigning missing ids in place."""
    # Seed with the layout ids and every id already present anywhere in the
    # document, so a generated slug can never duplicate an existing id.
    existing_ids = set(_RESERVED_IDS)
    existing_ids.update(tag["id"] for tag in soup.find_all(id=True) if tag["id"])

    headers = []
    for header in soup.find_all([f"h{i}" for i in range(1, max_level + 1)]):
        # Jupyter exports append a "¶" anchor-link character to heading text.
        header_text = header.get_text().strip()
        if header_text.endswith("¶"):
            header_text = header_text[:-1].strip()
        # Tag names are restricted to h1..h6 by the search above.
        level = int(header.name[1])
        header_id = header.get("id")
        if not header_id:
            header_id = slugify(header_text, existing_ids)
            header["id"] = header_id
        headers.append((level, header_text, header_id))
    return headers


def add_toc_and_layout(html_content, max_level=4):
    """
    Wrap an HTML document in the sidebar layout and add a Table of Contents.

    Headings h1..h{max_level} are collected (headings without an id receive a
    generated one), the original body content is moved into ``<div id="main">``,
    and a banner, a ``<div id="nav">`` side panel (project info + TOC), a
    stylesheet and a toggle script are injected.

    Args:
        html_content: Source HTML as a string. Fragments without ``<head>`` or
            ``<body>`` are accepted; the missing elements are created.
        max_level: Deepest heading level to list; clamped to the range 1..6.

    Returns:
        The transformed document serialized as a string.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    # Clamp to valid heading levels; a value below 1 would give an empty tag filter.
    max_level = max(1, min(max_level, 6))

    headers = _collect_headers(soup, max_level)
    toc_html = "<div id='toc'><h2>Table of Contents</h2>\n" + generate_nested_toc(headers) + "</div>\n"

    # Banner with the sidebar toggle button and the project title.
    header_elem = soup.new_tag("header")
    toggle_btn = soup.new_tag("button", id="toggleSidebar", **{
        "class": "toggle-btn",
        "aria-label": "Toggle Sidebar",
        # The sidebar is rendered open, so the initial state is "true"
        # (the toggle script uses the same convention afterwards).
        "aria-expanded": "true",
    })
    toggle_btn.string = "\u2630"  # Hamburger icon
    header_elem.append(toggle_btn)
    h1_title = soup.new_tag("h1")
    h1_title.string = PROJECT_NAME
    header_elem.append(h1_title)

    # Left side panel: project info followed by the TOC.
    nav_elem = soup.new_tag("div", id="nav")
    nav_elem.append(BeautifulSoup(miniheader, "html.parser"))
    nav_elem.append(BeautifulSoup(toc_html, "html.parser"))

    # Main panel: receives the original body content, or the whole fragment
    # when the input has no <body>.
    main_elem = soup.new_tag("div", id="main")
    source_parent = soup.body if soup.body is not None else soup
    for element in list(source_parent.contents):
        main_elem.append(element.extract())

    # Container holding both panels side by side.
    content_elem = soup.new_tag("div", id="content")
    content_elem.append(nav_elem)
    content_elem.append(main_elem)

    # Rebuild the body as: banner + two-panel container.
    if soup.body is None:
        soup.append(soup.new_tag("body"))
    else:
        soup.body.clear()
    soup.body.append(header_elem)
    soup.body.append(content_elem)

    # Stylesheet goes into <head>, which is created when missing.
    style_tag = soup.new_tag("style", type="text/css")
    style_tag.string = _SIDEBAR_CSS
    if soup.head is not None:
        soup.head.append(style_tag)
    else:
        head_tag = soup.new_tag("head")
        head_tag.append(style_tag)
        soup.insert(0, head_tag)

    # Toggle script goes at the end of <body>.
    script_tag = soup.new_tag("script")
    script_tag.string = _TOGGLE_JS
    soup.body.append(script_tag)

    return str(soup)
def main():
    """
    Command-line entry point: ``python autotoc.py input.html output.html [max_level]``.

    Reads the input file as UTF-8, passes its content through
    ``add_toc_and_layout`` and writes the result to the output file as UTF-8.
    A non-integer ``max_level`` argument is reported and replaced by the
    default of 4. With fewer than two file arguments, the usage line is
    printed and the process exits with status 1.
    """
    args = sys.argv
    if len(args) < 3:
        print("Usage: python autotoc.py input.html output.html [max_level]")
        sys.exit(1)

    input_file, output_file = args[1], args[2]

    max_level = 4
    if len(args) >= 4:
        try:
            max_level = int(args[3])
        except ValueError:
            # max_level still holds the default at this point.
            print("Invalid max_level value, using default 4.")

    with open(input_file, "r", encoding="utf-8") as source:
        html_content = source.read()

    new_html = add_toc_and_layout(html_content, max_level)

    with open(output_file, "w", encoding="utf-8") as target:
        target.write(new_html)
    print(f"TOC and layout added. Output written to {output_file}")


# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Functions
def add_toc_and_layout(html_content, max_level=4)
-
Parses the HTML content, generates a Table of Contents (TOC) from heading tags (h1 to h{max_level}), and injects a left side panel with the TOC, project info, and layout styling.
Expand source code
# Ids used by the injected layout; generated heading slugs must not reuse them,
# otherwise a TOC link such as "#main" would target the layout container.
_RESERVED_IDS = frozenset({"toc", "nav", "main", "content", "toggleSidebar"})

# Stylesheet appended to <head>: banner, two-panel flex layout, TOC list and toggle button.
_SIDEBAR_CSS = """
body {
font-family: 'Segoe UI', Arial, sans-serif;
margin: 0;
padding: 0;
background-color: #f9f9f9;
color: #333;
}
header {
background: #4CAF50;
color: #fff;
padding: 10px;
position: relative;
}
header h1 {
margin: 0;
font-size: 1.5em;
padding-left: 50px;
}
#content {
display: flex;
height: calc(100vh - 50px);
}
#nav {
width: 300px;
background: #fff;
border-right: 1px solid #ddd;
padding: 20px;
overflow-y: auto;
box-sizing: border-box;
flex-shrink: 0;
}
#nav.collapsed {
width: 0;
padding: 20px 0;
}
#main {
flex: 1;
padding: 20px;
overflow-y: auto;
box-sizing: border-box;
}
.toggle-btn {
position: absolute;
top: 50%;
left: 10px;
transform: translateY(-50%);
background-color: #4CAF50;
border: none;
color: white;
padding: 10px 12px;
cursor: pointer;
font-size: 1.2em;
border-radius: 4px;
z-index: 1001;
}
.toggle-btn:hover {
background-color: #45a049;
}
#toc ul {
list-style-type: none;
padding-left: 15px;
margin: 0;
}
#toc li {
margin: 5px 0;
}
#toc a {
text-decoration: none;
color: #007BFF;
}
#toc a:hover {
text-decoration: underline;
}
@media screen and (max-width: 768px) {
#nav {
position: absolute;
left: 0;
top: 50px;
height: calc(100% - 50px);
z-index: 1000;
}
}
"""

# Script appended to <body>: toggles the "collapsed" class on #nav and mirrors
# the state on the button (icon and aria-expanded).
_TOGGLE_JS = """
document.addEventListener("DOMContentLoaded", function(){
var toggleButton = document.getElementById("toggleSidebar");
var nav = document.getElementById("nav");
toggleButton.addEventListener("click", function(){
nav.classList.toggle("collapsed");
if(nav.classList.contains("collapsed")){
toggleButton.innerHTML = '<kbd>☰</kbd>';
toggleButton.setAttribute('aria-expanded', 'false');
} else {
toggleButton.innerHTML = '<kbd>✕</kbd>';
toggleButton.setAttribute('aria-expanded', 'true');
}
});
});
"""


def _collect_headers(soup, max_level):
    """Return ``[(level, text, id), ...]`` for h1..h{max_level}, assigning missing ids in place."""
    # Seed with the layout ids and every id already present anywhere in the
    # document, so a generated slug can never duplicate an existing id.
    existing_ids = set(_RESERVED_IDS)
    existing_ids.update(tag["id"] for tag in soup.find_all(id=True) if tag["id"])

    headers = []
    for header in soup.find_all([f"h{i}" for i in range(1, max_level + 1)]):
        # Jupyter exports append a "¶" anchor-link character to heading text.
        header_text = header.get_text().strip()
        if header_text.endswith("¶"):
            header_text = header_text[:-1].strip()
        # Tag names are restricted to h1..h6 by the search above.
        level = int(header.name[1])
        header_id = header.get("id")
        if not header_id:
            header_id = slugify(header_text, existing_ids)
            header["id"] = header_id
        headers.append((level, header_text, header_id))
    return headers


def add_toc_and_layout(html_content, max_level=4):
    """
    Wrap an HTML document in the sidebar layout and add a Table of Contents.

    Headings h1..h{max_level} are collected (headings without an id receive a
    generated one), the original body content is moved into ``<div id="main">``,
    and a banner, a ``<div id="nav">`` side panel (project info + TOC), a
    stylesheet and a toggle script are injected.

    Args:
        html_content: Source HTML as a string. Fragments without ``<head>`` or
            ``<body>`` are accepted; the missing elements are created.
        max_level: Deepest heading level to list; clamped to the range 1..6.

    Returns:
        The transformed document serialized as a string.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    # Clamp to valid heading levels; a value below 1 would give an empty tag filter.
    max_level = max(1, min(max_level, 6))

    headers = _collect_headers(soup, max_level)
    toc_html = "<div id='toc'><h2>Table of Contents</h2>\n" + generate_nested_toc(headers) + "</div>\n"

    # Banner with the sidebar toggle button and the project title.
    header_elem = soup.new_tag("header")
    toggle_btn = soup.new_tag("button", id="toggleSidebar", **{
        "class": "toggle-btn",
        "aria-label": "Toggle Sidebar",
        # The sidebar is rendered open, so the initial state is "true"
        # (the toggle script uses the same convention afterwards).
        "aria-expanded": "true",
    })
    toggle_btn.string = "\u2630"  # Hamburger icon
    header_elem.append(toggle_btn)
    h1_title = soup.new_tag("h1")
    h1_title.string = PROJECT_NAME
    header_elem.append(h1_title)

    # Left side panel: project info followed by the TOC.
    nav_elem = soup.new_tag("div", id="nav")
    nav_elem.append(BeautifulSoup(miniheader, "html.parser"))
    nav_elem.append(BeautifulSoup(toc_html, "html.parser"))

    # Main panel: receives the original body content, or the whole fragment
    # when the input has no <body>.
    main_elem = soup.new_tag("div", id="main")
    source_parent = soup.body if soup.body is not None else soup
    for element in list(source_parent.contents):
        main_elem.append(element.extract())

    # Container holding both panels side by side.
    content_elem = soup.new_tag("div", id="content")
    content_elem.append(nav_elem)
    content_elem.append(main_elem)

    # Rebuild the body as: banner + two-panel container.
    if soup.body is None:
        soup.append(soup.new_tag("body"))
    else:
        soup.body.clear()
    soup.body.append(header_elem)
    soup.body.append(content_elem)

    # Stylesheet goes into <head>, which is created when missing.
    style_tag = soup.new_tag("style", type="text/css")
    style_tag.string = _SIDEBAR_CSS
    if soup.head is not None:
        soup.head.append(style_tag)
    else:
        head_tag = soup.new_tag("head")
        head_tag.append(style_tag)
        soup.insert(0, head_tag)

    # Toggle script goes at the end of <body>.
    script_tag = soup.new_tag("script")
    script_tag.string = _TOGGLE_JS
    soup.body.append(script_tag)

    return str(soup)
def generate_nested_toc(headers)
-
Generate nested TOC HTML from a list of headers. headers is a list of tuples: (level, text, id)
Expand source code
def generate_nested_toc(headers):
    """
    Generate nested TOC HTML from a list of headers.

    Sub-lists are placed inside the ``<li>`` of their parent entry, because a
    ``<ul>`` may only contain ``<li>`` children. When a heading level is skipped
    (e.g. the document starts at h2, or jumps from h1 to h3), a link-less
    wrapper ``<li>`` holds the deeper list.

    Args:
        headers: Iterable of ``(level, text, id)`` tuples in document order.
            ``level`` is the heading depth (1 for h1); values below 1 are
            treated as 1. ``text`` and ``id`` are HTML-escaped here.

    Returns:
        The HTML string, or ``""`` when ``headers`` is empty.
    """
    parts = []
    depth = 0          # number of <ul> elements currently open
    item_open = False  # True while the <li> at the current depth awaits </li>

    for level, text, hid in headers:
        level = max(level, 1)
        if level > depth:
            while depth < level:
                if depth == 0:
                    parts.append("<ul>\n")
                elif item_open:
                    # Nest the sub-list inside the entry that precedes it.
                    parts.append("\n<ul>\n")
                else:
                    # Skipped level: a wrapper item keeps the markup valid.
                    parts.append("<li>\n<ul>\n")
                depth += 1
                item_open = False
        else:
            # Same or shallower level: close the previous entry, then unwind.
            parts.append("</li>\n")
            while depth > level:
                parts.append("</ul>\n</li>\n")
                depth -= 1
        parts.append(f"<li><a href='#{html.escape(hid)}'>{html.escape(text)}</a>")
        item_open = True

    if depth:
        parts.append("</li>\n")
        while depth > 1:
            parts.append("</ul>\n</li>\n")
            depth -= 1
        parts.append("</ul>\n")
    return "".join(parts)
def get_version()
-
Extract the version number of SFPPy from the VERSION.txt file located in ../utils/VERSION.txt.
Expand source code
def get_version():
    """
    Return the SFPPy version string read from ``utils/VERSION.txt``.

    The file is looked up in the ``utils`` folder of the parent of the directory
    containing this script. The first line of the form ``version="XX.YY.ZZ"``
    (surrounding whitespace ignored) provides the value.

    Returns:
        The text found between the double quotes.

    Exits:
        Writes a message to stderr and calls ``sys.exit(1)`` when the file is
        missing or contains no matching line.
    """
    parent_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
    version_file = os.path.join(parent_dir, "utils", "VERSION.txt")

    if os.path.isfile(version_file):
        pattern = re.compile(r'^version\s*=\s*"(.*?)"$')
        with open(version_file, "r") as handle:
            for raw_line in handle:
                found = pattern.match(raw_line.strip())
                if found is not None:
                    return found.group(1)
        # File exists but none of its lines carries a version assignment.
        message = f"Error: No valid version string found in {version_file}. Ensure it contains: version=\"XX.YY.ZZ\"\n"
    else:
        message = f"Error: {version_file} not found. Please create a file with content: version=\"XX.YY.ZZ\"\n"

    sys.stderr.write(message)
    sys.exit(1)
def main()
-
Expand source code
def main():
    """
    Command-line entry point: ``python autotoc.py input.html output.html [max_level]``.

    Reads the input file as UTF-8, passes its content through
    ``add_toc_and_layout`` and writes the result to the output file as UTF-8.
    A non-integer ``max_level`` argument is reported and replaced by the
    default of 4. With fewer than two file arguments, the usage line is
    printed and the process exits with status 1.
    """
    args = sys.argv
    if len(args) < 3:
        print("Usage: python autotoc.py input.html output.html [max_level]")
        sys.exit(1)

    input_file, output_file = args[1], args[2]

    max_level = 4
    if len(args) >= 4:
        try:
            max_level = int(args[3])
        except ValueError:
            # max_level still holds the default at this point.
            print("Invalid max_level value, using default 4.")

    with open(input_file, "r", encoding="utf-8") as source:
        html_content = source.read()

    new_html = add_toc_and_layout(html_content, max_level)

    with open(output_file, "w", encoding="utf-8") as target:
        target.write(new_html)
    print(f"TOC and layout added. Output written to {output_file}")
def slugify(text, existing_ids)
-
Generate a URL-friendly slug for a given text. If the slug already exists in existing_ids, append a suffix.
Expand source code
def slugify(text, existing_ids):
    """
    Generate a unique, URL-friendly slug for a heading text.

    The text is lower-cased, whitespace runs become "-", and every character
    outside ``[a-z0-9-]`` is dropped. If nothing usable remains (for example a
    heading made only of symbols or non-ASCII characters), the slug falls back
    to ``"section"`` so the heading never gets an empty id.

    Args:
        text: Heading text to convert.
        existing_ids: Set of ids already in use. It is updated in place with
            the returned slug.

    Returns:
        The slug, suffixed with ``-1``, ``-2``, ... if the base is already taken.
    """
    slug = re.sub(r'\s+', '-', text.lower())
    slug = re.sub(r'[^a-z0-9\-]', '', slug)
    # An empty or hyphen-only slug would produce id="" / href="#", i.e. a dead link.
    if not slug.strip("-"):
        slug = "section"

    base = slug
    suffix = 1
    while slug in existing_ids:
        slug = f"{base}-{suffix}"
        suffix += 1
    existing_ids.add(slug)
    return slug