Hello everyone! Laura here.
I’m a brand new user of Bike, and so far I’m loving the app (and plan to get a license soon – thank you @jessegrosjean for your hard work on Bike – but at the moment I’m broke since most of my money went to buying a MacBook Neo). But since I also have Scrivener installed, I found the way Bike creates OPML files to be janky under the hood (and thus incompatible with other OPML apps). So, with the help of Claude AI, I created Mundus, a little Python script that turns OPML files created in Bike into files that follow the OPML 2.0 specification. The code is below:
"""
mundus.py — Cleans up a messy OPML file and makes it proper OPML 2.0.
(Mundus: Latin for 'clean' and 'world' — cleaning up your messy OPML world!)
Usage:
python mundus.py
It will ask you a few questions interactively!
What it fixes:
- Adds version="2.0" to the <opml> tag if missing
- Removes non-standard tags from <head> (e.g. <meta charset>)
- Adds a <title> to <head> if none exists (using the filename)
- Removes non-standard attributes from <body> (e.g. id="...")
- Warns you about any <outline> text that looks cut off (ends mid-sentence)
- Optional strict mode: Removes non-standard attributes from <outline> elements
- Optional notes mode: Turns child text outlines into _note attributes
- Fixes indentation so the file is nicely readable
Requires:
pip install colorama
"""
import sys
import os
from xml.etree import ElementTree as ET
from colorama import init, Fore, Style
# With autoreset=True colorama restores the default colour after every print(),
# so a coloured line never bleeds into the next one.
init(autoreset=True)


# ── Handy shortcuts for coloured prefixes ─────────────────────────────────────
def _badge(colour, label):
    """Return *label* in bright *colour*, reset, followed by one space."""
    return f"{Style.BRIGHT}{colour}{label}{Style.RESET_ALL} "


OK = _badge(Fore.GREEN, "[ OK ]")
FIX = _badge(Fore.CYAN, "[FIX ]")
WARN = _badge(Fore.YELLOW, "[WARN]")
ERR = _badge(Fore.RED, "[ERR ]")
INFO = _badge(Fore.BLUE, "[INFO]")
DONE = _badge(Fore.MAGENTA, "[DONE]")
# ── Helpers ──────────────────────────────────────────────────────────────────
def looks_cut_off(text: str) -> bool:
    """
    Guess whether *text* stops mid-sentence.

    Text of six words or fewer (including empty or whitespace-only text) is
    never flagged, because short outline rows are usually headings. Longer
    text is flagged unless, after trimming surrounding whitespace, it ends
    with sentence-final punctuation, a closing quote or bracket, or a hyphen.
    """
    terminators = (".", "!", "?", "…", '"', "\u201d", "'", ")", "]", "-")
    body = text.strip()
    has_enough_words = len(body.split()) > 6
    return has_enough_words and not body.endswith(terminators)
# Outlines whose text has more than this many words are treated as prose
# ("notes"); anything shorter is treated as a heading.
_NOTE_WORD_THRESHOLD = 6


def _mergeable_note_text(outline):
    """
    Return the outline's stripped text if it may be folded into a note,
    otherwise None.

    An outline is mergeable only when it is not an RSS feed (no xmlUrl), has
    no children, carries no _note of its own (merging it away would silently
    discard that note), and its text is longer than the word threshold.
    """
    if outline.get("xmlUrl") or len(outline) > 0 or outline.get("_note"):
        return None
    text = outline.get("text", "").strip()
    if len(text.split()) > _NOTE_WORD_THRESHOLD:
        return text
    return None


def _append_note(target, note_text):
    """Append *note_text* to target's _note, separated by a blank line."""
    existing = target.get("_note", "")
    target.set("_note", f"{existing}\n\n{note_text}" if existing else note_text)


def process_outlines(elem):
    """
    Recursively merge note-like outlines into _note attributes.

    Children are processed first (bottom-up). Then, among the direct children
    of *elem*:

    - RSS feeds (outlines with xmlUrl) are left untouched.
    - A "heading" (short text, or any outline with children) absorbs the run
      of long, childless, note-free siblings that immediately follows it.
    - A long, childless outline that is the ONLY child of an <outline> parent
      becomes that parent's note.
    - Everything else is kept as-is.

    Outlines are never merged into a non-<outline> parent such as <body>, and
    an outline that already has a _note is never merged away, so no text is
    lost.

    Args:
        elem: An ElementTree element (typically <body> or an <outline>);
            it is modified in place.

    Returns:
        The number of outlines that were merged into _note attributes.
    """
    notes_merged = 0

    # Bottom-up: settle the grandchildren before looking at the children.
    for child in list(elem):
        notes_merged += process_outlines(child)

    children = list(elem)
    kept = []
    i = 0
    while i < len(children):
        child = children[i]
        i += 1

        # RSS feeds are left entirely alone.
        if child.get("xmlUrl"):
            kept.append(child)
            continue

        child_text = child.get("text", "").strip()
        is_long = len(child_text.split()) > _NOTE_WORD_THRESHOLD
        is_heading = not is_long or len(child) > 0

        if is_heading:
            # Absorb the run of note-like siblings directly after the heading.
            notes = []
            while i < len(children):
                note = _mergeable_note_text(children[i])
                if note is None:
                    break  # Next sibling is a heading, feed, or has its own note.
                notes.append(note)
                i += 1
            if notes:
                _append_note(child, "\n\n".join(notes))
                notes_merged += len(notes)
            kept.append(child)
            continue

        # A long, childless outline that does not follow a heading. If it is
        # the only child of a real outline, it is probably the parent's note.
        # Never fold into <body> (it must stay attribute-free and keep its
        # outlines), and never discard a child that carries its own _note.
        can_fold_into_parent = (
            len(children) == 1
            and elem.tag == "outline"
            and not elem.get("xmlUrl")
            and not child.get("_note")
        )
        if can_fold_into_parent:
            _append_note(elem, child_text)
            notes_merged += 1
        else:
            kept.append(child)

    # Replace the old children with the surviving ones.
    elem[:] = kept
    return notes_merged
# ── Main fixer ────────────────────────────────────────────────────────────────
def fix_opml(input_path: str, output_path: str, strict: bool, make_notes: bool):
    """
    Read an OPML file, clean it up towards OPML 2.0, and write the result.

    Steps: set version="2.0" on <opml>; make sure <head> exists (placed before
    <body>), drop non-standard <head> children and add a <title> taken from
    the input filename if none exists; clear all <body> attributes; collect
    warnings for outline text that looks cut off; optionally strip
    non-standard <outline> attributes; optionally merge note-like outlines
    into _note attributes; re-indent; write the file; print a report.

    Args:
        input_path: Path of the OPML file to read.
        output_path: Path the cleaned file is written to.
        strict: If True, remove <outline> attributes that are not in the
            allowed list.
        make_notes: If True, run process_outlines() on <body>.

    Raises:
        SystemExit: With status 1 if the file is not well-formed XML, the root
            element is not <opml>, or there is no <body>.
    """
    print(f"\n{INFO}Reading: {input_path}")
    try:
        tree = ET.parse(input_path)
    except ET.ParseError as e:
        print(f"\n{ERR}Could not read the file — it has broken XML: {e}")
        sys.exit(1)
    root = tree.getroot()

    # ── Stats tracking for the final summary ──────────────────────────────────
    stats = {
        "version_fixed": False,
        "head_tags_removed": 0,
        "title_added": False,
        "body_attrs_removed": 0,
        "outline_attrs_removed": 0,
        "notes_merged": 0,
        "cut_off_warnings": 0
    }
    cut_off_texts = []

    # ── 1. Fix <opml> root tag ────────────────────────────────────────────────
    if root.tag != "opml":
        print(f"{ERR}This doesn't look like an OPML file (root tag isn't <opml>). Stopping.")
        sys.exit(1)
    if root.get("version") != "2.0":
        root.set("version", "2.0")
        stats["version_fixed"] = True

    # ── 2. Fix <head> ─────────────────────────────────────────────────────────
    head = root.find("head")
    if head is None:
        # <head> must come before <body>, so insert it first instead of
        # appending it to the end of <opml>.
        head = ET.Element("head")
        root.insert(0, head)

    # Remove non-standard elements (like <meta charset="utf-8"/>)
    allowed_head_tags = {
        "title", "dateCreated", "dateModified",
        "ownerName", "ownerEmail", "ownerId", "docs",
        "expansionState", "vertScrollState",
        "windowTop", "windowLeft", "windowBottom", "windowRight"
    }
    # Collect first, then remove: removing while iterating would skip elements.
    to_remove = [child for child in head if child.tag not in allowed_head_tags]
    for child in to_remove:
        head.remove(child)
        stats["head_tags_removed"] += 1

    # Add <title> if missing, using the input filename without its extension
    if head.find("title") is None:
        default_title = os.path.splitext(os.path.basename(input_path))[0]
        title_elem = ET.SubElement(head, "title")
        title_elem.text = default_title
        stats["title_added"] = True

    # ── 3. Fix <body> ─────────────────────────────────────────────────────────
    body = root.find("body")
    if body is None:
        print(f"{ERR}No <body> found in this OPML file. Stopping.")
        sys.exit(1)

    # Remove ALL attributes from <body> (OPML spec says it shouldn't have any)
    if body.attrib:
        stats["body_attrs_removed"] = len(body.attrib)
        body.attrib.clear()

    # ── 4. Fix <outline> elements ─────────────────────────────────────────────
    # _note and note are in the allowed list so strict mode doesn't strip them.
    allowed_outline_attrs = {"text", "type", "xmlUrl", "htmlUrl", "description",
                             "language", "title", "version", "url", "created",
                             "isComment", "isBreakpoint", "category", "_note", "note"}
    for outline in root.iter("outline"):
        text = outline.get("text", "")

        # Warn about cut-off text
        if looks_cut_off(text):
            cut_off_texts.append(f'"{text}"')
            stats["cut_off_warnings"] += 1

        # Only strip attributes if the user chose STRICT mode
        if strict:
            extras = [a for a in outline.attrib if a not in allowed_outline_attrs]
            for attr in extras:
                del outline.attrib[attr]
                stats["outline_attrs_removed"] += 1

    # ── 5. Convert to _notes (if requested) ───────────────────────────────────
    if make_notes:
        stats["notes_merged"] = process_outlines(body)

    # ── 6. Pretty-print ───────────────────────────────────────────────────────
    ET.indent(root, space=" ")

    # ── 7. Write output ───────────────────────────────────────────────────────
    # Line breaks inside attribute values (e.g. merged _note text) are
    # serialised as "&#10;". They are deliberately left escaped: an XML parser
    # normalises a literal line break inside an attribute to a space, so the
    # escaped form is the only one that survives being read back. No textual
    # post-processing is applied to the serialised document.
    xml_string = ET.tostring(root, encoding="UTF-8", xml_declaration=True).decode("utf-8")
    with open(output_path, "w", encoding="UTF-8") as f:
        f.write(xml_string)

    # ── 8. Print Warnings (if any) ────────────────────────────────────────────
    if cut_off_texts:
        print(f"\n{Fore.YELLOW}{'─'*55}{Style.RESET_ALL}")
        print(f"{WARN}Found {stats['cut_off_warnings']} outline(s) that may be cut off.")
        print(" Please check these manually and fill in the missing text:\n")
        for w in cut_off_texts:
            print(f" {Fore.YELLOW}{w}{Style.RESET_ALL}\n")
        print(f"{Fore.YELLOW}{'─'*55}{Style.RESET_ALL}")

    # ── 9. Print Final Summary ────────────────────────────────────────────────
    print(f"\n{DONE}{'─'*43}")
    print(f"{DONE} MUNDUS CLEANING REPORT")
    print(f"{DONE}{'─'*43}")
    ver_status = f"{Fore.GREEN}Yes{Style.RESET_ALL}" if stats["version_fixed"] else f"{Fore.YELLOW}No (already 2.0){Style.RESET_ALL}"
    title_status = f"{Fore.GREEN}Yes{Style.RESET_ALL}" if stats["title_added"] else f"{Fore.YELLOW}No (already existed){Style.RESET_ALL}"
    print(f" OPML version updated to 2.0: {ver_status}")
    print(f" Non-standard <head> tags removed: {Fore.CYAN}{stats['head_tags_removed']}{Style.RESET_ALL}")
    print(f" Missing <title> added: {title_status}")
    print(f" Non-standard <body> attrs removed:{Fore.CYAN}{stats['body_attrs_removed']}{Style.RESET_ALL}")
    if strict:
        print(f" [Strict] Outline attrs removed: {Fore.CYAN}{stats['outline_attrs_removed']}{Style.RESET_ALL}")
    else:
        print(f" [Strict] Outline attrs removed: {Style.DIM}Off{Style.RESET_ALL}")
    if make_notes:
        print(f" [Notes] Text blocks merged: {Fore.CYAN}{stats['notes_merged']}{Style.RESET_ALL}")
    else:
        print(f" [Notes] Text blocks merged: {Style.DIM}Off{Style.RESET_ALL}")
    warn_color = Fore.YELLOW if stats['cut_off_warnings'] > 0 else Fore.GREEN
    print(f" Cut-off text warnings: {warn_color}{stats['cut_off_warnings']}{Style.RESET_ALL}")
    print(f"{DONE}{'─'*43}")
    print(f"\n{OK}Saved fixed file to: {output_path}")
    print(f"\n{DONE}Mundus complete! Your OPML world is clean and valid.\n")
# ── Entry point ───────────────────────────────────────────────────────────────
def _clean_path(raw: str) -> str:
    """Tidy a path typed at a prompt: trim, drop one pair of surrounding quotes, expand ~."""
    path = raw.strip()
    if len(path) >= 2 and path[0] == path[-1] and path[0] in "\"'":
        path = path[1:-1]
    return os.path.expanduser(path)


def _is_yes(answer: str) -> bool:
    """Return True for an affirmative answer ("y" or "yes", any case)."""
    return answer.strip().lower() in {"y", "yes"}


def main():
    """
    Interactive entry point.

    Asks for the input file (repeating until an existing file is given), the
    output path (defaulting to "<input name> (clean)<ext>"), and whether to
    enable strict mode and notes mode, then runs fix_opml().
    """
    print(f"\n{INFO}Welcome to Mundus! Let's clean up your OPML world.")

    # 1. Ask for input file
    while True:
        input_path = _clean_path(
            input(f"\n{Style.BRIGHT}Where is the messy OPML file?{Style.RESET_ALL} ")
        )
        if not input_path:
            print(f"{ERR}Please enter a file path.")
            continue
        if not os.path.isfile(input_path):
            print(f"{ERR}File not found: {input_path}")
            continue
        break

    # 2. Ask for output file
    base, ext = os.path.splitext(input_path)
    # Default to a sibling file with " (clean)" added so the original is kept.
    default_output = f"{base} (clean){ext or '.opml'}"
    output_path = _clean_path(
        input(f"{Style.BRIGHT}Where should I save the fixed file?{Style.RESET_ALL} (Press Enter for: {default_output}) ")
    )
    if not output_path:
        output_path = default_output

    # 3. Ask for strict mode (anything other than y/yes means no)
    strict = _is_yes(
        input(f"{Style.BRIGHT}Strict mode?{Style.RESET_ALL} Deletes extra attributes like 'id'. (y/N) ")
    )

    # 4. Ask for notes mode (anything other than y/yes means no)
    make_notes = _is_yes(
        input(f"{Style.BRIGHT}Notes mode?{Style.RESET_ALL} Turns child text into _note attributes. (y/N) ")
    )

    # Run the fixer
    fix_opml(input_path, output_path, strict, make_notes)
# Start the interactive prompts only when this file is run as a script
# (e.g. `python mundus.py`), not when it is imported as a module.
if __name__ == "__main__":
    main()
Let me know what you all think.
