diff --git a/.gitignore b/.gitignore
index 208414c..a7bb800 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,11 @@ Temporary Items
 *.sln
 *.sw?
 
+# Backup files
+*.bak
+*.bak.*
+*.tmp
+
 # Svelte related
 .svelte-kit/
 
@@ -56,3 +61,4 @@ Temporary Items
 # Make favicon generation script executable
 chmod +x ./scripts/generate-favicons.js
 chmod +x ./scripts/update-data.js
+chmod +x ./scripts/*
diff --git a/public/data/MapChart_Map.svg b/public/data/MapChart_Map.svg
new file mode 100644
index 0000000..b7f62b7
--- /dev/null
+++ b/public/data/MapChart_Map.svg
@@ -0,0 +1,4443 @@
[4443 added lines of SVG markup not reproduced here]
diff --git a/public/data/worldmap.svg b/public/data/worldmap.svg
new file mode 100644
index 0000000..62a373a
--- /dev/null
+++ b/public/data/worldmap.svg
@@ -0,0 +1,562 @@
[562 added lines of SVG markup not reproduced here]
diff --git a/public/data/worldmap_orig.svg b/public/data/worldmap_orig.svg
new file mode 100644
index 0000000..97ea70d
--- /dev/null
+++ b/public/data/worldmap_orig.svg
@@ -0,0 +1,24 @@
[24 added lines of SVG markup not reproduced here; the embedded metadata reads "image/svg+xml", "MapChart Map", "MapChart.net", "This map was created with MapChart.net, a website for creating custom maps." and "Created with mapchart.net"]
\ No newline at end of file
diff --git a/scripts/cleanup_worldmap.py b/scripts/cleanup_worldmap.py
new file mode 100644
index 0000000..6b44c45
--- /dev/null
+++ b/scripts/cleanup_worldmap.py
@@ -0,0 +1,492 @@
+#!/usr/bin/env python3
+"""
+SVG cleanup utility for worldmap.svg
+
+This script performs a series of conservative, text-based transforms on
+an SVG file to normalize path tags, strip unwanted attributes, add
+data-iso attributes when possible, and pretty-print the result.
+
+Usage:
+    python3 scripts/cleanup_worldmap.py [--in-place]
+
+By default the script does a dry run and prints a small preview. Use
+--in-place to overwrite the file (a timestamped backup will be created).
+"""
+
+from pathlib import Path
+import re
+import sys
+import argparse
+import datetime
+from xml.dom import minidom
+from xml.parsers.expat import ExpatError
+import xml.etree.ElementTree as ET
+from xml.dom import Node
+import json
+import unicodedata
+
+FILE_PATH = Path("public/data/worldmap.svg")
+ISO_JSON = FILE_PATH.parent / "ISO3166-1.json"
+
+
+def read_text(p: Path) -> str:
+    """Read a text file using UTF-8 and return its contents as a string."""
+    return p.read_text(encoding="utf-8")
+
+
+def write_text(p: Path, s: str) -> None:
+    """Write the given string to path using UTF-8 encoding."""
+    p.write_text(s, encoding="utf-8")
+
+
+def backup(p: Path) -> Path:
+    """Create a timestamped backup of path and return the backup Path."""
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    bak = p.with_suffix(p.suffix + f".bak.{ts}")
+    bak.write_text(p.read_text(encoding="utf-8"), encoding="utf-8")
+    return bak
+
+
+def validate_xml(s: str) -> (bool, str):
+    """Validate that the provided string is well-formed XML using minidom.
+
+    Returns (True, "OK") on success or (False, error_message) on failure.
+    """
+    try:
+        minidom.parseString(s)
+        return True, "OK"
+    except ExpatError as e:
+        return False, str(e)
+    except Exception as e:
+        return False, str(e)
+
+
+def normalize_name(s: str) -> str:
+    """Normalize a country name for loose matching.
+
+    Removes diacritics, lowercases, replaces punctuation with spaces and
+    collapses runs of whitespace.
+    """
+    if not s:
+        return ""
+    s = unicodedata.normalize("NFKD", s)
+    s = "".join(ch for ch in s if not unicodedata.combining(ch))
+    s = s.lower()
+    s = s.replace("&", " and ")
+    s = re.sub(r"[^a-z0-9]+", " ", s)
+    return re.sub(r"\s+", " ", s).strip()
+
+
+def extract_inner_svg(svg: str) -> str:
+    """If the file contains a nested <svg>, extract and return the inner SVG block.
+
+    This helps when the source file wraps the actual map inside an outer svg.
+    """
+    m = re.search(r"<svg\b[^>]*>", svg, flags=re.IGNORECASE)
+    if m:
+        match = re.search(r"<svg\b", svg[m.end():], flags=re.IGNORECASE)
+        if match:
+            inner_open = m.end() + match.start()
+            gt = svg.find(">", inner_open)
+            if gt == -1:
+                return svg
+            return svg[inner_open: gt + 1]
+    return svg
+
+
+def collapse_path_tags(svg: str) -> str:
+    """Replace full <path ...></path> pairs with compact self-closing tags."""
+    return re.sub(r"<path\b([^>]*)>\s*</path>", r"<path\1/>", svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def split_attributes_multiline(svg: str) -> str:
+    """Format attributes of <path> and <g> tags so each attribute appears on its own indented line.
+
+    This is purely for editor readability; it doesn't change element names or attribute values.
+    """
+    # attributes that can be very long and benefit from value-wrapping
+    LONG_ATTRS = {"d", "points", "style"}
+    MAX_WIDTH = 120
+
+    attr_pair_re = re.compile(r'([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*"([^"]*)"', flags=re.DOTALL)
+
+    def wrap_value(name: str, val: str) -> str:
+        """Wrap long attribute values into newline-separated chunks inside the quotes.
+
+        We break on spaces to avoid splitting tokens. Returns the wrapped value (no surrounding quotes).
+        """
+        if not val:
+            return val
+        if name not in LONG_ATTRS or len(val) <= MAX_WIDTH:
+            return val
+        parts = val.split()
+        lines = []
+        cur = []
+        for p in parts:
+            if cur and len(" ".join(cur + [p])) > MAX_WIDTH:
+                lines.append(" ".join(cur))
+                cur = [p]
+            else:
+                cur.append(p)
+        if cur:
+            lines.append(" ".join(cur))
+        # indent wrapped lines with two spaces so they align under the attribute
+        return "\n  ".join(lines)
+
+    def repl(m):
+        tag = m.group(1)
+        attrs = m.group(2) or ""
+        closing = m.group(3) or ">"
+        attrs = attrs.strip()
+        if not attrs:
+            return f"<{tag}{closing}"
+
+        pieces = []
+        for am in attr_pair_re.finditer(attrs):
+            aname = am.group(1)
+            aval = am.group(2)
+            wval = wrap_value(aname, aval)
+            if "\n" in wval:
+                # keep newline inside quoted value; indent continuation lines
+                piece = f'{aname}="{wval}"'
+            else:
+                piece = f'{aname}="{wval}"'
+            pieces.append(piece)
+
+        # keep any remaining raw text (rare) appended
+        tail = attr_pair_re.sub("", attrs).strip()
+        if tail:
+            pieces.append(tail)
+
+        lines = [f"<{tag}"]
+        for p in pieces:
+            # if the attribute value contains internal newlines, ensure it's indented properly
+            if "\n" in p:
+                # split first line and continuation
+                idx = p.find('="')
+                name = p[:idx]
+                val = p[idx+2:-1]
+                first, *rest = val.split('\n')
+                lines.append(f"  {name}=\"{first}\"")
+                for r in rest:
+                    lines.append(f"  {r}")
+            else:
+                lines.append(f"  {p}")
+
+        lines.append(closing)
+        return "\n".join(lines)
+
+    return re.sub(r"<(path|g)\b([^>]*)\s*(/?>)", repl, svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def add_svg_attributes(svg: str) -> str:
+    """Ensure the root <svg> tag has fill, stroke and stroke-width attributes.
+
+    This updates (or inserts) only the specified attributes on the opening
+    <svg> tag and preserves any other existing attributes.
+    """
+    def repl(m):
+        start, attrs, end = m.group(1), m.group(2), m.group(3)
+        # remove existing occurrences of these specific attributes (only on svg)
+        attrs = re.sub(r"\sfill\s*=\s*\"[^\"]*\"", "", attrs, flags=re.IGNORECASE)
+        attrs = re.sub(r"\sstroke\s*=\s*\"[^\"]*\"", "", attrs, flags=re.IGNORECASE)
+        attrs = re.sub(r"\sstroke-width\s*=\s*\"[^\"]*\"", "", attrs, flags=re.IGNORECASE)
+        attrs = re.sub(r"\s+", " ", attrs).strip()
+        mid = f" {attrs}" if attrs else ""
+        # add/overwrite desired attributes
+        return f"{start}{mid} fill=\"#fff\" stroke=\"#000\" stroke-width=\"0.2\"{end}"
+
+    return re.sub(r"(<svg\b)([^>]*?)(/?>)", repl, svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def collapse_newlines_after_svg(svg: str) -> str:
+    """Ensure there is exactly one newline immediately after the opening <svg> tag.
+
+    This prevents the script from accumulating blank lines between the
+    opening <svg> and the first child element across multiple runs.
+    """
+    pattern = re.compile(r'(<svg\b[^>]*>)\s*\n+', flags=re.IGNORECASE)
+    return pattern.sub(r'\1\n', svg, count=1)
+
+
+def strip_whitespace_text_nodes(node):
+    """Recursively remove text nodes that contain only whitespace from a DOM node.
+
+    This reduces extra blank lines produced by minidom.toprettyxml when
+    the source contains whitespace-only text nodes between elements.
+    """
+    for child in list(node.childNodes):
+        if child.nodeType == Node.TEXT_NODE:
+            if not child.data.strip():
+                node.removeChild(child)
+            continue
+        if child.hasChildNodes():
+            strip_whitespace_text_nodes(child)
+
+
+def remove_defs(svg: str) -> str:
+    """Remove any <defs>...</defs> blocks from the SVG (case-insensitive)."""
+    return re.sub(r"<defs\b[^>]*>.*?</defs>", "", svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def remove_data_geo(svg: str) -> str:
+    # only operate on path and g opening tags
+    def repl(m):
+        """Replace function used to strip data-geo* attributes from a tag match."""
+        start, attrs, end = m.group(1), m.group(2), m.group(3)
+        attrs2 = re.sub(r"\sdata-geo[-\w]*\s*=\s*\"[^\"]*\"", "", attrs, flags=re.IGNORECASE)
+        attrs2 = re.sub(r"\s+", " ", attrs2).strip()
+        return f"{start} {attrs2}{end}" if attrs2 else f"{start}{end}"
+    return re.sub(r"(<(?:path|g)\b)([^>]*?)(/?>)", repl, svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def remove_original_strokewidth(svg: str) -> str:
+    """Remove data-originalStrokeWidth attributes from the SVG text."""
+    return re.sub(r"\sdata-originalStrokeWidth\s*=\s*\"[^\"]*\"", "", svg, flags=re.IGNORECASE)
+
+
+def uppercase_data_iso(svg: str) -> str:
+    """Uppercase all data-iso attribute values for consistency."""
+    return re.sub(r'data-iso\s*=\s*"([^\"]*)"', lambda m: f'data-iso="{m.group(1).strip().upper()}"', svg, flags=re.IGNORECASE)
+
+
+def clear_fill_stroke(svg: str) -> str:
+    """Remove inline fill, stroke, stroke-width and filter/style entries from path/g tags."""
+    def repl(m):
+        start, attrs, end = m.group(1), m.group(2), m.group(3)
+        # remove explicit attributes
+        attrs = re.sub(r"\s(?:fill|stroke|stroke-width)\s*=\s*\"[^\"]*\"", "", attrs, flags=re.IGNORECASE)
+        # strip fill/stroke/filter from style
+        def style_repl(mm):
+            """Clean style attribute content by removing fill/stroke/filter entries."""
+            style = mm.group(1)
+            props = [p.strip() for p in style.split(";") if p.strip()]
+            keep = [p for p in props if p.split(":", 1)[0].strip().lower() not in ("fill", "stroke", "filter", "stroke-width")]
+            if not keep:
+                return ""
+            return f'style="{";".join(keep)}"'
+        attrs = re.sub(r'style\s*=\s*"([^"]*)"', style_repl, attrs, flags=re.IGNORECASE)
+        attrs = re.sub(r"\s+", " ", attrs).strip()
+        return f"{start} {attrs}{end}" if attrs else f"{start}{end}"
+    return re.sub(r"(<(?:path|g)\b)([^>]*?)(/?>)", repl, svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def remove_empty_groups(svg: str) -> str:
+    """Remove empty <g>...</g> groups from the SVG to tidy the markup."""
+    return re.sub(r"<g\b([^>]*)>\s*</g>", "", svg, flags=re.IGNORECASE | re.DOTALL)
+
+
+def add_data_iso(svg: str, iso_path: Path) -> str:
+    """Try to infer and add data-iso attributes using an ISO JSON mapping.
+
+    Looks at id, name, data-name attributes or an inner <title> to guess a country
+    name and maps it to an ISO alpha-2 code using the provided JSON.
+    """
+    if not iso_path.exists():
+        return svg
+    try:
+        with iso_path.open(encoding="utf-8") as fh:
+            mapping = json.load(fh)
+    except Exception:
+        return svg
+    norm_map = {normalize_name(v): k.upper() for k, v in mapping.items() if v}
+
+    # Prefer an XML-aware edit: parse and modify elements, then serialize.
+    try:
+        root = ET.fromstring(svg)
+        # detect and register default namespace if present so ET doesn't
+        # emit ns0 prefixes when serializing
+        ns_uri = None
+        if isinstance(root.tag, str) and root.tag.startswith('{'):
+            ns_uri = root.tag.split('}')[0].strip('{')
+        elif 'xmlns' in root.attrib:
+            ns_uri = root.attrib.get('xmlns')
+        if ns_uri:
+            ET.register_namespace('', ns_uri)
+
+        # iterate over all elements and handle local tag names (ignore namespace)
+        for elem in root.iter():
+            tag = elem.tag
+            local = tag.split('}', 1)[1] if '}' in tag else tag
+            if local not in ('path', 'g'):
+                continue
+            if 'data-iso' in elem.attrib:
+                continue
+            # candidate sources
+            cand = elem.get('id') or elem.get('name') or elem.get('data-name')
+            if not cand:
+                title = None
+                for child in elem:
+                    ctag = child.tag
+                    c_local = ctag.split('}', 1)[1] if '}' in ctag else ctag
+                    if c_local == 'title' and child.text:
+                        title = child.text
+                        break
+                cand = title
+            if not cand:
+                continue
+            code = norm_map.get(normalize_name(cand))
+            if code:
+                elem.set('data-iso', code)
+
+        # serialize back to a string; namespace registration prevents ns0 prefixes
+        return ET.tostring(root, encoding='unicode')
+    except Exception:
+        # fallback: keep the original regex-based approach (conservative)
+        # non-self-closing elements
+        def repl_pair(m):
+            start, attrs, inner = m.group(1), m.group(2), m.group(3)
+            if re.search(r'data-iso\s*=\s*"[^\"]*"', attrs, flags=re.IGNORECASE):
+                return m.group(0)
+            cand = None
+            for pat in (r'id\s*=\s*"([^\"]*)"', r'name\s*=\s*"([^\"]*)"', r'data-name\s*=\s*"([^\"]*)"'):
+                mm = re.search(pat, attrs, flags=re.IGNORECASE)
+                if mm:
+                    cand = mm.group(1)
+                    break
+            if not cand:
+                t = re.search(r"<title>(.*?)</title>", inner, flags=re.IGNORECASE | re.DOTALL)
+                if t:
+                    cand = t.group(1)
+            if not cand:
+                return m.group(0)
+            code = norm_map.get(normalize_name(cand))
+            if not code:
+                return m.group(0)
+            tag = start[1:]
+            attrs_str = attrs.strip()
+            mid = f" {attrs_str}" if attrs_str else ""
+            return f"{start}{mid} data-iso=\"{code}\">{inner}</{tag}>"
+
+        svg = re.sub(r"(<(?:path|g)\b)([^>]*?)>(.*?)</(?:path|g)>", repl_pair, svg, flags=re.IGNORECASE | re.DOTALL)
+
+        # self-closing
+        def repl_self(m):
+            start, attrs, tail = m.group(1), m.group(2), m.group(3)
+            if re.search(r'data-iso\s*=\s*"[^\"]*"', attrs, flags=re.IGNORECASE):
+                return m.group(0)
+            cand = None
+            for pat in (r'id\s*=\s*"([^\"]*)"', r'name\s*=\s*"([^\"]*)"', r'data-name\s*=\s*"([^\"]*)"'):
+                mm = re.search(pat, attrs, flags=re.IGNORECASE)
+                if mm:
+                    cand = mm.group(1)
+                    break
+            if not cand:
+                return m.group(0)
+            code = norm_map.get(normalize_name(cand))
+            if not code:
+                return m.group(0)
+            attrs_str = attrs.strip()
+            mid = f" {attrs_str}" if attrs_str else ""
+            return f"{start}{mid} data-iso=\"{code}\"{tail}"
+
+        svg = re.sub(r"(<(?:path|g)\b)([^>]*?)(/>)", repl_self, svg, flags=re.IGNORECASE | re.DOTALL)
+        return svg
+
+
+# ---------------------- main flow ----------------------
+
+def main(argv=None):
+    """CLI entrypoint: parse arguments, run the pipeline and optionally write the file."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--in-place", action="store_true")
+    parser.add_argument("--file", type=Path, default=FILE_PATH)
+    args = parser.parse_args(argv)
+
+    svg_path = args.file
+    if not svg_path.exists():
+        print("SVG not found:", svg_path)
+        return 2
+
+    original = read_text(svg_path)
+
+    # extract text blocks to protect them
+    text_blocks = []
+    def extract_text(s):
+        """Extract <text>...</text> blocks and replace them with unique markers."""
+        nonlocal text_blocks
+        pat = re.compile(r"(<text\b[^>]*>.*?</text>)", flags=re.IGNORECASE | re.DOTALL)
+        def r(m):
+            idx = len(text_blocks)
+            text_blocks.append(m.group(1))
+            return f"<TEXTBLOCK_{idx}/>"
+        return pat.sub(r, s)
+
+    def restore_text(s):
+        """Restore previously extracted <text> blocks back into the SVG string."""
+        for i, b in enumerate(text_blocks):
+            s = s.replace(f"<TEXTBLOCK_{i}/>", b)
+        return s
+
+    svg = extract_text(original)
+
+    steps = [
+        (extract_inner_svg, "extract_inner_svg"),
+        (add_svg_attributes, "add_svg_attributes"),
+        (collapse_path_tags, "collapse_path_tags"),
+        (remove_defs, "remove_defs"),
+        (lambda s: add_data_iso(s, ISO_JSON), "add_data_iso"),
+        (remove_data_geo, "remove_data_geo"),
+        (remove_original_strokewidth, "remove_original_strokewidth"),
+        (uppercase_data_iso, "uppercase_data_iso"),
+        (clear_fill_stroke, "clear_fill_stroke"),
+        (remove_empty_groups, "remove_empty_groups"),
+        (lambda s: s, "noop_compact"),
+    ]
+
+    last_good = svg
+    for func, name in steps:
+        print(f"[stage] {name}")
+        try:
+            svg = func(svg)
+        except Exception as e:
+            print(f"ERROR in {name}: {e}")
+            svg = last_good
+            break
+        ok, msg = validate_xml(svg)
+        if not ok:
+            print(f"Invalid XML after {name}: {msg}")
+            svg = last_good
+            break
+        last_good = svg
+
+    # pretty print
+    print("[stage] pretty_format")
+    try:
+        dom = minidom.parseString(svg)
+        # remove whitespace-only text nodes to avoid accumulating blank lines
+        strip_whitespace_text_nodes(dom)
+        pretty = dom.toprettyxml(indent="  ")
+        # remove xml decl if present
+        if pretty.startswith("<?xml"):
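
A quick way to sanity-check the new transforms is to run a couple of them on a tiny SVG string. The snippet below is a minimal sketch and not part of the patch; it assumes scripts/ has been added to PYTHONPATH so cleanup_worldmap is importable, and it only uses helpers defined in the file added above.

    # Minimal sketch (not part of the patch). Assumes scripts/ is on PYTHONPATH
    # so cleanup_worldmap can be imported as a module.
    from cleanup_worldmap import collapse_path_tags, uppercase_data_iso, validate_xml

    sample = '<svg><g><path d="M0 0 L10 10" data-iso="de"></path></g></svg>'
    sample = collapse_path_tags(sample)   # <path ...></path> becomes <path .../>
    sample = uppercase_data_iso(sample)   # data-iso="de" becomes data-iso="DE"
    ok, msg = validate_xml(sample)        # the script runs this check after every stage
    print(ok, sample)

Running the individual stages by hand like this mirrors what main() does: each transform is applied, the result is re-validated as XML, and an invalid result is rolled back to the last good snapshot.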