From ea8b2c96d633e4a732140d1902928dc8ba9a944f Mon Sep 17 00:00:00 2001 From: bonamin Date: Fri, 3 Apr 2026 17:46:39 +0300 Subject: [PATCH] feature: added archetype migration script --- .../migrate_pids_hyphens_to_underscores.py | 408 ++++++++++++++++++ 1 file changed, 408 insertions(+) create mode 100644 backend/scripts/migrate_pids_hyphens_to_underscores.py diff --git a/backend/scripts/migrate_pids_hyphens_to_underscores.py b/backend/scripts/migrate_pids_hyphens_to_underscores.py new file mode 100644 index 0000000..3f78bd9 --- /dev/null +++ b/backend/scripts/migrate_pids_hyphens_to_underscores.py @@ -0,0 +1,408 @@ +""" +One-time migration: replace hyphens with underscores in archetype PIDs and all +melody PIDs/URLs that reference them. + +What this script does: + 1. Renames each archetype's PID in SQLite (built_melodies.pid) + 2. Renames the local .bsm binary file on disk + 3. Updates built_melodies.binary_path in SQLite + 4. Regenerates built_melodies.progmem_code + 5. For every melody assigned to that archetype: + a. Downloads the .bsm bytes from Firebase Storage + b. Deletes the old blob + c. Re-uploads under the new PID name -> gets new public URL + d. Updates melody.pid (if it matched old archetype PID) + e. Updates melody.url -> new Firebase URL + f. Updates both SQLite AND Firestore (if melody is published) + +Run from the backend/ directory (or scripts/ — it searches upward for .env): + + python scripts/migrate_pids_hyphens_to_underscores.py --dry-run + python scripts/migrate_pids_hyphens_to_underscores.py + +All config is auto-loaded from the project .env file. No extra arguments needed. +Optional overrides: + --db Override SQLite database path + --dry-run Preview changes without writing anything + +Requires only: firebase-admin (pip install firebase-admin) +""" + +import argparse +import json +import os +import shutil +import sqlite3 +import sys +from datetime import datetime +from pathlib import Path + + +# --------------------------------------------------------------------------- +# .env loader — searches upward from script location for a .env file +# --------------------------------------------------------------------------- + +def _load_env() -> dict: + """Parse key=value pairs from the nearest .env file up the directory tree.""" + search = Path(__file__).resolve().parent + for _ in range(4): # look up to 4 levels up + env_file = search / ".env" + if env_file.exists(): + result = {} + for line in env_file.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, val = line.partition("=") + result[key.strip()] = val.strip().strip('"').strip("'") + print(f"[INFO] Loaded config from {env_file}") + return result + search = search.parent + print("[WARN] No .env file found — relying on environment variables") + return {} + + +_env = _load_env() + + +def _cfg(key: str, default: str = "") -> str: + """Get a config value: .env first, then os.environ, then default.""" + return _env.get(key) or os.environ.get(key) or default + +# --------------------------------------------------------------------------- +# Firebase (optional – skipped if not configured) +# --------------------------------------------------------------------------- +try: + import firebase_admin + from firebase_admin import credentials, firestore, storage as fb_storage + + _fb_app = None + + def _init_firebase(sa_path: str, bucket_name: str): + global _fb_app + if _fb_app is not None: + return + cred = credentials.Certificate(sa_path) + _fb_app = firebase_admin.initialize_app(cred, { + "storageBucket": bucket_name, + }) + + def get_firestore(sa_path: str, bucket_name: str): + _init_firebase(sa_path, bucket_name) + return firestore.client() + + def get_bucket(sa_path: str, bucket_name: str): + _init_firebase(sa_path, bucket_name) + return fb_storage.bucket() + + FIREBASE_AVAILABLE = True +except Exception as _fb_err: + print(f"[WARN] Firebase unavailable: {_fb_err}") + FIREBASE_AVAILABLE = False + + def get_firestore(sa_path: str, bucket_name: str): + return None + + def get_bucket(sa_path: str, bucket_name: str): + return None + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def fix_pid(pid: str) -> str: + """Replace hyphens with underscores in a PID string.""" + return pid.replace("-", "_") + + +def needs_fix(pid: str) -> bool: + return pid is not None and "-" in pid + + +def _is_binary_blob(name: str) -> bool: + lower = (name or "").lower() + base = lower.rsplit("/", 1)[-1] + if "preview" in base: + return False + return ("binary" in base) or base.endswith(".bin") or base.endswith(".bsm") + + +def _safe_seg(raw: str | None, fallback: str) -> str: + value = (raw or "").strip() or fallback + chars = [] + for ch in value: + if ch.isalnum() or ch in ("-", "_", "."): + chars.append(ch) + else: + chars.append("_") + cleaned = "".join(chars).strip("._") + return cleaned or fallback + + +def _storage_prefixes(melody_id: str, melody_uid: str | None) -> list[str]: + uid_seg = _safe_seg(melody_uid, melody_id) + id_seg = _safe_seg(melody_id, melody_id) + prefixes = [f"melodies/{uid_seg}/"] + if uid_seg != id_seg: + prefixes.append(f"melodies/{id_seg}/") + return prefixes + + +def _progmem_array(name: str, values: list[int], vpl: int = 8) -> str: + array_name = f"melody_builtin_{name.lower()}" + lines = [f"const uint16_t PROGMEM {array_name}[] = {{"] + for i in range(0, len(values), vpl): + chunk = values[i: i + vpl] + hex_vals = [f"0x{v:04X}" for v in chunk] + suffix = "," if i + len(chunk) < len(values) else "" + lines.append(" " + ", ".join(hex_vals) + suffix) + lines.append("};") + return "\n".join(lines) + + +def _parse_notation(token: str) -> int: + token = token.strip() + if not token or token == "0": + return 0 + v = 0 + for part in token.split("+"): + try: + n = int(part.strip()) + if 1 <= n <= 16: + v |= 1 << (n - 1) + except ValueError: + pass + return v + + +def _steps_to_values(steps: str) -> list[int]: + return [_parse_notation(s) for s in steps.split(",")] + + +def _regenerate_progmem(name: str, pid: str, steps: str) -> str: + values = _steps_to_values(steps) + array_name = f"melody_builtin_{name.lower()}" + id_name = pid if pid else f"builtin_{name.lower()}" + display_name = name.replace("_", " ").title() + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + parts = [ + f"// Generated: {ts}", + f"// Melody: {display_name} | PID: {id_name}", + "", + _progmem_array(name, values), + "", + "// --- Add this entry to your MELODY_LIBRARY[] array: ---", + "// {", + f'// "{display_name}",', + f'// "{id_name}",', + f"// {array_name},", + f"// sizeof({array_name}) / sizeof(uint16_t)", + "// }", + ] + return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Main migration +# --------------------------------------------------------------------------- + +def run(dry_run: bool = False, db_path: str = ""): + label = "[DRY-RUN]" if dry_run else "[LIVE]" + db_path = db_path or _cfg("SQLITE_DB_PATH", "./data/database.db") + sa_path = _cfg("FIREBASE_SERVICE_ACCOUNT_PATH", "./firebase-service-account.json") + bucket_name = _cfg("FIREBASE_STORAGE_BUCKET") + print(f"\n{label} Database: {db_path}") + print(f"{label} Firebase available: {FIREBASE_AVAILABLE}, bucket: {bucket_name or '(not set)'}\n") + + con = sqlite3.connect(db_path) + con.row_factory = sqlite3.Row + + # ----------------------------------------------------------------------- + # Step 1: collect archetypes that need fixing + # ----------------------------------------------------------------------- + archetypes = con.execute("SELECT * FROM built_melodies").fetchall() + to_fix = [dict(a) for a in archetypes if needs_fix(a["pid"])] + + if not to_fix: + print("No archetypes with hyphens in PID found. Nothing to do.") + con.close() + return + + print(f"Found {len(to_fix)} archetype(s) with hyphens in PID:\n") + for a in to_fix: + print(f" [{a['id'][:8]}...] '{a['pid']}' → '{fix_pid(a['pid'])}' (name: {a['name']})") + print() + + bucket = get_bucket(sa_path, bucket_name) if FIREBASE_AVAILABLE and bucket_name else None + firestore_db = get_firestore(sa_path, bucket_name) if FIREBASE_AVAILABLE and bucket_name else None + + total_melodies_updated = 0 + + for archetype in to_fix: + old_pid = archetype["pid"] + new_pid = fix_pid(old_pid) + arch_id = archetype["id"] + arch_name = archetype["name"] + assigned_ids: list[str] = json.loads(archetype["assigned_melody_ids"] or "[]") + + print(f"━━━ Archetype: {arch_name} ({old_pid} → {new_pid}) ━━━") + + # ------------------------------------------------------------------- + # Step 2: rename local .bsm file + # ------------------------------------------------------------------- + old_path = Path(archetype["binary_path"]) if archetype.get("binary_path") else None + new_path = None + + if old_path and old_path.exists(): + new_path = old_path.parent / f"{new_pid}.bsm" + print(f" [BSM] {old_path.name} → {new_path.name}") + if not dry_run: + shutil.move(str(old_path), str(new_path)) + elif old_path: + # File expected but missing — still derive new path so DB is correct + new_path = old_path.parent / f"{new_pid}.bsm" + print(f" [BSM] WARNING: expected file not found: {old_path}") + else: + print(f" [BSM] No binary_path recorded, skipping file rename") + + # ------------------------------------------------------------------- + # Step 3 & 4: update SQLite — pid, binary_path, progmem_code + # ------------------------------------------------------------------- + new_progmem = _regenerate_progmem(arch_name, new_pid, archetype["steps"]) + print(f" [DB] Updating archetype record in SQLite") + if not dry_run: + con.execute( + "UPDATE built_melodies SET pid=?, binary_path=?, progmem_code=?, updated_at=? WHERE id=?", + (new_pid, str(new_path) if new_path else archetype["binary_path"], + new_progmem, datetime.utcnow().isoformat(), arch_id), + ) + con.commit() + + # ------------------------------------------------------------------- + # Step 5–7: update each assigned melody + # ------------------------------------------------------------------- + if not assigned_ids: + print(f" [MELODIES] No assigned melodies, skipping.\n") + continue + + print(f" [MELODIES] Processing {len(assigned_ids)} assigned melody(ies)...") + + for melody_id in assigned_ids: + row = con.execute("SELECT * FROM melody_drafts WHERE id=?", (melody_id,)).fetchone() + if not row: + print(f" [{melody_id[:8]}] WARNING: melody not found in SQLite, skipping") + continue + + row = dict(row) + melody_data: dict = json.loads(row["data"]) if isinstance(row["data"], str) else row["data"] + melody_uid = melody_data.get("uid") + melody_pid = melody_data.get("pid", "") + melody_url = melody_data.get("url", "") + status = row.get("status", "draft") + + # Determine if this melody's pid also has hyphens matching old archetype pid + new_melody_pid = fix_pid(melody_pid) if melody_pid and "-" in melody_pid else melody_pid + + new_url = melody_url # will be updated if Firebase succeeds + + # --------------------------------------------------------------- + # Firebase Storage: delete old blob, re-upload under new name + # --------------------------------------------------------------- + if bucket and melody_url: + try: + prefixes = _storage_prefixes(melody_id, melody_uid) + primary_prefix = prefixes[0] + + # Find and download the current binary blob + all_blobs = [] + for prefix in prefixes: + all_blobs.extend(list(bucket.list_blobs(prefix=prefix))) + binary_blobs = [b for b in all_blobs if _is_binary_blob(b.name)] + + if binary_blobs: + # Download bytes from the first (should only be one) + src_blob = binary_blobs[0] + binary_bytes = src_blob.download_as_bytes() + + new_storage_path = f"{primary_prefix}{new_pid}.bsm" + print(f" [{melody_id[:8]}] Storage: {src_blob.name.split('/')[-1]} → {new_pid}.bsm") + + if not dry_run: + # Delete old blob(s) + for b in binary_blobs: + b.delete() + + # Upload under new name + new_blob = bucket.blob(new_storage_path) + new_blob.upload_from_string(binary_bytes, content_type="application/octet-stream") + new_blob.make_public() + new_url = new_blob.public_url + else: + print(f" [{melody_id[:8]}] WARNING: no binary blob found in storage for this melody") + + except Exception as e: + print(f" [{melody_id[:8]}] ERROR during Firebase Storage operation: {e}") + elif not bucket: + print(f" [{melody_id[:8]}] Firebase not available, skipping storage rename") + + # --------------------------------------------------------------- + # Update melody data + # --------------------------------------------------------------- + changed = False + if new_melody_pid != melody_pid: + print(f" [{melody_id[:8]}] PID: '{melody_pid}' → '{new_melody_pid}'") + melody_data["pid"] = new_melody_pid + changed = True + + if new_url != melody_url: + print(f" [{melody_id[:8]}] URL updated") + melody_data["url"] = new_url + changed = True + + if not changed and new_url == melody_url: + print(f" [{melody_id[:8]}] No data changes needed") + continue + + if not dry_run: + # Update SQLite + con.execute( + "UPDATE melody_drafts SET data=? WHERE id=?", + (json.dumps(melody_data), melody_id), + ) + con.commit() + + # Update Firestore if published + if status == "published" and firestore_db: + try: + doc_ref = firestore_db.collection("melodies").document(melody_id) + update_fields = {} + if new_melody_pid != melody_pid: + update_fields["pid"] = new_melody_pid + if new_url != melody_url: + update_fields["url"] = new_url + if update_fields: + doc_ref.update(update_fields) + print(f" [{melody_id[:8]}] Firestore updated") + except Exception as e: + print(f" [{melody_id[:8]}] ERROR updating Firestore: {e}") + elif status == "published" and not firestore_db: + print(f" [{melody_id[:8]}] WARNING: melody is published but Firestore unavailable!") + + total_melodies_updated += 1 + + print() + + con.close() + print(f"{'━'*60}") + print(f"{label} Done. Archetypes fixed: {len(to_fix)}, Melody records updated: {total_melodies_updated}") + if dry_run: + print("\nThis was a dry run. No changes were made. Run without --dry-run to apply.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Migrate archetype/melody PIDs: replace hyphens with underscores") + parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing anything") + parser.add_argument("--db", default="", help="Override SQLite database path (default: read from .env)") + args = parser.parse_args() + run(dry_run=args.dry_run, db_path=args.db)