diff --git a/api/composers.py b/api/composers.py index dc85450..9a8b2bb 100644 --- a/api/composers.py +++ b/api/composers.py @@ -78,7 +78,12 @@ def list_composers(): try: local = WorkspaceLocalComposer.from_dict(c) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) continue # Use the typed view downstream so the dataclass is # load-bearing, not just a filter (Brad's review): the @@ -91,9 +96,20 @@ def list_composers(): c["workspaceFolder"] = workspace_folder composers.append((local, c)) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) except Exception as e: - print(f"Failed reading composers from {db_path}: {e}") + _logger.error( + "Failed reading composers from %s: %s (%s)", + db_path, + e, + type(e).__name__, + exc_info=True, + ) composers.sort(key=lambda pair: to_epoch_ms(pair[0].last_updated_at), reverse=True) return jsonify([c for _, c in composers]) @@ -152,7 +168,12 @@ def get_composer(composer_id): # Same drift list_composers() logs and skips at line ~78, # so a single-composer fetch can't silently return malformed # JSON the list endpoint hid. - print(f"Schema drift in workspace-local composer {composer_id}: {e}") + _logger.warning( + "Schema drift in workspace-local composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) continue # Match list_composers() at line 89 and the global # fallback below: `conversation` is normalised to [] @@ -163,7 +184,12 @@ def get_composer(composer_id): payload["conversation"] = payload.get("conversation") or [] return jsonify(payload) except SchemaError as e: - print(f"Schema drift in {db_path}: {e}") + _logger.warning( + "Schema drift in %s: %s (%s)", + db_path, + e, + type(e).__name__, + ) except (OSError, sqlite3.Error, json.JSONDecodeError, ValueError): pass @@ -186,7 +212,12 @@ def get_composer(composer_id): # Don't return malformed JSON to the client — surface the drift # as a 404 + log, matching the silent-skip behaviour of the # list endpoints for the same row. - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Schema drift in composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) return jsonify({"error": "Composer schema drift"}), 404 payload = dict(composer.raw) payload["conversation"] = payload.get("conversation") or [] diff --git a/api/config_api.py b/api/config_api.py index f5d4e47..ffefd69 100644 --- a/api/config_api.py +++ b/api/config_api.py @@ -6,6 +6,7 @@ src/app/api/get-username/route.ts GET /api/get-username """ +import logging import os import subprocess import sys @@ -16,6 +17,7 @@ from utils.workspace_path import set_workspace_path_override bp = Blueprint("config_api", __name__) +_logger = logging.getLogger(__name__) @bp.route("/api/detect-environment") @@ -44,7 +46,12 @@ def detect_environment(): }) except Exception as e: - print(f"Failed to detect environment: {e}") + _logger.warning( + "Failed to detect environment: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"os": "unknown", "isWSL": False, "isRemote": False}) @@ -80,7 +87,12 @@ def validate_path(): ) except Exception as e: - print(f"Validation error: {e}") + _logger.error( + "Validation error: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"valid": False, "error": "Failed to validate path"}), 500 @@ -135,5 +147,10 @@ def get_username(): return jsonify({"username": username}) except Exception as e: - print(f"Failed to get username: {e}") + _logger.warning( + "Failed to get username: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"username": "YOUR_USERNAME"}) diff --git a/api/export_api.py b/api/export_api.py index 4c8ddf2..b484bb1 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -6,6 +6,7 @@ import io import json +import logging import os import sqlite3 import zipfile @@ -32,6 +33,7 @@ ) bp = Blueprint("export_api", __name__) +_logger = logging.getLogger(__name__) def _get_state_dir() -> str: @@ -181,7 +183,13 @@ def export_chats(): exported.append({"path": rel_path, "content": md, "updatedAt": updated_at_ms}) except Exception as e: - print(f"Error processing composer {composer_id} for export: {e}") + _logger.error( + "Error processing composer %s for export: %s (%s)", + composer_id, + e, + type(e).__name__, + exc_info=True, + ) count = len(exported) if count == 0: @@ -208,7 +216,10 @@ def export_chats(): ) except Exception as e: - print(f"Export error: {e}") - import traceback - traceback.print_exc() + _logger.error( + "Export failed: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": f"Export failed: {str(e)}"}), 500 diff --git a/api/logs.py b/api/logs.py index a213cdc..f5607ea 100644 --- a/api/logs.py +++ b/api/logs.py @@ -14,7 +14,7 @@ from flask import Blueprint, jsonify from utils.workspace_path import resolve_workspace_path -from utils.path_helpers import to_epoch_ms +from utils.path_helpers import to_epoch_ms, warn_workspace_json_read bp = Blueprint("logs", __name__) _logger = logging.getLogger(__name__) @@ -48,8 +48,12 @@ def get_logs(): try: bubble = json.loads(row["value"]) chat_map.setdefault(chat_id, []).append(bubble) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to decode bubble row %s: %s", + row["key"], + e, + ) for chat_id, bubbles in chat_map.items(): bubbles = [b for b in bubbles if isinstance(b, dict)] @@ -90,8 +94,8 @@ def get_logs(): with open(wj_path, "r", encoding="utf-8") as f: wd = json.load(f) workspace_folder = wd.get("folder") - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, name, e) try: # closing() guarantees .close() on scope exit (issue #17). @@ -130,10 +134,18 @@ def get_logs(): "type": "composer", "messageCount": len(c.get("conversation") or []), }) - except Exception: - pass - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read logs from workspace %s: %s", + name, + e, + ) + except Exception as e: + _logger.warning( + "Failed to iterate workspaces under %s: %s", + workspace_path, + e, + ) logs.sort(key=lambda log: log.get("timestamp") or 0, reverse=True) return jsonify({"logs": logs}) diff --git a/api/pdf.py b/api/pdf.py index c5c0088..c47c1d9 100644 --- a/api/pdf.py +++ b/api/pdf.py @@ -4,11 +4,13 @@ """ import io +import logging import re from flask import Blueprint, Response, jsonify, request bp = Blueprint("pdf", __name__) +_logger = logging.getLogger(__name__) def _safe_text(text: str) -> str: @@ -168,9 +170,12 @@ def footer(self): ) except Exception as e: - print(f"Failed to generate PDF: {e}") - import traceback - traceback.print_exc() + _logger.error( + "Failed to generate PDF: %s (%s)", + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": f"Failed to generate PDF: {str(e)}"}), 500 diff --git a/api/search.py b/api/search.py index 79ed0c8..eaede04 100644 --- a/api/search.py +++ b/api/search.py @@ -16,7 +16,7 @@ from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.workspace_path import resolve_workspace_path, get_cli_chats_path -from utils.path_helpers import to_epoch_ms +from utils.path_helpers import to_epoch_ms, warn_workspace_json_read from utils.text_extract import extract_text_from_bubble from utils.cli_chat_reader import list_cli_projects, traverse_blobs, messages_to_bubbles from models import Bubble, Composer, SchemaError @@ -114,10 +114,14 @@ def search(): fn = parts[-1] if parts else None if fn: ws_id_to_name[name] = _url_unquote(fn) - except Exception: - pass - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, name, e) + except Exception as e: + _logger.warning( + "Failed to list workspace entries under %s: %s", + workspace_path, + e, + ) # Build composer → workspace mapping composer_id_to_ws = {} @@ -139,8 +143,12 @@ def search(): cid = c.get("composerId") if isinstance(c, dict) else None if cid: composer_id_to_ws[cid] = entry["name"] - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to load composer mapping from workspace %s: %s", + entry["name"], + e, + ) # Load bubble text for searching bubble_map = {} @@ -156,7 +164,12 @@ def search(): # Drift logged so the operator can see why a chat dropped # out of search results; bad row still skipped so search # keeps returning results from the well-formed ones. - print(f"Schema drift in bubble {bid}: {e}") + _logger.warning( + "Schema drift in bubble %s: %s (%s)", + bid, + e, + type(e).__name__, + ) except (json.JSONDecodeError, ValueError): pass @@ -170,7 +183,12 @@ def search(): try: composer = Composer.from_dict(json.loads(row["value"]), composer_id=composer_id) except SchemaError as e: - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Schema drift in composer %s: %s (%s)", + composer_id, + e, + type(e).__name__, + ) continue except (json.JSONDecodeError, TypeError, ValueError): continue @@ -261,8 +279,12 @@ def search(): "matchingText": matching_text, "type": "composer", }) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to process Composer from composerData:%s during search: %s", + composer_id, + e, + ) except Exception: _logger.exception("Error searching global storage") @@ -288,8 +310,8 @@ def search(): with open(wj_path, "r", encoding="utf-8") as f: wd = json.load(f) workspace_folder = wd.get("folder") - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, name, e) workspace_name = _workspace_display_name_from_folder(workspace_folder, fallback=name) # try/finally guarantees .close() on every exit path (issue #17). @@ -362,13 +384,21 @@ def search(): "type": "chat", }) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to search legacy workspace %s: %s", + name, + e, + ) finally: if conn is not None: conn.close() - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to iterate legacy workspaces under %s: %s", + workspace_path, + e, + ) # --------------------------------------------------------------- # Search Cursor CLI sessions (only for type=all) @@ -386,7 +416,12 @@ def search(): try: messages = traverse_blobs(session["db_path"]) - except Exception: + except Exception as e: + _logger.warning( + "Failed to traverse CLI session blobs for %s: %s", + session_id, + e, + ) continue bubbles = messages_to_bubbles(messages, created_ms) diff --git a/api/workspaces.py b/api/workspaces.py index e778993..2efc2fd 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -15,7 +15,11 @@ from utils.workspace_path import resolve_workspace_path, get_cli_chats_path from utils.cli_chat_reader import list_cli_projects -from utils.path_helpers import get_workspace_folder_paths, get_workspace_display_name +from utils.path_helpers import ( + get_workspace_folder_paths, + get_workspace_display_name, + warn_workspace_json_read, +) from utils.workspace_descriptor import read_json_file from services.workspace_resolver import ( _infer_workspace_name_from_context, @@ -117,7 +121,8 @@ def get_workspace(workspace_id): inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred - except Exception: + except Exception as e: + warn_workspace_json_read(_logger, workspace_id, e) inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred diff --git a/app.py b/app.py index 1caa215..afebb1c 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ from the Cursor editor's AI chat feature. """ +import logging import os import sys from datetime import datetime @@ -35,6 +36,11 @@ def _get_base_path(): def create_app(exclusion_rules_path=None): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(funcName)s: %(message)s", + ) + base = _get_base_path() app = Flask( __name__, diff --git a/requirements-lock.txt b/requirements-lock.txt index e1c0759..4a65662 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -6,7 +6,7 @@ # Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g. # colorama via click) are omitted — pip still installs them on Windows when needed. blinker==1.9.0 # via flask -click==8.4.0 # via flask +click==8.4.1 # via flask defusedxml==0.7.1 # via fpdf2 flask==3.1.3 # via -r requirements.txt fonttools==4.63.0 # via fpdf2 diff --git a/scripts/export.py b/scripts/export.py index f4f93a2..6cea025 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -74,6 +74,18 @@ _logger = logging.getLogger(__name__) +def _configure_cli_logging() -> None: + """Route log records to stderr so stdout stays for export progress lines.""" + root = logging.getLogger() + if root.handlers: + return + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s: %(message)s", + stream=sys.stderr, + ) + + def _json_dump_safe(value) -> str: """Best-effort JSON serialization for exclusion matching.""" try: @@ -165,6 +177,7 @@ def parse_args(): def main(): + _configure_cli_logging() opts = parse_args() since = opts["since"] out_dir = os.path.abspath(opts["out_dir"]) @@ -215,10 +228,9 @@ def main(): with _open_global_db(workspace_path) as (global_db, global_db_path): if global_db is None: - print( - f"Note: Cursor IDE global storage not found at {global_db_path}" - " — skipping IDE chats.", - file=sys.stderr, + _logger.info( + "Cursor IDE global storage not found at %s — skipping IDE chats.", + global_db_path, ) else: project_layouts_map = load_project_layouts_map(global_db) @@ -347,7 +359,12 @@ def main(): try: cli_projects = list_cli_projects(get_cli_chats_path()) except Exception as e: - print(f"Warning: Could not enumerate CLI chats ({e}) — skipping.", file=sys.stderr) + _logger.warning( + "Could not enumerate CLI chats: %s (%s) — skipping", + e, + type(e).__name__, + exc_info=True, + ) cli_projects = [] for cp in cli_projects: @@ -378,7 +395,13 @@ def main(): messages = traverse_blobs(session["db_path"]) bubbles = messages_to_bubbles(messages, created_ms) except Exception as e: - print(f"Warning: Could not read CLI session {session_id}: {e}", file=sys.stderr) + _logger.warning( + "Could not read CLI session %s: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue if not bubbles: diff --git a/services/cli_tabs.py b/services/cli_tabs.py index 6edad8f..f28aafc 100644 --- a/services/cli_tabs.py +++ b/services/cli_tabs.py @@ -1,9 +1,12 @@ from __future__ import annotations +import logging from datetime import datetime from flask import current_app, jsonify +_logger = logging.getLogger(__name__) + from utils.cli_chat_reader import list_cli_projects, messages_to_bubbles, traverse_blobs from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.workspace_path import get_cli_chats_path @@ -44,13 +47,25 @@ def _get_cli_workspace_tabs(workspace_id: str): try: messages = traverse_blobs(session["db_path"]) except Exception as e: - print(f"CLI: could not read session {session_id}: {e}") + _logger.warning( + "Could not read CLI session %s: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue try: bubbles = messages_to_bubbles(messages, created_ms) except Exception as e: - print(f"CLI: could not convert session {session_id} to bubbles: {e}") + _logger.warning( + "Could not convert CLI session %s to bubbles: %s (%s)", + session_id, + e, + type(e).__name__, + exc_info=True, + ) continue if not bubbles: continue @@ -113,5 +128,11 @@ def _get_cli_workspace_tabs(workspace_id: str): return jsonify({"tabs": tabs}) except Exception as e: - print(f"Failed to get CLI workspace tabs: {e}") + _logger.error( + "Failed to get CLI workspace tabs for %s: %s (%s)", + workspace_id, + e, + type(e).__name__, + exc_info=True, + ) return jsonify({"error": "Failed to get CLI workspace tabs"}), 500 diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 228fad2..a23dbfd 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -1,19 +1,24 @@ from __future__ import annotations import json +import logging import os import sqlite3 from datetime import datetime, timezone +_logger = logging.getLogger(__name__) + from utils.cli_chat_reader import list_cli_projects from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.path_helpers import ( get_workspace_folder_paths, normalize_file_path, to_epoch_ms, + warn_workspace_json_read, ) from utils.workspace_descriptor import read_json_file from utils.workspace_path import get_cli_chats_path +from models import Composer, SchemaError from services.workspace_db import ( _build_composer_id_to_workspace_id, _collect_invalid_workspace_ids, @@ -72,7 +77,32 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: cid = row["key"].split(":")[1] try: - cd = json.loads(row["value"]) + parsed = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + _logger.warning( + "Failed to decode Composer from composerData:%s: %s", + cid, + e, + ) + continue + if not isinstance(parsed, dict): + _logger.warning( + "Failed to parse Composer from composerData:%s: expected object, got %s", + cid, + type(parsed).__name__, + ) + continue + try: + composer = Composer.from_dict(parsed, composer_id=cid) + except SchemaError as e: + _logger.warning( + "Failed to parse Composer from composerData:%s: %s", + cid, + e, + ) + continue + cd = composer.raw + try: pid = _determine_project_for_conversation( cd, cid, project_layouts_map, project_name_map, workspace_path_map, @@ -98,10 +128,18 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: "lastUpdatedAt": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or 0, "createdAt": to_epoch_ms(cd.get("createdAt")) or 0, }) - except Exception: - pass - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to process Composer from composerData:%s: %s", + cid, + e, + ) + except Exception as e: + _logger.error( + "Failed to load composer rows from global storage: %s", + e, + exc_info=True, + ) # Group workspace entries by normalized folder path folder_to_entries: dict[str, list] = {} @@ -114,8 +152,8 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: first_folder = folders[0] if folders else None if first_folder: norm_folder = normalize_file_path(first_folder) - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) if not norm_folder: norm_folder = entry["name"] # fallback to workspace ID entry_folder_map[entry["name"]] = norm_folder @@ -139,7 +177,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for e in group if os.path.isfile(os.path.join(workspace_path, e["name"], "state.vscdb")) ) - except Exception: + except Exception as e: + _logger.warning( + "Failed to resolve mtime for workspace folder %s: %s", + norm_folder, + e, + ) mtime = 0 workspace_name = _get_workspace_display_name(workspace_path, primary["name"]) @@ -238,7 +281,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: "source": "cli", }) except Exception as e: - print(f"Failed to load CLI projects: {e}") + _logger.warning("Failed to load CLI projects: %s", e) projects.sort(key=lambda p: p["lastModified"], reverse=True) return projects diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index c27da96..cf34be1 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging import os import re import sqlite3 @@ -8,10 +9,13 @@ from contextlib import closing from pathlib import Path +_logger = logging.getLogger(__name__) + from utils.path_helpers import ( get_workspace_display_name, get_workspace_folder_paths, normalize_file_path, + warn_workspace_json_read, ) from utils.workspace_descriptor import basename_from_pathish, read_json_file from services.workspace_db import _open_global_db @@ -28,8 +32,12 @@ def _get_workspace_display_name(workspace_path: str, workspace_id: str) -> str: name = get_workspace_display_name(workspace.raw) if name: return name - except (SchemaError, OSError, ValueError): - pass + except (SchemaError, OSError, ValueError) as e: + _logger.warning( + "Failed to parse Workspace from %s: %s", + workspace_id, + e, + ) return workspace_id @@ -131,8 +139,8 @@ def _get_project_from_file_path( if is_within_workspace and len(wp) > best_len: best_len = len(wp) best_match = entry["name"] - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) return best_match @@ -147,8 +155,8 @@ def _create_project_name_to_workspace_id_map(workspace_entries): folder_name = parts[-1] if parts else None if folder_name: mapping[folder_name] = entry["name"] - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) return mapping @@ -160,8 +168,8 @@ def _create_workspace_path_to_id_map(workspace_entries): for folder in get_workspace_folder_paths(wd): normalized = normalize_file_path(folder) out[normalized] = entry["name"] - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) return out @@ -274,8 +282,8 @@ def _determine_project_for_conversation( name = re.sub(r"^file://", "", folder).replace("\\", "/").split("/")[-1] if name: folder_name_to_ws.append({"name": name, "id": entry["name"]}) - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) best_id = None best_len = 0 @@ -312,7 +320,19 @@ def _infer_invalid_workspace_aliases( continue try: cd = json.loads(row["value"]) - except Exception: + except Exception as e: + _logger.warning( + "Failed to decode Composer from composerData:%s: %s", + cid, + e, + ) + continue + if not isinstance(cd, dict): + _logger.warning( + "Failed to parse Composer from composerData:%s: expected object, got %s", + cid, + type(cd).__name__, + ) continue inferred = _determine_project_for_conversation( cd, diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 42fa807..f993e7d 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -1,15 +1,20 @@ from __future__ import annotations +import hashlib import json +import logging import os import sqlite3 from datetime import datetime from typing import Any +_logger = logging.getLogger(__name__) + from utils.path_helpers import ( get_workspace_folder_paths, normalize_file_path, to_epoch_ms, + warn_workspace_json_read, ) from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.text_extract import extract_text_from_bubble @@ -33,16 +38,45 @@ -def _try_loads_kv_value(raw: str | None) -> Any | None: - """Parse a cursorDiskKV ``value`` column; ``None`` on missing or unparseable input (no raise).""" +def _loads_kv_value_logged(key: str, raw: object | None) -> Any | None: + """Parse a cursorDiskKV ``value``; log and return ``None`` on decode failure.""" if raw is None: return None + if not isinstance(raw, (str, bytes, bytearray)): + payload_len, payload_fp = _kv_payload_log_meta(raw) + _logger.warning( + "Failed to decode cursorDiskKV value for %s: unsupported type %s (payload_len=%d, payload_sha256=%s)", + key, + type(raw).__name__, + payload_len, + payload_fp, + ) + return None try: return json.loads(raw) - except (json.JSONDecodeError, TypeError, ValueError): + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(raw) + _logger.warning( + "Failed to decode cursorDiskKV value for %s: %s (payload_len=%d, payload_sha256=%s)", + key, + e, + payload_len, + payload_fp, + ) return None +def _kv_payload_log_meta(value: object | None) -> tuple[int, str | None]: + """Byte length and short SHA-256 prefix for logs without emitting raw KV payloads.""" + if value is None: + return 0, None + if isinstance(value, bytes): + payload = value + else: + payload = str(value).encode("utf-8", errors="replace") + return len(payload), hashlib.sha256(payload).hexdigest()[:12] + + def assemble_workspace_tabs( workspace_id: str, workspace_path: str, @@ -69,8 +103,8 @@ def assemble_workspace_tabs( first_folder = folders[0] if folders else None if first_folder: target_folder = normalize_file_path(first_folder) - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, workspace_id, e) if target_folder: for entry in workspace_entries: try: @@ -79,8 +113,8 @@ def assemble_workspace_tabs( f2 = folders2[0] if folders2 else None if f2 and normalize_file_path(f2) == target_folder: matching_ws_ids.add(entry["name"]) - except Exception: - pass + except Exception as e: + warn_workspace_json_read(_logger, entry["name"], e) bubble_map: dict[str, dict] = {} code_block_diff_map: dict[str, list] = {} @@ -103,8 +137,24 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: parts = row["key"].split(":") if len(parts) >= 3: bid = parts[2] - parsed = _try_loads_kv_value(row["value"]) - if parsed is None: + if row["value"] is None: + _logger.warning( + "Skipping Bubble cursorDiskKV row with NULL value: key=%r", + row["key"], + ) + continue + try: + parsed = json.loads(row["value"]) + + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) + _logger.warning( + "Failed to decode Bubble from %s: %s (payload_len=%d, payload_sha256=%s)", + row["key"], + e, + payload_len, + payload_fp, + ) continue try: bubble_obj = Bubble.from_dict(parsed, bubble_id=bid) @@ -113,7 +163,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Drift logged so the operator can chase disappearing # bubbles instead of guessing. Bad row still skipped so the # tabs endpoint can't 500 on one malformed bubble. - print(f"Schema drift in bubble {bid}: {e}") + _logger.warning( + "Failed to parse Bubble from bubbleId:%s: %s", + bid, + e, + ) # Load codeBlockDiffs code_block_diff_map = load_code_block_diff_map(global_db) @@ -126,7 +180,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if len(parts) < 2: continue chat_id = parts[1] - ctx = _try_loads_kv_value(row["value"]) + ctx = _loads_kv_value_logged(row["key"], row["value"]) if not isinstance(ctx, dict): continue @@ -144,7 +198,10 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: project_layouts_map.setdefault(chat_id, []) for layout in layouts: if isinstance(layout, str): - layout = _try_loads_kv_value(layout) + layout = _loads_kv_value_logged( + f"{row['key']}:projectLayout", + layout, + ) if not isinstance(layout, dict): continue if isinstance(layout, dict) and layout.get("rootPath"): @@ -170,8 +227,24 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: composer_id = row["key"].split(":")[1] - parsed = _try_loads_kv_value(row["value"]) - if parsed is None: + if row["value"] is None: + _logger.warning( + "Skipping Composer cursorDiskKV row with NULL value: key=%r", + row["key"], + ) + continue + try: + parsed = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + payload_len, payload_fp = _kv_payload_log_meta(row["value"]) + _logger.warning( + "Failed to decode Composer from composerData:%s: %s (key=%s, payload_len=%d, payload_sha256=%s)", + composer_id, + e, + row["key"], + payload_len, + payload_fp, + ) continue try: composer = Composer.from_dict(parsed, composer_id=composer_id) @@ -179,7 +252,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Drift skipped + logged so the two primary conversation # paths (list_workspaces + get_workspace_tabs) agree on what # counts as a valid composer. - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Failed to parse Composer from composerData:%s: %s", + composer_id, + e, + ) continue try: cd = composer.raw @@ -497,7 +574,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: response["tabs"].append(tab) except Exception as e: - print(f"Error parsing composer data for {composer_id}: {e}") + _logger.warning( + "Failed to process Composer from composerData:%s: %s", + composer_id, + e, + ) # Sort tabs by timestamp descending (newest first) response["tabs"].sort(key=lambda t: t.get("timestamp") or 0, reverse=True) diff --git a/tests/test_invalid_workspace_aliases.py b/tests/test_invalid_workspace_aliases.py index ae9ee81..1daa30a 100644 --- a/tests/test_invalid_workspace_aliases.py +++ b/tests/test_invalid_workspace_aliases.py @@ -87,6 +87,36 @@ def test_drifted_composer_does_not_skew_vote(self): # cid-3 is dropped (drift), so boost-ws wins 2-0 (not 2-1) self.assertEqual(aliases.get("invalid-ws"), "boost-ws") + def test_non_dict_composer_json_skipped_without_crash(self) -> None: + composer_rows = [ + {"key": "composerData:cid-1", "value": json.dumps({"createdAt": 1_715_000_000_000, "fullConversationHeadersOnly": []})}, + {"key": "composerData:cid-2", "value": json.dumps({"createdAt": 1_715_000_000_000, "fullConversationHeadersOnly": []})}, + {"key": "composerData:cid-bad", "value": json.dumps("not-a-dict")}, + ] + composer_id_to_ws = {"cid-1": "invalid-ws", "cid-2": "invalid-ws", "cid-bad": "invalid-ws"} + project_layouts_map = { + "cid-1": [normalize_file_path(r"d:\_Cpp_Digest\boostbacklog")], + "cid-2": [normalize_file_path(r"d:\_Cpp_Digest\boostbacklog")], + "cid-bad": [normalize_file_path(r"d:\_Cpp_Digest\team-brain")], + } + workspace_path_map = { + normalize_file_path(r"d:\_cpp_digest\boostbacklog"): "boost-ws", + normalize_file_path(r"d:\_cpp_digest\team-brain"): "team-ws", + } + + aliases = _infer_invalid_workspace_aliases( + composer_rows=composer_rows, + project_layouts_map=project_layouts_map, + project_name_map={}, + workspace_path_map=workspace_path_map, + workspace_entries=[], + bubble_map={}, + composer_id_to_ws=composer_id_to_ws, + invalid_workspace_ids={"invalid-ws"}, + ) + + self.assertEqual(aliases.get("invalid-ws"), "boost-ws") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_models_wired_at_read_sites.py b/tests/test_models_wired_at_read_sites.py index 491ca58..0e80c89 100644 --- a/tests/test_models_wired_at_read_sites.py +++ b/tests/test_models_wired_at_read_sites.py @@ -130,9 +130,11 @@ def test_workspace_tabs_endpoint_calls_bubble_from_dict(self): def test_bubble_schema_drift_is_logged_not_swallowed_silently(self): # CodeRabbit: SchemaError used to be lumped in with JSONDecodeError / - # ValueError and skipped silently. Schema drift must now print a + # ValueError and skipped silently. Schema drift must now log a # `Schema drift in bubble ` line so disappearing bubbles can be # traced. The well-formed row still loads alongside. + import logging + from app import create_app # Seed a deliberately-malformed bubble row that will trip # Bubble.from_dict's "expected non-empty str" gate on the bubble_id by @@ -147,17 +149,14 @@ def test_bubble_schema_drift_is_logged_not_swallowed_silently(self): app = create_app() app.config["TESTING"] = True app.config["EXCLUSION_RULES"] = [] - import io - from contextlib import redirect_stdout - captured = io.StringIO() - with redirect_stdout(captured): + with self.assertLogs("api.search", level="WARNING") as logs: client = app.test_client() response = client.get("/api/search?q=sentinel-wired") self.assertEqual(response.status_code, 200) - out = captured.getvalue() - self.assertIn("Schema drift in bubble", out, - msg=f"expected drift log line, got stdout:\n{out!r}") - self.assertIn("bub-bad", out, + messages = "\n".join(logs.output) + self.assertIn("Schema drift in bubble", messages, + msg=f"expected drift log line, got logs:\n{messages!r}") + self.assertIn("bub-bad", messages, msg="drift log must include the offending bubble id") def test_workspace_tabs_endpoint_calls_composer_from_dict(self): diff --git a/tests/test_parse_failure_logging.py b/tests/test_parse_failure_logging.py new file mode 100644 index 0000000..941835e --- /dev/null +++ b/tests/test_parse_failure_logging.py @@ -0,0 +1,191 @@ +"""Tests for structured logging at model parse sites (issue #66).""" + +from __future__ import annotations + +import json +import os +import sqlite3 +import sys +import tempfile +import unittest +from contextlib import closing + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from services.workspace_listing import list_workspace_projects +from services.workspace_tabs import assemble_workspace_tabs + + +def _seed_listing_with_drifted_composer(parent: str) -> str: + ws_root = os.path.join(parent, "workspaceStorage") + global_root = os.path.join(parent, "globalStorage") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(global_root, exist_ok=True) + + ws_dir = os.path.join(ws_root, "ws-a") + os.makedirs(ws_dir, exist_ok=True) + target_folder = os.path.join(parent, "real-project") + os.makedirs(target_folder, exist_ok=True) + with open(os.path.join(ws_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folder": f"file://{target_folder}"}, f) + sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close() + + conn = sqlite3.connect(os.path.join(global_root, "state.vscdb")) + conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ( + "composerData:cmp-drift", + json.dumps({ + "name": "Drifted composer", + "fullConversationHeadersOnly": [{"bubbleId": "b-1"}], + }), + ), + ) + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-drift:b-1", json.dumps({"type": "user", "text": "hello"})), + ) + conn.commit() + conn.close() + return ws_root + + +def _seed_tabs_with_drifted_bubble(parent: str) -> str: + ws_root = os.path.join(parent, "workspaceStorage") + global_root = os.path.join(parent, "globalStorage") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(global_root, exist_ok=True) + + ws_dir = os.path.join(ws_root, "ws-a") + os.makedirs(ws_dir, exist_ok=True) + with open(os.path.join(ws_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folder": "/tmp/proj"}, f) + sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close() + + conn = sqlite3.connect(os.path.join(global_root, "state.vscdb")) + conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ( + "composerData:cmp-ok", + json.dumps({ + "name": "Good tab", + "createdAt": 1_715_000_000_000, + "lastUpdatedAt": 1_715_000_500_000, + "fullConversationHeadersOnly": [ + {"bubbleId": "b-bad", "type": 1}, + {"bubbleId": "b-good", "type": 1}, + ], + }), + ), + ) + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-ok:b-bad", json.dumps("not-a-dict")), + ) + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-ok:b-good", json.dumps({"text": "hello"})), + ) + conn.commit() + conn.close() + return ws_root + + +class TestParseFailureLogging(unittest.TestCase): + def test_listing_logs_composer_schema_drift(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_listing_with_drifted_composer(tmp) + with self.assertLogs("services.workspace_listing", level="WARNING") as cm: + list_workspace_projects(ws_root, rules=[]) + + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("Composer" in m and "cmp-drift" in m for m in messages), + f"expected Composer parse warning for cmp-drift, got: {messages}", + ) + + def test_workspace_tabs_logs_bubble_json_decode_failure(self) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("bubbleId:cmp-ok:b-json", "{not valid json"), + ) + conn.commit() + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("decode Bubble" in m and "b-json" in m for m in messages), + f"expected JSON decode warning for b-json, got: {messages}", + ) + + def test_workspace_tabs_logs_composer_json_decode_failure(self) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + bad_composer_value = ( + '{"fullConversationHeadersOnly": [{"bubbleId": "b1"}], "createdAt":' + ) + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("composerData:cmp-json", bad_composer_value), + ) + conn.commit() + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("decode Composer" in m and "cmp-json" in m for m in messages), + f"expected JSON decode warning for cmp-json, got: {messages}", + ) + + def test_workspace_tabs_logs_bubble_schema_drift(self) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + self.assertIn("cmp-ok", [t["id"] for t in payload.get("tabs", [])]) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("Bubble" in m and "b-bad" in m for m in messages), + f"expected Bubble parse warning for b-bad, got: {messages}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_workspace_tabs_null_bubble.py b/tests/test_workspace_tabs_null_bubble.py index 3ce69ce..3d83eea 100644 --- a/tests/test_workspace_tabs_null_bubble.py +++ b/tests/test_workspace_tabs_null_bubble.py @@ -2,8 +2,8 @@ A cursorDiskKV row with a NULL value column previously caused json.loads(None) -> TypeError, which propagated as a 500 response. -The fix uses ``_try_loads_kv_value`` in ``services/workspace_tabs.py`` so -NULL / unparseable cursorDiskKV values are skipped without raising. +Bubble rows with NULL or invalid JSON values are skipped in +``services/workspace_tabs.py`` without raising. """ import json @@ -72,15 +72,21 @@ def tearDown(self): def test_null_bubble_row_is_skipped_without_exception(self): """assemble_workspace_tabs must not raise when a bubble row has NULL value.""" try: - _payload, status = assemble_workspace_tabs( - workspace_id="global", - workspace_path=self.workspace_path, - rules=[], - ) + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + _payload, status = assemble_workspace_tabs( + workspace_id="global", + workspace_path=self.workspace_path, + rules=[], + ) except TypeError as exc: self.fail(f"NULL bubble row raised TypeError: {exc}") self.assertEqual(status, 200, "NULL bubble row must not turn tabs load into an error response") + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("NULL value" in m and "bubble-null" in m for m in messages), + f"expected NULL-value warning for bubble-null row, got: {messages}", + ) def test_healthy_bubbles_still_load_when_null_row_present(self): """The healthy bubble surfaces in a tab even when a NULL row is present.""" diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py index 2b2be00..14dbd0c 100644 --- a/utils/cli_chat_reader.py +++ b/utils/cli_chat_reader.py @@ -33,7 +33,10 @@ from __future__ import annotations import json +import logging from models import CliSessionMeta, SchemaError + +_logger = logging.getLogger(__name__) import os import re import sqlite3 @@ -100,7 +103,11 @@ def traverse_blobs(db_path: str) -> list[dict]: json.loads(bytes.fromhex(meta_row[0]).decode("utf-8")) ) except (SchemaError, ValueError, UnicodeDecodeError, TypeError) as e: - print(f"Schema drift in CLI session meta at {db_path}: {e}") + _logger.warning( + "Schema drift in CLI session meta at %s: %s", + db_path, + e, + ) return [] root_id: str = meta.latest_root_blob_id diff --git a/utils/path_helpers.py b/utils/path_helpers.py index 3b4201d..55af44f 100644 --- a/utils/path_helpers.py +++ b/utils/path_helpers.py @@ -1,5 +1,6 @@ """Path utility functions mirroring src/utils/path.ts""" +import logging import os import sys from datetime import datetime @@ -142,3 +143,16 @@ def get_workspace_display_name(workspace_data: dict, fallback: str | None = None if decoded: return decoded return fallback or "" + + +def warn_workspace_json_read( + logger: logging.Logger, + workspace_id: str, + err: BaseException, +) -> None: + """Log a standard warning when workspace.json cannot be read (shared across services/api).""" + logger.warning( + "Failed to read workspace.json for %s: %s", + workspace_id, + err, + )