Add fast memory search mode #188
Conversation
There was a problem hiding this comment.
Code Review
This pull request enhances the /search endpoint by adding optional LLM-based answer synthesis, expanding search domains to include code and snippets, and implementing detailed latency tracking. It also introduces TTL-based caching for retrieval plans and profile catalogs to improve efficiency. Review feedback identifies an opportunity to further optimize search performance by executing domain queries concurrently and suggests removing an unnecessary side effect in the profile search logic.
| if "profile" in domains: | ||
| all_results.extend(await _timed_sync("profile", timings, _search_profile, pipeline, user_id)) | ||
| if "temporal" in domains: | ||
| all_results.extend(await _timed_sync("temporal", timings, _search_temporal, pipeline, req.query, user_id, req.top_k)) | ||
| if "summary" in domains: | ||
| all_results.extend(await _timed_async("summary", timings, _search_summary, pipeline, req.query, user_id, req.top_k)) | ||
| if "snippet" in domains: | ||
| all_results.extend(await _timed_async("snippet", timings, _search_snippet, pipeline, req.query, user_id, req.top_k)) | ||
| if "code" in domains: | ||
| if not req.org_id or not req.repo: | ||
| return _error(request, "org_id and repo are required when domains includes 'code'.", 400, 0) | ||
| all_results.extend(await _timed_async("code", timings, _search_code, req, user_id)) |
There was a problem hiding this comment.
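The domain searches are currently executed sequentially. To improve performance for this "fast search" endpoint, run them concurrently with `asyncio.gather`. Note that the `org_id`/`repo` check for the `code` domain should run before any tasks are created; otherwise the early error return leaves the already-created awaitables un-awaited.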
| if "profile" in domains: | |
| all_results.extend(await _timed_sync("profile", timings, _search_profile, pipeline, user_id)) | |
| if "temporal" in domains: | |
| all_results.extend(await _timed_sync("temporal", timings, _search_temporal, pipeline, req.query, user_id, req.top_k)) | |
| if "summary" in domains: | |
| all_results.extend(await _timed_async("summary", timings, _search_summary, pipeline, req.query, user_id, req.top_k)) | |
| if "snippet" in domains: | |
| all_results.extend(await _timed_async("snippet", timings, _search_snippet, pipeline, req.query, user_id, req.top_k)) | |
| if "code" in domains: | |
| if not req.org_id or not req.repo: | |
| return _error(request, "org_id and repo are required when domains includes 'code'.", 400, 0) | |
| all_results.extend(await _timed_async("code", timings, _search_code, req, user_id)) | |
| tasks = [] | |
| if "profile" in domains: | |
| tasks.append(_timed_sync("profile", timings, _search_profile, pipeline, user_id)) | |
| if "temporal" in domains: | |
| tasks.append(_timed_sync("temporal", timings, _search_temporal, pipeline, req.query, user_id, req.top_k)) | |
| if "summary" in domains: | |
| tasks.append(_timed_async("summary", timings, _search_summary, pipeline, req.query, user_id, req.top_k)) | |
| if "snippet" in domains: | |
| tasks.append(_timed_async("snippet", timings, _search_snippet, pipeline, req.query, user_id, req.top_k)) | |
| if "code" in domains: | |
| if not req.org_id or not req.repo: | |
| return _error(request, "org_id and repo are required when domains includes 'code'.", 400, 0) | |
| tasks.append(_timed_async("code", timings, _search_code, req, user_id)) | |
| results_from_domains = await asyncio.gather(*tasks) | |
| for domain_results in results_from_domains: | |
| all_results.extend(domain_results) |
| ) | ||
| return [SourceRecord(domain="profile", content=r.content, score=r.score, metadata=r.metadata) for r in raw] | ||
| _, raw = pipeline._fetch_profile_catalog(user_id) | ||
| pipeline._cached_profile_records = raw |
This PR adds a fast search path for memory lookup without forcing the full agentic retrieval flow every time. The default behavior is raw search results, and callers can opt into answer synthesis with
`answer=true` when they still want an LLM-generated response.
What changed:
- `/v1/memory/search` to return ranked hits from profile, temporal, summary, snippet, and code memory
- `answer=true`
Tested with:
Fixes #163