Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
42fce32
feat(graphile-llm): wire billing metering into LLM plugins
pyramation May 18, 2026
0db269f
refactor(graphile-llm): separate metering into standalone LlmMetering…
pyramation May 18, 2026
b0c52d6
feat(graphile-llm): meter slug = model name, three-level waterfall
pyramation May 18, 2026
19bec10
remove docs/spec/llm-metering.md (moving to constructive-db)
pyramation May 18, 2026
57adfb4
fix(graphile-llm): rename encrypted_secrets_module to config_secrets_…
pyramation May 18, 2026
95ce8fc
fix(graphile-llm): remove estimatedEmbeddingTokens, estimate from tex…
pyramation May 18, 2026
47dab61
feat(graphile-llm): add agent discovery plugin and REST streaming end…
pyramation May 20, 2026
ac85f01
fix: restore inline documentation comments removed in previous commits
pyramation May 20, 2026
941d909
fix: restore remaining inline comments (inject/remove, recurse, repla…
pyramation May 20, 2026
ea6fd09
refactor: remove fake token estimation from metering
pyramation May 20, 2026
215169d
refactor(graphile-llm): centralize LLM env config in env.ts
pyramation May 20, 2026
9c0265c
refactor: replace smart tag discovery with module config query
pyramation May 20, 2026
c35885e
refactor: remove apiKey/api_key_ref from graphile-llm
pyramation May 20, 2026
5d407c2
feat(llm-api): wire metering + add global /v1/threads routes
pyramation May 20, 2026
10a1b4b
feat(llm-api): add inference logging to usage_log_inference table
pyramation May 20, 2026
6bda898
test(llm-api): add unit tests for metering + route registration
pyramation May 20, 2026
43eb67a
refactor(llm-api): use generate() only, remove generateWithUsage runt…
pyramation May 20, 2026
6dd823b
fix(graphile-llm): restore embed latency timing in text-mutation-plugin
pyramation May 21, 2026
e90630b
fix(graphile-llm): add latency timing to search embed and unmetered e…
pyramation May 21, 2026
24b194d
refactor(graphile-cache): add ModuleConfigCache<T>, migrate all LLM p…
pyramation May 21, 2026
e17e1c2
refactor(llm-api): replace token_usage jsonb with model text on messa…
pyramation May 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions graphile/graphile-cache/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ export {

// Factory for creating PostGraphile v5 instances
export { createGraphileInstance } from './create-instance';

// Generic module config cache for plugin lookups.
// ModuleConfigCacheOptions is an interface (no runtime value), so it is
// re-exported with `export type` to stay valid under `isolatedModules` and
// per-file transpilers that cannot tell types from values.
export { ModuleConfigCache } from './module-config-cache';
export type { ModuleConfigCacheOptions } from './module-config-cache';
65 changes: 65 additions & 0 deletions graphile/graphile-cache/src/module-config-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* ModuleConfigCache — generic LRU cache for module config lookups
*
* Provides a bounded, TTL-based cache for runtime config discovery
* (billing config, agent discovery, inference log info, etc.).
*
* Wraps lru-cache with:
* - LRU eviction (bounded memory)
* - TTL-based expiry
* - Named logging for debugging
* - clear() hook for future LISTEN/NOTIFY invalidation
*/

import { LRUCache } from 'lru-cache';
import { Logger } from '@pgpmjs/logger';

/** Construction options for a ModuleConfigCache instance. */
export interface ModuleConfigCacheOptions {
/** Cache name; also used to build the logger prefix `cache:<name>` */
name: string;
/** Max entries before LRU eviction (default: 100) */
max?: number;
/** Time-to-live of each entry, in milliseconds (default: 60_000) */
ttlMs?: number;
}

/**
 * Bounded, TTL-based cache for module config lookups, backed by `lru-cache`.
 *
 * An entry expires `ttlMs` after it was last written. Reads deliberately do
 * NOT extend an entry's lifetime: a frequently read key is still dropped
 * once its TTL elapses, so callers re-resolve it and pick up upstream config
 * changes within a bounded window.
 */
export class ModuleConfigCache<T> {
  private readonly cache: LRUCache<string, T>;
  private readonly log: Logger;
  readonly name: string;

  /**
   * @param opts - Cache name plus optional size (`max`, default 100) and
   *               lifetime (`ttlMs`, default 60_000 ms) limits.
   */
  constructor(opts: ModuleConfigCacheOptions) {
    this.name = opts.name;
    this.log = new Logger(`cache:${opts.name}`);
    this.cache = new LRUCache<string, T>({
      max: opts.max ?? 100,
      ttl: opts.ttlMs ?? 60_000,
      // Must stay false: resetting the age on every get() would keep hot
      // entries alive forever and stale config would never be refreshed.
      updateAgeOnGet: false,
    });
  }

  /** Return the cached value for `key`, or `undefined` when absent/expired. */
  get(key: string): T | undefined {
    return this.cache.get(key);
  }

  /** Store `value` under `key`, (re)starting that entry's TTL. */
  set(key: string, value: T): void {
    this.cache.set(key, value);
  }

  /** Remove a single entry; returns whatever the underlying cache reports. */
  delete(key: string): boolean {
    return this.cache.delete(key);
  }

  /** Drop every entry, logging the size that is being discarded. */
  clear(): void {
    this.log.debug(`Clearing all entries (size=${this.cache.size})`);
    this.cache.clear();
  }

  /** Number of entries currently held by the underlying cache. */
  get size(): number {
    return this.cache.size;
  }

  /** Whether the underlying cache reports an entry for `key`. */
  has(key: string): boolean {
    return this.cache.has(key);
  }
}
2 changes: 1 addition & 1 deletion graphile/graphile-llm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
},
"dependencies": {
"@agentic-kit/ollama": "^1.0.3",
"@constructive-io/graphql-env": "workspace:^"
"graphile-cache": "workspace:^"
},
"peerDependencies": {
"@dataplan/pg": "1.0.0",
Expand Down
10 changes: 6 additions & 4 deletions graphile/graphile-llm/src/__tests__/graphile-llm.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,12 @@ describe('Embedder abstraction', () => {
process.env = originalEnv;
});

it('returns null when EMBEDDER_PROVIDER is not set', () => {
it('returns default ollama embedder when EMBEDDER_PROVIDER is not set', () => {
process.env = { ...originalEnv };
delete process.env.EMBEDDER_PROVIDER;
const embedder = buildEmbedderFromEnv();
expect(embedder).toBeNull();
expect(embedder).not.toBeNull();
expect(typeof embedder).toBe('function');
});

it('builds embedder from environment variables', () => {
Expand Down Expand Up @@ -491,11 +492,12 @@ describe('Chat completion abstraction', () => {
process.env = originalEnv;
});

it('returns null when CHAT_PROVIDER is not set', () => {
it('returns default ollama chat completer when CHAT_PROVIDER is not set', () => {
process.env = { ...originalEnv };
delete process.env.CHAT_PROVIDER;
const chat = buildChatCompleterFromEnv();
expect(chat).toBeNull();
expect(chat).not.toBeNull();
expect(typeof chat).toBe('function');
});

it('builds chat completer from environment variables', () => {
Expand Down
24 changes: 8 additions & 16 deletions graphile/graphile-llm/src/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
*/

import OllamaClient from '@agentic-kit/ollama';
import { getEnvOptions } from '@constructive-io/graphql-env';
import { getLlmEnvOptions } from './env';
import type { ChatConfig, ChatFunction, ChatMessage, ChatOptions, LlmModuleData } from './types';

// ─── Built-in Providers ─────────────────────────────────────────────────────
Expand Down Expand Up @@ -91,27 +91,19 @@ export function buildChatCompleterFromModule(data: LlmModuleData): ChatFunction
provider: data.chat_provider,
model: data.chat_model,
baseUrl: data.chat_base_url,
apiKey: data.api_key_ref,
});
}

/**
* Resolve a chat completer from environment variables via getEnvOptions().
* Resolve a chat completer from environment variables.
* This is a fallback for development when no llm_module or defaultChatCompleter is configured.
*
* Environment variables (parsed by @constructive-io/graphql-env):
* CHAT_PROVIDER - Provider name ('ollama')
* CHAT_MODEL - Model identifier (e.g. 'llama3')
* CHAT_BASE_URL - Provider base URL
* Environment variables (with defaults from env.ts):
* CHAT_PROVIDER - Provider name (default: 'ollama')
* CHAT_MODEL - Model identifier (default: 'llama3')
* CHAT_BASE_URL - Provider base URL (default: 'http://localhost:11434')
*/
export function buildChatCompleterFromEnv(): ChatFunction | null {
const { llm } = getEnvOptions();
const provider = llm?.chat?.provider;
if (!provider) return null;

return buildChatCompleter({
provider,
model: llm?.chat?.model,
baseUrl: llm?.chat?.baseUrl,
});
const { chat } = getLlmEnvOptions();
return buildChatCompleter(chat);
}
159 changes: 159 additions & 0 deletions graphile/graphile-llm/src/config-cache.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/**
* config-cache — Per-database LLM billing configuration cache
*
* Caches resolved billing function names per database_id.
* Uses an LRU cache with TTL so config changes propagate within a bounded window
* without requiring a server restart.
*
* Resolution flow:
* Billing config is read from `metaschema_modules_public.billing_module`
* (public/private schema names + the record_usage function name).
* The check_billing_quota function name is a fixed convention and is
* not read from the table.
*
* All queries run through the Graphile `withPgClient` callback, which gives us
* a client connected to the tenant database with proper role settings.
*
* The LLM module config (provider, model, etc.) is already resolved by the
* LlmModulePlugin at schema-build time. This cache handles the runtime-only
* billing piece.
*/

import { ModuleConfigCache } from 'graphile-cache';

// ─── Types ──────────────────────────────────────────────────────────────────

/**
* Generic pg client interface matching what Graphile's withPgClient provides.
* Avoids a hard dependency on the `pg` package.
*
* Only a parameterized `query(sql, values)` call is required; the resolved
* result must expose the returned rows as plain key/value records.
*/
export interface PgClient {
query(sql: string, values?: unknown[]): Promise<{ rows: Record<string, unknown>[] }>;
}

/**
* Billing function metadata resolved from the billing_module metaschema table.
*/
export interface BillingConfig {
/** Private schema containing the billing functions (`private_schema` column of the lookup query) */
privateSchema: string;
/** Name of the record_usage function (`record_usage_function` column of the lookup query) */
recordUsageFunction: string;
/** Name of the check_billing_quota function (currently always the literal 'check_billing_quota'; not read from the table) */
checkBillingQuotaFunction: string;
/** Public schema containing meters table (`public_schema` column of the lookup query) */
publicSchema: string;
}

/**
* Per-database cached configuration for the LLM billing integration.
*
* One entry is stored per database_id. An entry with `billing: null` is
* cached too, so an empty or failed lookup is not retried until the entry
* expires or is invalidated.
*/
export interface LlmBillingCacheEntry {
/** Billing function references (null if billing_module not provisioned) */
billing: BillingConfig | null;
}

// ─── SQL Queries ────────────────────────────────────────────────────────────

/**
* Look up the billing module row for one database ($1 = database_id).
*
* Returns the public schema name, the private schema name and the
* record_usage function name; at most one row (LIMIT 1).
*
* This query does not itself check that the metaschema tables exist. On
* databases without the billing module it fails, which is why the caller
* guards with a schema-existence check and a catch.
*/
const BILLING_MODULE_SQL = `
SELECT
s.schema_name AS public_schema,
ps.schema_name AS private_schema,
bm.record_usage_function
FROM metaschema_modules_public.billing_module bm
JOIN metaschema_public.schema s ON bm.schema_id = s.id
JOIN metaschema_public.schema ps ON bm.private_schema_id = ps.id
WHERE bm.database_id = $1
LIMIT 1
`;

// ─── Cache ──────────────────────────────────────────────────────────────────

// Module-level cache of billing config, keyed by database_id.
// NOTE: the `max` value below is repeated as a literal in
// getLlmBillingCacheStats(); keep the two in sync when changing it.
const billingCache = new ModuleConfigCache<LlmBillingCacheEntry>({
name: 'billing-config',
ttlMs: 5 * 60 * 1000, // 5 minutes
max: 50,
});

// ─── Resolution Functions ───────────────────────────────────────────────────

/**
* SQL to check if a schema exists ($1 = schema name). Used as a guard before
* querying metaschema tables that may not be provisioned.
*
* Yields one row when the schema is listed in information_schema.schemata
* and no rows otherwise. It checks the schema only, not any table in it.
*/
const SCHEMA_EXISTS_SQL = `
SELECT 1 FROM information_schema.schemata WHERE schema_name = $1 LIMIT 1
`;

/**
 * Read the billing configuration for `databaseId` straight from the database.
 *
 * Best-effort by design: resolves to `null` when the metaschema schema is
 * absent, when no usable billing_module row exists, or when any query throws.
 *
 * @param pgClient - Client used to run the two lookup queries
 * @param databaseId - Value bound to `$1` of the billing_module lookup
 * @returns The resolved billing config, or `null` if billing is unavailable
 */
async function resolveBillingConfig(
  pgClient: PgClient,
  databaseId: string,
): Promise<BillingConfig | null> {
  try {
    // Step 1: bail out early when the metaschema_modules_public schema is
    // missing, i.e. the billing module was never provisioned here.
    const guard = await pgClient.query(SCHEMA_EXISTS_SQL, ['metaschema_modules_public']);
    const schemaPresent = guard.rows.length !== 0;
    if (!schemaPresent) {
      return null;
    }

    // Step 2: fetch the (single) billing_module row for this database.
    const lookup = await pgClient.query(BILLING_MODULE_SQL, [databaseId]);
    const moduleRow = lookup.rows[0];
    if (!moduleRow?.record_usage_function) {
      return null;
    }

    const config: BillingConfig = {
      publicSchema: moduleRow.public_schema as string,
      privateSchema: moduleRow.private_schema as string,
      recordUsageFunction: moduleRow.record_usage_function as string,
      // Not stored in billing_module: the quota function uses a fixed name.
      checkBillingQuotaFunction: 'check_billing_quota',
    };
    return config;
  } catch {
    // A missing table or any other query failure means "billing not available".
    return null;
  }
}

// ─── Public API ─────────────────────────────────────────────────────────────

/**
 * Return the billing config entry for a database, consulting the cache first.
 *
 * On a miss the config is resolved through `pgClient`, wrapped in an entry
 * (even when the result is `null`) and stored under `databaseId`.
 *
 * @param pgClient - A client connected to the tenant database (from withPgClient)
 * @param databaseId - The database UUID, used as the cache key
 * @returns The cached or freshly resolved entry
 */
export async function getLlmBillingConfig(
  pgClient: PgClient,
  databaseId: string,
): Promise<LlmBillingCacheEntry> {
  const hit = billingCache.get(databaseId);
  if (hit) {
    return hit;
  }

  const fresh: LlmBillingCacheEntry = {
    billing: await resolveBillingConfig(pgClient, databaseId),
  };
  billingCache.set(databaseId, fresh);
  return fresh;
}

/**
 * Invalidate cached billing config.
 *
 * @param databaseId - When provided, only that database's entry is removed.
 *                     When omitted (`undefined`/`null`), the whole cache is
 *                     cleared.
 */
export function invalidateLlmBillingConfig(databaseId?: string): void {
  // Compare against null/undefined explicitly: a truthiness test would treat
  // an empty-string id as "no id" and wipe every database's entry.
  if (databaseId == null) {
    billingCache.clear();
    return;
  }
  billingCache.delete(databaseId);
}

/**
 * Diagnostic snapshot of the billing config cache.
 *
 * @returns The current entry count and the configured capacity (50)
 */
export function getLlmBillingCacheStats(): { size: number; max: number } {
  const size = billingCache.size;
  return { size, max: 50 };
}
24 changes: 8 additions & 16 deletions graphile/graphile-llm/src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
*/

import OllamaClient from '@agentic-kit/ollama';
import { getEnvOptions } from '@constructive-io/graphql-env';
import { getLlmEnvOptions } from './env';
import type { EmbedderConfig, EmbedderFunction, LlmModuleData } from './types';

// ─── Built-in Providers ─────────────────────────────────────────────────────
Expand Down Expand Up @@ -59,27 +59,19 @@ export function buildEmbedderFromModule(data: LlmModuleData): EmbedderFunction |
provider: data.embedding_provider,
model: data.embedding_model,
baseUrl: data.embedding_base_url,
apiKey: data.api_key_ref,
});
}

/**
* Resolve an embedder from environment variables via getEnvOptions().
* Resolve an embedder from environment variables.
* This is a fallback for development when no llm_module or defaultEmbedder is configured.
*
* Environment variables (parsed by @constructive-io/graphql-env):
* EMBEDDER_PROVIDER - Provider name ('ollama')
* EMBEDDER_MODEL - Model identifier
* EMBEDDER_BASE_URL - Provider base URL
* Environment variables (with defaults from env.ts):
* EMBEDDER_PROVIDER - Provider name (default: 'ollama')
* EMBEDDER_MODEL - Model identifier (default: 'nomic-embed-text')
* EMBEDDER_BASE_URL - Provider base URL (default: 'http://localhost:11434')
*/
export function buildEmbedderFromEnv(): EmbedderFunction | null {
const { llm } = getEnvOptions();
const provider = llm?.embedder?.provider;
if (!provider) return null;

return buildEmbedder({
provider,
model: llm?.embedder?.model,
baseUrl: llm?.embedder?.baseUrl,
});
const { embedding } = getLlmEnvOptions();
return buildEmbedder(embedding);
}
Loading
Loading