import { ExpansionResult } from './types.js';

/** Maximum number of queries returned by the rule-based expansion. */
const MAX_RULE_QUERIES = 8;

/**
 * Build the failed result shape shared by every error path in this module.
 *
 * @param error - Human-readable reason for the failure.
 * @returns A result with `ok: false`, the given error, and empty query fields.
 */
function failure(error: string): ExpansionResult {
  return {
    ok: false,
    error,
    expandedQueries: [],
    primaryQuery: '',
    expansionSource: '',
  };
}

/**
 * Type guard: true when `value` is an array whose entries are all strings.
 */
function isStringArray(value: unknown): value is string[] {
  return Array.isArray(value) && value.every((entry) => typeof entry === 'string');
}

/**
 * Trim leading and trailing whitespace from a string.
 */
function trim(input: string): string {
  return input.trim();
}

/**
 * Collapse every run of whitespace into a single space and trim both ends.
 */
function compact(input: string): string {
  return input.replace(/\s+/g, ' ').trim();
}

/**
 * Normalize a query by trimming it and stripping a leading
 * "cold-outreach:" prefix (matched case-insensitively), together with any
 * whitespace that directly follows the prefix.
 *
 * @param input - Raw query text.
 * @returns The trimmed query without the prefix.
 */
export function normalizeQuery(input: string): string {
  return trim(input).replace(/^cold-outreach:\s*/i, '');
}

/**
 * Deduplicate strings while preserving first-seen order.
 *
 * Each item is compacted first; items that are empty after compaction are
 * dropped. Duplicates are detected case-insensitively, and the first spelling
 * encountered is the one that is kept.
 *
 * @param items - Candidate strings.
 * @returns The compacted, de-duplicated strings in their original order.
 */
function dedupeKeepOrder(items: readonly string[]): string[] {
  const seen = new Set<string>();
  const output: string[] = [];
  for (const item of items) {
    const cleaned = compact(item);
    if (!cleaned) continue;
    const key = cleaned.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);
    output.push(cleaned);
  }
  return output;
}

/**
 * Parse and validate the LLM-provided expansion JSON.
 *
 * Accepted shapes:
 * - an array of query strings (the first surviving entry becomes the primary), or
 * - an object carrying the list under `expandedQueries` or `queries`, and
 *   optionally the primary under `primaryQuery` or `primary_query`.
 *
 * @param llmExpansion - JSON text to parse.
 * @returns A result with `expansionSource: 'llm'` on success, otherwise a
 *   failed result whose `error` names the specific problem.
 */
function parseLLMExpansion(llmExpansion: string): ExpansionResult {
  // Only the parse itself sits inside the try, so "not valid JSON" is reported
  // for syntax errors alone and never for shape problems.
  let parsed: unknown;
  try {
    parsed = JSON.parse(llmExpansion);
  } catch {
    return failure('QUERY_EXPANSION_JSON is not valid JSON');
  }

  let rawQueries: unknown;
  let rawPrimary: unknown;
  if (Array.isArray(parsed)) {
    rawQueries = parsed;
  } else if (typeof parsed === 'object' && parsed !== null) {
    const record = parsed as Record<string, unknown>;
    // `||` (not `??`) is deliberate: empty or otherwise falsy values fall
    // through to the alternate key, then to the default.
    rawQueries = record.expandedQueries || record.queries || [];
    rawPrimary = record.primaryQuery || record.primary_query;
  } else {
    return failure('QUERY_EXPANSION_JSON must be an array or object');
  }

  // Reject anything that is not a list of strings. Without this check a plain
  // string would be iterated character by character.
  if (!isStringArray(rawQueries)) {
    return failure('expandedQueries must be an array of strings');
  }
  const expanded = dedupeKeepOrder(rawQueries);

  // A missing/falsy primary falls back to the first expanded query.
  const primaryCandidate: unknown = rawPrimary || expanded[0] || '';
  if (typeof primaryCandidate !== 'string') {
    return failure('primaryQuery must be a string');
  }
  const primary = compact(primaryCandidate);

  if (expanded.length === 0) {
    return failure('expandedQueries is empty');
  }
  if (!primary) {
    return failure('primaryQuery is empty');
  }

  // Ensure the primary query is part of the expanded list (case-insensitive).
  const primaryKey = primary.toLowerCase();
  if (!expanded.some((query) => query.toLowerCase() === primaryKey)) {
    expanded.unshift(primary);
  }

  return {
    ok: true,
    error: '',
    expandedQueries: expanded,
    primaryQuery: primary,
    expansionSource: 'llm',
  };
}

/**
 * Build a rule-based expansion from fixed templates.
 *
 * The compacted query is combined with the country and the suffixes
 * "supplier", "wholesale", "distributor" and "b2b". Queries mentioning
 * "coffee", "office machine" or "office equipment" get extra
 * topic-specific candidates.
 *
 * @param rawQuery - Query text; it is compacted before use.
 * @param countryUpper - Country token appended to every candidate as given.
 * @returns A result with `expansionSource: 'rule'` holding at most
 *   `MAX_RULE_QUERIES` queries; the first one is the primary query.
 */
function generateRuleExpansion(rawQuery: string, countryUpper: string): ExpansionResult {
  const base = compact(rawQuery);
  const ruleCandidates: string[] = [
    `${base} ${countryUpper}`,
    `${base} supplier ${countryUpper}`,
    `${base} wholesale ${countryUpper}`,
    `${base} distributor ${countryUpper}`,
    `${base} b2b ${countryUpper}`,
  ];

  const lower = base.toLowerCase();
  if (lower.includes('coffee')) {
    ruleCandidates.push(
      `coffee shop ${countryUpper}`,
      `coffee roastery ${countryUpper}`,
      `specialty coffee wholesale ${countryUpper}`,
    );
  }
  if (lower.includes('office machine') || lower.includes('office equipment')) {
    ruleCandidates.push(
      `office equipment supplier ${countryUpper}`,
      `office machine distributor ${countryUpper}`,
    );
  }

  const expanded = dedupeKeepOrder(ruleCandidates);
  // Defensive guard: the templates contain literal words, so the list is not
  // expected to be empty, but this keeps `first` safely narrowed to a string.
  const [first] = expanded;
  if (first === undefined) {
    return failure('failed to build expanded queries');
  }

  return {
    ok: true,
    error: '',
    expandedQueries: expanded.slice(0, MAX_RULE_QUERIES),
    primaryQuery: first,
    expansionSource: 'rule',
  };
}

/**
 * Resolve the query expansion, preferring LLM-provided JSON over rules.
 *
 * @param rawQuery - Query text; must be non-empty after whitespace compaction.
 * @param countryUpper - Country token used by the rule-based expansion.
 * @param llmExpansion - Optional expansion JSON. When it is non-blank it is
 *   parsed and used exclusively; `rawQuery` is then only checked for emptiness.
 * @returns The expansion result; `ok` is false and `error` is set on failure.
 */
export function resolveExpansion(
  rawQuery: string,
  countryUpper: string,
  llmExpansion: string,
): ExpansionResult {
  const normalized = compact(rawQuery);
  if (!normalized) {
    return failure('query is empty after normalization');
  }

  // A non-blank LLM expansion takes precedence over the rule-based fallback.
  if (llmExpansion.trim()) {
    return parseLLMExpansion(llmExpansion);
  }

  return generateRuleExpansion(normalized, countryUpper);
}