client-finder/src/expansion.ts

198 lines
4.6 KiB
TypeScript

import { ExpansionResult } from './types.js';
/**
 * Remove leading and trailing whitespace from a string.
 *
 * Delegates to the built-in `String.prototype.trim`, which strips the same
 * characters the regex class `\s` matches (white space and line terminators).
 * A regex such as `/^\s+|\s+$/g` is avoided on purpose: its `\s+$` branch is
 * retried from every position inside a whitespace run that is not at the end
 * of the string, which becomes quadratic on inputs with long interior gaps.
 *
 * @param input - Text to trim.
 * @returns `input` without leading or trailing whitespace.
 */
function trim(input: string): string {
  return input.trim();
}
/**
 * Collapse every run of whitespace into a single space and strip
 * whitespace from both ends of the string.
 *
 * @param input - Text to tidy up.
 * @returns The text with single-space separators and no outer padding.
 */
function compact(input: string): string {
  const whitespaceRun = /\s+/g;
  // Trimming first leaves only interior runs to collapse.
  const withoutPadding = input.trim();
  return withoutPadding.replace(whitespaceRun, ' ');
}
/**
 * Normalize a raw query string.
 *
 * Surrounding whitespace is stripped first; then a single leading
 * "cold-outreach:" marker (in any letter case) is removed together with
 * the whitespace that directly follows its colon.
 *
 * @param input - Query text as received.
 * @returns The query without outer whitespace and without the prefix.
 */
export function normalizeQuery(input: string): string {
  // Each letter is spelled as a two-case class, so no regex flag is needed.
  const coldOutreachPrefix = /^[Cc][Oo][Ll][Dd]-[Oo][Uu][Tt][Rr][Ee][Aa][Cc][Hh]:\s*/;
  return trim(input).replace(coldOutreachPrefix, '');
}
/**
 * Remove duplicate entries from a list while keeping first-seen order.
 *
 * Every entry is whitespace-compacted before comparison; entries that end
 * up empty are dropped. Comparison is case-insensitive (lower-cased key),
 * and the spelling of the first occurrence is the one that is returned.
 *
 * @param items - Candidate strings, possibly with duplicates or blanks.
 * @returns The compacted, de-duplicated strings in original order.
 */
function dedupeKeepOrder(items: string[]): string[] {
  // A Map iterates in insertion order, so it doubles as the ordered output.
  const firstByKey = new Map<string, string>();
  for (const raw of items) {
    const candidate = compact(raw);
    const key = candidate.toLowerCase();
    if (candidate !== '' && !firstByKey.has(key)) {
      firstByKey.set(key, candidate);
    }
  }
  return Array.from(firstByKey.values());
}
/**
 * Build the failure result shared by every rejection path of
 * `parseLLMExpansion`.
 */
function llmExpansionFailure(error: string): ExpansionResult {
  return {
    ok: false,
    error,
    expandedQueries: [],
    primaryQuery: '',
    expansionSource: '',
  };
}

/**
 * Type guard: true only for an array whose every element is a string.
 */
function isStringArray(value: unknown): value is string[] {
  return Array.isArray(value) && value.every((item) => typeof item === 'string');
}

/**
 * Parse an LLM-provided query expansion payload.
 *
 * Accepted JSON shapes:
 * - an array of query strings (the first surviving entry becomes primary);
 * - an object with `expandedQueries` (or `queries`) and optionally
 *   `primaryQuery` (or `primary_query`).
 *
 * Queries are whitespace-compacted and de-duplicated case-insensitively.
 * When the primary query is not already among the expanded queries it is
 * inserted at the front.
 *
 * This function does not throw: every rejection is reported as a result
 * with `ok: false` and a message naming the actual problem. Only genuine
 * JSON syntax errors produce "QUERY_EXPANSION_JSON is not valid JSON";
 * wrongly shaped but valid JSON (non-string entries, a string or object
 * where the query list should be, a non-string primary query) gets its own
 * message instead of being misreported or iterated character by character.
 *
 * @param llmExpansion - Raw JSON text.
 * @returns A result with `expansionSource: 'llm'` on success.
 */
function parseLLMExpansion(llmExpansion: string): ExpansionResult {
  // Guard only the parse itself so shape problems are not mislabelled.
  let parsed: unknown;
  try {
    parsed = JSON.parse(llmExpansion);
  } catch {
    return llmExpansionFailure('QUERY_EXPANSION_JSON is not valid JSON');
  }

  let rawQueries: unknown;
  let rawPrimary: unknown = undefined;
  if (Array.isArray(parsed)) {
    rawQueries = parsed;
  } else if (typeof parsed === 'object' && parsed !== null) {
    const fields = parsed as Record<string, unknown>;
    // `||` rather than `??` is deliberate: any falsy value (null, '', 0)
    // falls through to the next alias, and finally to the default.
    rawQueries = fields['expandedQueries'] || fields['queries'] || [];
    rawPrimary = fields['primaryQuery'] || fields['primary_query'];
  } else {
    return llmExpansionFailure('QUERY_EXPANSION_JSON must be an array or object');
  }

  if (!isStringArray(rawQueries)) {
    return llmExpansionFailure('expandedQueries must be an array of strings');
  }
  const expanded = dedupeKeepOrder(rawQueries);
  if (expanded.length === 0) {
    return llmExpansionFailure('expandedQueries is empty');
  }

  // Without an explicit primary, the first expanded query is used.
  const primaryCandidate = rawPrimary || expanded[0] || '';
  if (typeof primaryCandidate !== 'string') {
    return llmExpansionFailure('primaryQuery must be a string');
  }
  const primary = compact(primaryCandidate);
  if (!primary) {
    return llmExpansionFailure('primaryQuery is empty');
  }

  // Ensure the primary query is part of the expanded list.
  const primaryKey = primary.toLowerCase();
  if (!expanded.some((query) => query.toLowerCase() === primaryKey)) {
    expanded.unshift(primary);
  }

  return {
    ok: true,
    error: '',
    expandedQueries: expanded,
    primaryQuery: primary,
    expansionSource: 'llm',
  };
}
/**
 * Build query variants from fixed rules when no LLM expansion is supplied.
 *
 * The compacted query is combined with the country code on its own and with
 * the modifiers "supplier", "wholesale", "distributor" and "b2b". Queries
 * mentioning "coffee", "office machine" or "office equipment" receive extra
 * topic-specific variants. Candidates are de-duplicated, the first one is
 * the primary query, and at most 8 are returned.
 *
 * @param rawQuery - Query text to expand.
 * @param countryUpper - Country token appended to every variant.
 * @returns A result with `expansionSource: 'rule'`, or a failure result if
 *          no candidate survives de-duplication.
 */
function generateRuleExpansion(rawQuery: string, countryUpper: string): ExpansionResult {
  const subject = compact(rawQuery);
  const subjectLower = subject.toLowerCase();
  const mentionsCoffee = subjectLower.includes('coffee');
  const mentionsOfficeGear =
    subjectLower.includes('office machine') || subjectLower.includes('office equipment');

  const candidates: string[] = [
    `${subject} ${countryUpper}`,
    `${subject} supplier ${countryUpper}`,
    `${subject} wholesale ${countryUpper}`,
    `${subject} distributor ${countryUpper}`,
    `${subject} b2b ${countryUpper}`,
    ...(mentionsCoffee
      ? [
          `coffee shop ${countryUpper}`,
          `coffee roastery ${countryUpper}`,
          `specialty coffee wholesale ${countryUpper}`,
        ]
      : []),
    ...(mentionsOfficeGear
      ? [
          `office equipment supplier ${countryUpper}`,
          `office machine distributor ${countryUpper}`,
        ]
      : []),
  ];

  const unique = dedupeKeepOrder(candidates);
  if (unique.length > 0) {
    return {
      ok: true,
      error: '',
      expandedQueries: unique.slice(0, 8), // Cap the list at 8 queries
      primaryQuery: unique[0],
      expansionSource: 'rule',
    };
  }

  return {
    ok: false,
    error: 'failed to build expanded queries',
    expandedQueries: [],
    primaryQuery: '',
    expansionSource: '',
  };
}
/**
 * Produce the query expansion for a search.
 *
 * A query that is empty after whitespace compaction is rejected. Otherwise
 * a non-blank `llmExpansion` payload is parsed and used as-is; when it is
 * blank, the rule-based generator is applied to the compacted query.
 *
 * @param rawQuery - Query text to expand.
 * @param countryUpper - Country token, used only by the rule-based path.
 * @param llmExpansion - JSON text from the LLM, or a blank string.
 * @returns The expansion result from the selected strategy.
 */
export function resolveExpansion(
  rawQuery: string,
  countryUpper: string,
  llmExpansion: string,
): ExpansionResult {
  const query = compact(rawQuery);
  if (query === '') {
    return {
      ok: false,
      error: 'query is empty after normalization',
      expandedQueries: [],
      primaryQuery: '',
      expansionSource: '',
    };
  }

  // A payload that is only whitespace counts as "not provided".
  const hasLlmPayload = llmExpansion.trim().length > 0;
  return hasLlmPayload
    ? parseLLMExpansion(llmExpansion)
    : generateRuleExpansion(query, countryUpper);
}