198 lines
4.6 KiB
TypeScript
198 lines
4.6 KiB
TypeScript
import { ExpansionResult } from './types.js';
|
|
|
|
/**
 * Strip leading and trailing whitespace from a string.
 *
 * Delegates to the built-in `String.prototype.trim`, which this file already
 * calls directly elsewhere; it removes the same characters a `\s`-based
 * regex would, without the hand-written pattern.
 *
 * @param input - Text to trim.
 * @returns `input` without surrounding whitespace.
 */
function trim(input: string): string {
  return input.trim();
}
|
|
|
|
/**
 * Collapse every run of whitespace into a single space, then strip the
 * whitespace left at either end.
 *
 * @param input - Text to compact.
 * @returns The single-spaced, trimmed text (empty when `input` is only whitespace).
 */
function compact(input: string): string {
  const singleSpaced = input.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
|
|
/**
 * Normalize a query: trim surrounding whitespace, then drop one leading
 * "cold-outreach:" prefix (matched case-insensitively) together with any
 * whitespace that follows the colon.
 *
 * @param input - Raw query text.
 * @returns The query without surrounding whitespace or the prefix.
 */
export function normalizeQuery(input: string): string {
  const normalized = trim(input);
  // The `i` flag covers every upper/lower-case spelling of the prefix,
  // replacing the per-letter character classes.
  return normalized.replace(/^cold-outreach:\s*/i, '');
}
|
|
|
|
/**
 * Remove duplicate entries while keeping first-seen order.
 *
 * Each entry is compacted first; entries that compact to an empty string are
 * dropped. Two entries count as duplicates when their compacted forms are
 * equal ignoring case, and the first occurrence (with its original casing)
 * is the one that is kept.
 *
 * @param items - Candidate strings.
 * @returns The compacted, de-duplicated strings in first-seen order.
 */
function dedupeKeepOrder(items: string[]): string[] {
  // Map iteration follows insertion order, so the values come back in the
  // order each distinct key was first encountered.
  const firstByKey = new Map<string, string>();

  for (const raw of items) {
    const text = compact(raw);
    if (text) {
      const key = text.toLowerCase();
      if (!firstByKey.has(key)) {
        firstByKey.set(key, text);
      }
    }
  }

  return Array.from(firstByKey.values());
}
|
|
|
|
/**
 * Parse an LLM-supplied query expansion payload.
 *
 * Accepted JSON shapes:
 * - an array of query strings; the first surviving entry becomes the primary query
 * - an object with `expandedQueries` (or `queries`) and, optionally,
 *   `primaryQuery` (or `primary_query`); without a primary, the first
 *   expanded query is used
 *
 * Only `JSON.parse` failures are reported as invalid JSON. Values of the
 * wrong type (non-array query lists, non-string entries, a non-string
 * primary) are ignored, so a malformed payload ends in the specific
 * "is empty" errors instead of a misleading JSON error.
 *
 * @param llmExpansion - Raw JSON text.
 * @returns On success, the de-duplicated queries (always containing the
 *   primary query) with `expansionSource: 'llm'`; otherwise a result with
 *   `ok: false` and an `error` message.
 */
function parseLLMExpansion(llmExpansion: string): ExpansionResult {
  const fail = (error: string): ExpansionResult => ({
    ok: false,
    error,
    expandedQueries: [],
    primaryQuery: '',
    expansionSource: '',
  });

  // Anything that is not an array yields no queries; inside an array only
  // string entries are kept before compacting and de-duplicating.
  const toQueryList = (value: unknown): string[] =>
    Array.isArray(value)
      ? dedupeKeepOrder(value.filter((entry): entry is string => typeof entry === 'string'))
      : [];

  // Keep the try scoped to JSON.parse so later problems are not mislabeled.
  let parsed: unknown;
  try {
    parsed = JSON.parse(llmExpansion);
  } catch {
    return fail('QUERY_EXPANSION_JSON is not valid JSON');
  }

  let expanded: string[];
  let primary: string;

  if (Array.isArray(parsed)) {
    expanded = toQueryList(parsed);
    primary = expanded[0] ?? '';
  } else if (typeof parsed === 'object' && parsed !== null) {
    const fields = parsed as Record<string, unknown>;

    // Prefer `expandedQueries`; fall back to `queries` when it is not an array.
    expanded = toQueryList(
      Array.isArray(fields.expandedQueries) ? fields.expandedQueries : fields.queries,
    );

    // First non-empty string among the two accepted primary keys.
    const declaredPrimary = [fields.primaryQuery, fields.primary_query].find(
      (value): value is string => typeof value === 'string' && value !== '',
    );
    primary = compact(declaredPrimary ?? expanded[0] ?? '');
  } else {
    return fail('QUERY_EXPANSION_JSON must be an array or object');
  }

  if (expanded.length === 0) {
    return fail('expandedQueries is empty');
  }

  if (!primary) {
    return fail('primaryQuery is empty');
  }

  // Ensure primary is in expanded queries (compared ignoring case); when it
  // is missing it is placed first.
  const primaryKey = primary.toLowerCase();
  if (!expanded.some((query) => query.toLowerCase() === primaryKey)) {
    expanded.unshift(primary);
  }

  return {
    ok: true,
    error: '',
    expandedQueries: expanded,
    primaryQuery: primary,
    expansionSource: 'llm',
  };
}
|
|
|
|
/**
 * Build query variants from fixed rules.
 *
 * Every query gets the plain, supplier, wholesale, distributor and b2b
 * variants suffixed with the country. Queries containing "coffee", or
 * "office machine" / "office equipment", get extra topic-specific variants.
 * Candidates are compacted and de-duplicated in order; the first one is the
 * primary query and at most 8 are returned.
 *
 * @param rawQuery - Query text; compacted before use.
 * @param countryUpper - Country token appended to every variant.
 * @returns A result with `expansionSource: 'rule'`, or a failed result when
 *   no candidate survives de-duplication.
 */
function generateRuleExpansion(rawQuery: string, countryUpper: string): ExpansionResult {
  const topic = compact(rawQuery);
  const topicLower = topic.toLowerCase();

  const isCoffeeTopic = topicLower.includes('coffee');
  const isOfficeTopic = ['office machine', 'office equipment'].some((needle) =>
    topicLower.includes(needle),
  );

  const candidates: string[] = [
    `${topic} ${countryUpper}`,
    `${topic} supplier ${countryUpper}`,
    `${topic} wholesale ${countryUpper}`,
    `${topic} distributor ${countryUpper}`,
    `${topic} b2b ${countryUpper}`,
    ...(isCoffeeTopic
      ? [
          `coffee shop ${countryUpper}`,
          `coffee roastery ${countryUpper}`,
          `specialty coffee wholesale ${countryUpper}`,
        ]
      : []),
    ...(isOfficeTopic
      ? [
          `office equipment supplier ${countryUpper}`,
          `office machine distributor ${countryUpper}`,
        ]
      : []),
  ];

  const unique = dedupeKeepOrder(candidates);

  if (unique.length > 0) {
    return {
      ok: true,
      error: '',
      // Cap the list at 8 queries.
      expandedQueries: unique.slice(0, 8),
      primaryQuery: unique[0],
      expansionSource: 'rule',
    };
  }

  return {
    ok: false,
    error: 'failed to build expanded queries',
    expandedQueries: [],
    primaryQuery: '',
    expansionSource: '',
  };
}
|
|
|
|
/**
 * Produce the query expansion for a search.
 *
 * The query is compacted first; an empty result is an error. When
 * `llmExpansion` contains anything besides whitespace it is parsed as the
 * LLM's JSON payload; otherwise the rule-based expansion is built from the
 * compacted query and the country.
 *
 * @param rawQuery - Query text as received.
 * @param countryUpper - Country token passed to the rule-based expansion.
 * @param llmExpansion - JSON text from the LLM, or blank to use the rules.
 * @returns The expansion result from whichever source was used.
 */
export function resolveExpansion(
  rawQuery: string,
  countryUpper: string,
  llmExpansion: string,
): ExpansionResult {
  const query = compact(rawQuery);

  if (query === '') {
    return {
      ok: false,
      error: 'query is empty after normalization',
      expandedQueries: [],
      primaryQuery: '',
      expansionSource: '',
    };
  }

  // A non-blank LLM payload takes precedence over the rule-based fallback.
  const hasLlmPayload = llmExpansion.trim().length > 0;

  return hasLlmPayload
    ? parseLLMExpansion(llmExpansion)
    : generateRuleExpansion(query, countryUpper);
}
|