feat: add entity analytics, pluggable cache, and pitfalls docs

- Add Cache interface with MemoryCache and NoopCache implementations
- Make SocialhoseClient accept injectable cache option
- Remove Next.js next.revalidate coupling from transport
- Add entity analytics: getEntityBrief, getEntityStats, getEntityBriefs,
  getCampaignIdByMatch with exact sentiment/platform faceting and
  cumulative-differenced timeline
- Add 23 new tests (29 total) covering entity analytics, cache injection,
  sentiment reconciliation, bounded concurrency, and timeline differencing
- Update README with entity analytics, custom caching, and pitfalls sections
- Fix CI branch: main -> master
This commit is contained in:
Mo Elzubeir
2026-05-29 13:10:05 -05:00
parent e34552ac33
commit 252ea713b1
7 changed files with 836 additions and 30 deletions
+41
View File
@@ -0,0 +1,41 @@
export interface Cache {
get(key: string): Promise<unknown | undefined>;
set(key: string, value: unknown, ttlMs: number): Promise<void>;
delete(key: string): Promise<void>;
}
type Entry = { at: number; value: unknown; ttlMs: number };
/** In-memory Map-backed cache with per-entry TTL. */
export class MemoryCache implements Cache {
private readonly map = new Map<string, Entry>();
async get(key: string): Promise<unknown | undefined> {
const entry = this.map.get(key);
if (!entry) return undefined;
if (entry.ttlMs > 0 && Date.now() - entry.at > entry.ttlMs) {
this.map.delete(key);
return undefined;
}
return entry.value;
}
async set(key: string, value: unknown, ttlMs: number): Promise<void> {
this.map.set(key, { at: Date.now(), value, ttlMs });
}
async delete(key: string): Promise<void> {
this.map.delete(key);
}
}
/** No-op cache — all operations are no-ops; useful for disabling caching. */
export class NoopCache implements Cache {
async get(_key: string): Promise<undefined> {
return undefined;
}
async set(_key: string, _value: unknown, _ttlMs: number): Promise<void> {}
async delete(_key: string): Promise<void> {}
}
+281 -16
View File
@@ -1,3 +1,7 @@
export type { Cache } from './cache';
export { MemoryCache, NoopCache } from './cache';
import { type Cache, MemoryCache, NoopCache } from './cache';
export type Sentiment = 'positive' | 'negative' | 'neutral';
export type SentimentSplit = { positive: number; negative: number; neutral: number };
export type QueryValue = string | number | boolean | null | undefined;
@@ -12,10 +16,16 @@ export interface SocialhoseClientOptions {
retries?: number;
retryDelayMs?: (attempt: number) => number;
cacheTtlMs?: number;
/** Inject a custom cache (Redis, Next.js Data Cache, etc.). If provided, cacheTtlMs is ignored for the internal map. */
cache?: Cache;
defaultHeaders?: Record<string, string>;
}
export interface RequestOptions {
/**
* Reserved for cache-implementation use; not forwarded to fetch.
* Pass to your Cache.set() implementation to control per-request TTL.
*/
revalidateSeconds?: number;
signal?: AbortSignal;
headers?: Record<string, string>;
@@ -158,6 +168,12 @@ export interface MailingListInvitation {
export type InviteOutcome = 'invited' | 'already' | 'error';
export interface InviteResult {
outcome: InviteOutcome;
invitation?: MailingListInvitation;
detail?: string;
}
export interface Paginated<T> {
count: number;
next: string | null;
@@ -179,6 +195,37 @@ export interface MentionFilters extends AnalyticsFilters {
ordering?: string;
}
// ---------- Entity-level analytics (content_search faceting) ----------
//
// The analytics endpoints are campaign-scoped only, but /mentions/ accepts
// content_search composed with sentiment/platform/date filters and always
// returns an exact `count`. We build per-entity views by faceting that search:
// one request gives count + a 20-mention sample (sentiment/platform are exact
// when count <= 20, which covers most entities at current volume); a few
// count-only requests add precise week-over-week momentum.
export interface PlatformShare {
platform: string;
count: number;
}
export interface EntityBrief {
term: string;
total: number;
exact: boolean; // sample covers the full population (count <= sample size)
sentiment: SentimentSplit;
platformMix: PlatformShare[];
sample: Mention[]; // up to 20, ordered by engagement
}
export interface EntityStats extends EntityBrief {
recent: Mention[]; // up to 20, newest first
recent7d: number;
prev7d: number;
momentumPct: number | null; // last 7 days vs the prior 7
sparkline: { date: string; count: number }[];
}
export class SocialhoseError extends Error {
readonly status?: number;
readonly path: string;
@@ -195,14 +242,13 @@ export class SocialhoseError extends Error {
}
}
type CacheEntry = { at: number; value: unknown };
type FetchInitWithNext = RequestInit & { next?: { revalidate?: number } };
const DEFAULT_BASE_URL = 'https://socialhose.net/api/public/v1';
const DEFAULT_UA =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36';
// Platform enum the /mentions/ `platforms` filter accepts.
const PLATFORM_KEYS = ['twitter', 'reddit', 'facebook', 'instagram', 'tiktok', 'linkedin'] as const;
function sleep(ms: number): Promise<void> {
if (ms <= 0) return Promise.resolve();
return new Promise((resolve) => setTimeout(resolve, ms));
@@ -245,6 +291,27 @@ function timeoutSignal(timeoutMs: number, upstream?: AbortSignal): AbortSignal {
return AbortSignal.timeout(timeoutMs);
}
/** Returns YYYY-MM-DD for today ± offsetDays (UTC). */
function isoDay(offsetDays = 0): string {
const d = new Date();
d.setUTCDate(d.getUTCDate() + offsetDays);
return d.toISOString().slice(0, 10);
}
function sentimentOf(mentions: Mention[]): SentimentSplit {
const s: SentimentSplit = { positive: 0, negative: 0, neutral: 0 };
for (const m of mentions) if (m.sentiment in s) s[m.sentiment] += 1;
return s;
}
function platformMixOf(mentions: Mention[]): PlatformShare[] {
const mix = new Map<string, number>();
for (const m of mentions) mix.set(m.platform, (mix.get(m.platform) ?? 0) + 1);
return [...mix.entries()]
.map(([platform, count]) => ({ platform, count }))
.sort((a, b) => b.count - a.count);
}
export class SocialhoseClient {
readonly apiKey: string;
readonly baseUrl: string;
@@ -256,7 +323,8 @@ export class SocialhoseClient {
private readonly fetchImpl: typeof fetch;
private readonly retryDelayMs: (attempt: number) => number;
private readonly defaultHeaders: Record<string, string>;
private readonly cache = new Map<string, CacheEntry>();
private readonly cacheImpl: Cache;
private readonly usedCacheKeys = new Set<string>();
constructor(options: SocialhoseClientOptions) {
if (!options.fetch && typeof fetch === 'undefined') throw new Error('A fetch implementation is required');
@@ -269,20 +337,26 @@ export class SocialhoseClient {
this.retryDelayMs = options.retryDelayMs ?? ((attempt) => 400 * 2 ** attempt + Math.random() * 200);
this.cacheTtlMs = options.cacheTtlMs ?? 60_000;
this.defaultHeaders = options.defaultHeaders ?? {};
this.cacheImpl = options.cache ?? (this.cacheTtlMs > 0 ? new MemoryCache() : new NoopCache());
}
clearCache(): void {
this.cache.clear();
async clearCache(): Promise<void> {
const keys = [...this.usedCacheKeys];
this.usedCacheKeys.clear();
await Promise.all(keys.map((k) => this.cacheImpl.delete(k)));
}
async get<T>(path: string, params: QueryParams = {}, options: RequestOptions = {}): Promise<T> {
const query = encodeParams(params);
const url = `${joinUrl(this.baseUrl, path)}${query ? `?${query}` : ''}`;
const hit = this.cache.get(url);
if (this.cacheTtlMs > 0 && hit && Date.now() - hit.at < this.cacheTtlMs) return hit.value as T;
const cached = await this.cacheImpl.get(url);
if (cached !== undefined) return cached as T;
const value = await this.request<T>('GET', path, url, undefined, options);
if (this.cacheTtlMs > 0) this.cache.set(url, { at: Date.now(), value });
const ttlMs = options.revalidateSeconds !== undefined ? options.revalidateSeconds * 1000 : this.cacheTtlMs;
await this.cacheImpl.set(url, value, ttlMs);
this.usedCacheKeys.add(url);
return value;
}
@@ -389,7 +463,7 @@ export class SocialhoseClient {
listId: string,
invite: { email: string; first_name?: string; last_name?: string; invitation_message?: string },
options: RequestOptions = {},
): Promise<{ outcome: InviteOutcome; invitation?: MailingListInvitation; detail?: string }> {
): Promise<InviteResult> {
const { status, data } = await this.post<{
status?: string;
invitation?: MailingListInvitation;
@@ -401,6 +475,201 @@ export class SocialhoseClient {
return { outcome: 'error', detail: data?.detail ?? `Socialhose returned HTTP ${status}` };
}
// ---------- Entity analytics ----------
/** One request: count + a top-engagement sample, with sentiment/platform derived. */
async getEntityBrief(term: string, campaignId?: string, options: RequestOptions = {}): Promise<EntityBrief> {
const d = await this.getMentions(
{ campaign_ids: campaignId, content_search: term, ordering: '-engagement_count' },
options,
);
return {
term,
total: d.count,
exact: d.count <= d.results.length,
sentiment: sentimentOf(d.results),
platformMix: platformMixOf(d.results),
sample: d.results,
};
}
/**
* Full single-entity dashboard: brief + exact distributions + newest mentions + daily timeline.
* All sub-requests are best-effort — a single failure never zeros out the entity.
*/
async getEntityStats(term: string, campaignId?: string, options: RequestOptions = {}): Promise<EntityStats> {
const base = { campaign_ids: campaignId, content_search: term };
const brief = await this.getEntityBrief(term, campaignId, options);
const [recentPage, sparkline, exactSentiment, exactPlatformMix] = await Promise.all([
this.getMentions({ ...base, ordering: '-published_at' }, options).catch(() => null),
this.getEntityTimeline(term, campaignId, brief.total, undefined, options).catch(
() => [] as { date: string; count: number }[],
),
this.getExactSentiment(term, campaignId, brief.total, options).catch(() => null),
this.getExactPlatformMix(term, campaignId, brief.total, options).catch(() => null),
]);
const recent = recentPage?.results ?? [];
const sumOf = (pts: { count: number }[]) => pts.reduce((s, p) => s + p.count, 0);
const recent7d = sumOf(sparkline.slice(-7));
const prev7d = sumOf(sparkline.slice(-14, -7));
const momentumPct =
sparkline.length === 0
? null
: prev7d > 0
? ((recent7d - prev7d) / prev7d) * 100
: recent7d > 0
? 100
: null;
const distributionsExact = exactSentiment !== null && exactPlatformMix !== null;
return {
...brief,
exact: distributionsExact ? true : brief.exact,
sentiment: exactSentiment ?? brief.sentiment,
platformMix: exactPlatformMix ?? brief.platformMix,
recent,
recent7d,
prev7d,
momentumPct,
sparkline,
};
}
/** Fetch briefs for many entities with bounded concurrency (rate-limit friendly). */
async getEntityBriefs(
terms: string[],
campaignId?: string,
concurrency = 20,
options: RequestOptions = {},
): Promise<Map<string, EntityBrief>> {
const out = new Map<string, EntityBrief>();
let cursor = 0;
const worker = async () => {
while (cursor < terms.length) {
const term = terms[cursor++];
try {
out.set(term, await this.getEntityBrief(term, campaignId, options));
} catch {
// Silent: the entity simply doesn't appear in the map.
}
}
};
await Promise.all(Array.from({ length: Math.min(concurrency, terms.length) }, worker));
return out;
}
/** Resolve a live campaign id by matching a substring of its name. */
async getCampaignIdByMatch(match: string, options: RequestOptions = {}): Promise<string | undefined> {
const campaigns = await this.getCampaigns(options).catch(() => [] as Campaign[]);
const needle = match.toLowerCase();
return campaigns.find((c) => c.name.toLowerCase().includes(needle))?.id;
}
/**
* Exact sentiment split for a term, by faceting /mentions/ count over each
* sentiment value. Returns null if any facet fails or totals don't reconcile
* with `total` (API silently ignored the filter).
*/
private async getExactSentiment(
term: string,
campaignId: string | undefined,
total: number,
options: RequestOptions = {},
): Promise<SentimentSplit | null> {
const order: Sentiment[] = ['positive', 'negative', 'neutral'];
const counts = await Promise.all(
order.map((s) =>
this.getMentions({ campaign_ids: campaignId, content_search: term, sentiments: s }, options)
.then((d) => d.count)
.catch(() => null),
),
);
if (counts.some((c) => c === null)) return null;
const [positive, negative, neutral] = counts as number[];
// Every mention carries exactly one sentiment; facets must sum to the known total.
// A mismatch means the filter wasn't honored — distrust it.
if (positive + negative + neutral !== total) return null;
return { positive, negative, neutral };
}
/**
* Exact platform mix for a term, by faceting /mentions/ count over each platform.
* Returns null if any facet fails or the facets sum to more than the known total
* (the `platforms` filter was ignored). A sum below total is fine — mentions may
* sit on platforms outside PLATFORM_KEYS.
*/
private async getExactPlatformMix(
term: string,
campaignId: string | undefined,
total: number,
options: RequestOptions = {},
): Promise<PlatformShare[] | null> {
const counts = await Promise.all(
PLATFORM_KEYS.map((platform) =>
this.getMentions({ campaign_ids: campaignId, content_search: term, platforms: platform }, options)
.then((d) => ({ platform, count: d.count }) as PlatformShare)
.catch(() => null),
),
);
if (counts.some((c) => c === null)) return null;
const mix = counts as PlatformShare[];
if (mix.reduce((sum, p) => sum + p.count, 0) > total) return null;
return mix.filter((p) => p.count > 0).sort((a, b) => b.count - a.count);
}
/**
* Real daily mention volume for the last `days` days.
*
* Uses CUMULATIVE DIFFERENCING (date_from only) rather than [date_from, date_to]
* windows. The API counts date_to inclusively, so adjacent windows share a day
* and double-count interior days. Differences of a monotonic cumulative series
* are non-negative and sum to at most `total`.
*
* If the earliest cumulative exceeds `total`, content_search was dropped on
* date-filtered queries — return [] rather than wrong bars.
*/
private async getEntityTimeline(
term: string,
campaignId: string | undefined,
total: number,
days = 14,
options: RequestOptions = {},
): Promise<{ date: string; count: number }[]> {
// One boundary per day plus tomorrow (+1) so today differences out cleanly.
const offsets = Array.from({ length: days + 1 }, (_, i) => -(days - 1 - i)); // oldest .. +1
const cumulative = new Array<number | null>(offsets.length).fill(null);
let cursor = 0;
const worker = async () => {
while (cursor < offsets.length) {
const idx = cursor++;
const d = await this.getMentions(
{ campaign_ids: campaignId, content_search: term, date_from: isoDay(offsets[idx]) },
options,
).catch(() => null);
if (d) cumulative[idx] = d.count;
}
};
// Bounded fan-out: the retry/backoff absorbs the rest of the burst.
await Promise.all(Array.from({ length: Math.min(6, offsets.length) }, worker));
const earliest = cumulative[0];
if (earliest != null && earliest > total) return [];
const points: { date: string; count: number }[] = [];
for (let i = 0; i < days; i++) {
const hi = cumulative[i]; // on/after day i
const lo = cumulative[i + 1]; // on/after the next day
if (hi == null || lo == null) continue;
points.push({ date: isoDay(offsets[i]), count: Math.max(0, hi - lo) });
}
return points;
}
private async request<T>(
method: 'GET' | 'POST',
path: string,
@@ -426,7 +695,7 @@ export class SocialhoseClient {
for (let attempt = 0; attempt <= this.retries; attempt++) {
try {
const init: FetchInitWithNext = {
const init: RequestInit = {
method,
headers: {
...this.defaultHeaders,
@@ -439,9 +708,6 @@ export class SocialhoseClient {
body: method === 'POST' ? JSON.stringify(body) : undefined,
signal: timeoutSignal(this.timeoutMs, options.signal),
};
if (method === 'GET' && options.revalidateSeconds !== undefined) {
init.next = { revalidate: options.revalidateSeconds };
}
if (method === 'POST') init.cache = 'no-store';
res = await this.fetchImpl(url, init);
@@ -482,4 +748,3 @@ export class SocialhoseClient {
export function createSocialhoseClient(options: SocialhoseClientOptions): SocialhoseClient {
return new SocialhoseClient(options);
}