diff --git a/.changeset/provider-search-controls.md b/.changeset/provider-search-controls.md new file mode 100644 index 0000000..3be1a6e --- /dev/null +++ b/.changeset/provider-search-controls.md @@ -0,0 +1,18 @@ +--- +"@refkit/core": minor +"@refkit/mcp": minor +"@refkit/provider-unsplash": minor +"@refkit/provider-pexels": minor +"@refkit/provider-pixabay": minor +"@refkit/provider-flickr": minor +"@refkit/provider-brave": minor +"@refkit/provider-openverse": minor +"@refkit/provider-gutendex": minor +"@refkit/provider-poetrydb": minor +"@refkit/provider-wikimedia-commons": minor +"@refkit/provider-met": minor +"@refkit/provider-artic": minor +"@refkit/provider-smithsonian": minor +--- + +Add unified search controls, provider capability metadata, MCP controls input, search metadata/explanations, practical provider-specific `providerOptions` whitelists, and a core duplicate hook for agent-facing searches. diff --git a/.gitignore b/.gitignore index 420e6c1..18cb28f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ node_modules/ dist/ out/ coverage/ +.superpowers/ *.log .DS_Store .env diff --git a/README.md b/README.md index 8b15469..7ff68d3 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,61 @@ for (const r of refs) { const safe = await refkit.search({ query: 'forest', modalities: ['image'], gateFor: 'commercial-product' }) ``` +## Search controls + +Use provider-neutral `controls` for the main path. 
refkit routes each control only to providers that declare support, and `searchWithMeta()` explains which providers applied or ignored each control: + +```ts +await refkit.search({ + query: 'brutalist library interior', + modalities: ['image'], + controls: { + orientation: 'landscape', + color: 'blue', + language: 'en-US', + sort: 'relevance', + safety: 'strict', + license: { commercial: true, modification: true }, + media: { minWidth: 1200, minHeight: 800 }, + }, +}) +``` + +Use `providerOptions` for provider-specific escape hatches that do not belong in the common contract. These are **typed whitelists**, not raw passthrough maps: each provider package translates the practical official search parameters it supports and ignores unsupported values. + +```ts +await refkit.search({ + query: 'forest path', + modalities: ['image'], + controls: { orientation: 'landscape', safety: 'strict' }, + providerOptions: { + unsplash: { collections: ['abc', 'def'], page: 2 }, + flickr: { tags: ['forest', 'path'], tagMode: 'all', minTakenDate: '2020-01-01' }, + brave: { country: 'US', searchLang: 'en', spellcheck: false }, + met: { departmentId: 11, isOnView: true }, + gutendex: { topic: 'children', sort: 'popular' }, + }, +}) +``` + +The provider package owns its native options surface, e.g. `UnsplashSearchOptions`, `FlickrSearchOptions`, `OpenverseImageSearchOptions`, `MetSearchOptions`, and `PoetryDbSearchOptions`. Response-format/debug parameters and auth-only knobs are intentionally omitted when they would break refkit's normalized `Reference` contract. 
+ +When an agent or UI needs to explain what happened, use `searchWithMeta`: + +```ts +const { references, meta } = await refkit.searchWithMeta({ + query: 'forest path', + modalities: ['image'], + controls: { orientation: 'landscape', color: 'green' }, + gateFor: 'commercial-product', +}) + +console.log(meta.controls?.appliedByProvider) +console.log(meta.controls?.ignoredByProvider) +console.log(meta.providers) +console.log(meta.warnings) +``` + ## Ranking & rerank By default, results are fused across sources with **Reciprocal Rank Fusion** — cross-source-orderable, but not query-aware. For sharper relevance, pass a **reranker**: @@ -81,6 +136,19 @@ rerank: async ({ query, refs }) => myEmbeddingRerank(query, refs) Rerank is **opt-in** — omit it for the default RRF order. It runs post-merge, before the `gateFor` license filter and the limit. +URL dedupe is built in, and perceptual-hash dedupe is available when providers or hosts supply hashes and `merge.hashThreshold` is set (it is off by default). For host-computed fingerprints or embeddings, add a duplicate hook without making core fetch or decode media. Guard against missing values in real code, because the bare `===` below also matches two references that both lack a fingerprint: + +```ts +const refkit = createRefkit({ + providers, + merge: { + isDuplicate: (candidate, existing) => + (candidate.raw as { fingerprint?: string }).fingerprint === + (existing.raw as { fingerprint?: string }).fingerprint, + }, +}) +``` + ## Providers | Package | Source | Modality | Auth | License | @@ -123,7 +191,14 @@ Audio/video are extra factories on existing packages: `openverseAudio()`, `pexel - **No re-hosting** — keep `canonicalUrl` + thumbnails only; never store originals. - **strict-deny** — when rights can't be determined, deny / needs-review (never fail-open). Unknown, NonCommercial, NoDerivatives and "no known copyright restrictions" never map to a usable license. -## Agent / MCP +## Agent usage + +Agents can use refkit in two ways: + +1. 
**SDK inside a host tool** — your app defines its own `search` tool, wires `createRefkit({ providers, fetch, cache })`, and controls keys, caching, retries, rerankers, filters, and provider-specific options. +2. **MCP adapter** — `@refkit/mcp` exposes the same license-normalized search over `search_references`, useful when you want a zero-glue tool that works across MCP-capable agents. + +## MCP `@refkit/mcp` exposes `search_references` over the [Model Context Protocol](https://modelcontextprotocol.io), so any MCP-capable agent can search license-normalized references with zero glue code. diff --git a/packages/core/README.md b/packages/core/README.md index 64f44cf..4fb1eef 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -39,6 +39,73 @@ for (const r of refs) { const safe = await refkit.search({ query: 'forest', modalities: ['image'], gateFor: 'commercial-product' }) ``` +## Search controls + +Portable controls are expressed once and applied only to providers that declare support: + +```ts +await refkit.search({ + query: 'minimal workspace', + modalities: ['image'], + controls: { + orientation: 'landscape', + color: 'white', + language: 'en-US', + }, +}) +``` + +Provider-specific escape hatches go under `providerOptions`, keyed by provider id. Core routes only the matching entry; providers own typed whitelists for the practical official search parameters they translate: + +```ts +await refkit.search({ + query: 'mountain trail', + modalities: ['image'], + controls: { orientation: 'landscape', safety: 'strict' }, + providerOptions: { + unsplash: { collections: ['abc', 'def'], page: 2 }, + flickr: { sort: 'relevance', tags: ['mountain', 'trail'], tagMode: 'all' }, + openverse: { source: ['flickr'], category: 'photograph', aspectRatio: 'wide' }, + smithsonian: { sort: 'newest', rows: 25 }, + }, +}) +``` + +`providerOptions` is not a raw upstream passthrough. 
Each provider package exports its own `*SearchOptions` interface and keeps response-format/debug/auth-only parameters out when they would conflict with normalized references or provider credentials. + +Currently supported unified controls: + +| Provider id | Unified controls | +|---|---| +| `unsplash` | `orientation`, `color`, `language`, `sort`, `safety` | +| `pexels` | `orientation`, `color`, `language`, `media.size`, `page` | +| `pexels-video` | `orientation`, `language`, `media.size`, `page` | +| `pixabay` | `orientation`, `color`, `language`, `sort`, `safety`, `media.kind`, `media.minWidth`, `media.minHeight` | +| `pixabay-video` | `language`, `sort`, `safety`, `media.kind`, `media.minWidth`, `media.minHeight` | +| `flickr` | `sort`, `safety`, `license.commercial`, `license.modification`, `license.allowUnknown`, `creator.id` | +| `brave` | `safety` | +| `openverse` | `license.commercial`, `license.modification`, `license.allowUnknown` | +| `openverse-audio` | `license.commercial`, `license.modification`, `license.allowUnknown` | +| `gutendex` | `language`, `text.copyright`, `page` | +| `poetrydb`, `wikimedia-commons`, `met`, `artic`, `smithsonian` | no unified controls in this release | + +Use `searchWithMeta` when a host UI or agent needs the search explanation layer: + +```ts +const { references, meta } = await refkit.searchWithMeta({ + query: 'minimal workspace', + modalities: ['image'], + controls: { orientation: 'landscape', color: 'white' }, + gateFor: 'commercial-product', +}) + +meta.controls?.appliedByProvider +meta.controls?.ignoredByProvider +meta.providers // provider status: fulfilled / failed / skipped +meta.gate // before/after/dropped counts when gateFor is used +meta.warnings // partial-result and gate/drop notes +``` + ## Ranking & rerank Results are fused across sources with **Reciprocal Rank Fusion** (cross-source-orderable, not query-aware). 
Pass an optional `rerank`: @@ -50,6 +117,23 @@ Rerank is opt-in and runs post-merge, before the `gateFor` license filter and th Ranking is only as good as the candidate pool: `search` overfetches `limit × poolFactor` per provider (default 4×, capped per source) and narrows to `limit` after merge/rerank/gate — so dedup and ranking see a wide pool, not a source-truncated slice. Lower `poolFactor` when you query many providers. +## Dedupe hooks + +Core dedupes exact canonical URLs by default and can dedupe equal-length perceptual hashes when `merge.hashThreshold` is set. Hosts that compute their own fingerprints or embeddings can add a sync duplicate predicate: + +```ts +const refkit = createRefkit({ + providers, + merge: { + isDuplicate: (candidate, existing) => + (candidate.raw as { fingerprint?: string }).fingerprint === + (existing.raw as { fingerprint?: string }).fingerprint, + }, +}) +``` + +The hook compares `Reference` objects only. Core still never fetches, decodes, or stores media. Note that a bare `===` comparison like the one above also returns `true` when both fingerprints are `undefined`, so references that lack a fingerprint collapse into one; return `false` for missing values if some providers do not supply a fingerprint. + ## Invariants (enforced by `src/__tests__/no-network.test.ts`) - **Zero network** — no `fetch` call, no hard-coded endpoint in this package. Hosts inject `ProviderContext.fetch`. 
diff --git a/packages/core/src/__tests__/client.test.ts b/packages/core/src/__tests__/client.test.ts index fffdff4..f043d30 100644 --- a/packages/core/src/__tests__/client.test.ts +++ b/packages/core/src/__tests__/client.test.ts @@ -171,4 +171,97 @@ describe('createRefkit', () => { await rk.search({ query: 'x', modalities: ['image'], limit: 150 }) // > cap → fetch the limit itself, not less expect(sink.limit).toBe(150) }) + + it('forwards provider-specific search options only to the matching provider', async () => { + let seenA: unknown + let seenB: unknown + const a = defineProvider({ + id: 'a', + modalities: ['image'], + queryFeatures: ['keyword'], + search: async (q) => { seenA = q.providerOptions; return [] }, + }) + const b = defineProvider({ + id: 'b', + modalities: ['image'], + queryFeatures: ['keyword'], + search: async (q) => { seenB = q.providerOptions; return [] }, + }) + const rk = createRefkit({ providers: [a, b] }) + await rk.search({ + query: 'x', + modalities: ['image'], + providerOptions: { a: { orderBy: 'latest' }, b: { sort: 'relevance' } }, + }) + expect(seenA).toEqual({ orderBy: 'latest' }) + expect(seenB).toEqual({ sort: 'relevance' }) + }) + + it('searchWithMeta returns provider status, warnings, and gate summary', async () => { + const textOnly = defineProvider({ + id: 'text', + modalities: ['text'], + queryFeatures: ['keyword'], + search: async () => [], + }) + const rk = createRefkit({ + providers: [ + provider('ok', [ref('ok-1', 'https://ok/1', 'CC0-1.0'), ref('ok-2', 'https://ok/2', 'proprietary')]), + failing('bad'), + textOnly, + ], + }) + const out = await rk.searchWithMeta({ query: 'x', modalities: ['image'], gateFor: 'commercial-product' }) + + expect(out.references.map(r => r.canonicalUrl)).toEqual(['https://ok/1']) + expect(out.meta.providers).toEqual([ + { providerId: 'ok', status: 'fulfilled', returned: 2, accepted: 2, rejected: 0 }, + { providerId: 'bad', status: 'failed', error: 'boom' }, + { providerId: 'text', status: 
'skipped', reason: 'unsupported-modality' }, + ]) + expect(out.meta.gate).toEqual({ intent: 'commercial-product', before: 2, after: 1, dropped: 1 }) + expect(out.meta.warnings).toContain('1 provider(s) failed; returning partial results.') + }) + + it('uses merge.isDuplicate to dedupe host-supplied fingerprints during search', async () => { + const a = { ...ref('a-1', 'https://a/1'), relevance: 0.2, raw: { fingerprint: 'same' } } + const b = { ...ref('a-2', 'https://a/2'), relevance: 0.9, raw: { fingerprint: 'same' } } + const rk = createRefkit({ + providers: [provider('a', [b, a])], + merge: { + isDuplicate: (candidate, existing) => + (candidate.raw as { fingerprint?: string }).fingerprint === (existing.raw as { fingerprint?: string }).fingerprint, + }, + }) + const out = await rk.search({ query: 'x', modalities: ['image'] }) + expect(out.map(r => r.id)).toEqual(['a-2']) + }) + + it('searchWithMeta reports applied and ignored unified controls by provider', async () => { + const controlled = defineProvider({ + id: 'controlled', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'color'] }, + search: async () => [ref('controlled-1', 'https://controlled/1')], + }) + const plain = defineProvider({ + id: 'plain', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: [] }, + search: async () => [ref('plain-1', 'https://plain/1')], + }) + const rk = createRefkit({ providers: [controlled, plain] }) + const out = await rk.searchWithMeta({ + query: 'x', + modalities: ['image'], + controls: { orientation: 'landscape', color: 'blue', safety: 'strict' }, + }) + expect(out.meta.controls).toEqual({ + requested: ['orientation', 'color', 'safety'], + appliedByProvider: { controlled: ['orientation', 'color'], plain: [] }, + ignoredByProvider: { controlled: ['safety'], plain: ['orientation', 'color', 'safety'] }, + }) + }) }) diff --git a/packages/core/src/__tests__/dedup.test.ts 
b/packages/core/src/__tests__/dedup.test.ts index b87f20c..38c9f1d 100644 --- a/packages/core/src/__tests__/dedup.test.ts +++ b/packages/core/src/__tests__/dedup.test.ts @@ -53,6 +53,18 @@ describe('dedupeReferences', () => { expect(out).toHaveLength(2) }) + it('uses a custom duplicate hook for host-supplied fingerprints', () => { + const out = dedupeReferences([ + make({ id: 'a', canonicalUrl: 'https://x/1', relevance: 0.4, raw: { fingerprint: 'same' } }), + make({ id: 'b', canonicalUrl: 'https://y/2', relevance: 0.9, raw: { fingerprint: 'same' } }), + make({ id: 'c', canonicalUrl: 'https://z/3', relevance: 0.6, raw: { fingerprint: 'other' } }), + ], { + isDuplicate: (candidate, existing) => + (candidate.raw as { fingerprint?: string }).fingerprint === (existing.raw as { fingerprint?: string }).fingerprint, + }) + expect(out.map(r => r.id)).toEqual(['b', 'c']) + }) + it('stale byUrl fix: C(url=a) must not dedupe against hash-replaced B(url=b) via stale index', () => { // Step 1: A(url=a, hash=ffff, rel=0.3) → pushed to kept[0]; byUrl = {url_a → 0} // Step 2: B(url=b, hash=fffe, rel=0.9) → url_b not in byUrl; hash-distance(fffe,ffff)=1≤4 → merges. 
diff --git a/packages/core/src/__tests__/provider.test.ts b/packages/core/src/__tests__/provider.test.ts index 2162f7d..dcd7797 100644 --- a/packages/core/src/__tests__/provider.test.ts +++ b/packages/core/src/__tests__/provider.test.ts @@ -34,4 +34,15 @@ describe('ReferenceProvider / defineProvider', () => { const p = defineProvider({ id: 'x', modalities: ['text'], queryFeatures: [], search: async () => [] }) expect(p.id).toBe('x') }) + + it('allows providers to declare supported unified search controls', () => { + const p = defineProvider({ + id: 'x', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'color', 'safety'] }, + search: async () => [], + }) + expect(p.capabilities?.controls).toEqual(['orientation', 'color', 'safety']) + }) }) diff --git a/packages/core/src/__tests__/query.test.ts b/packages/core/src/__tests__/query.test.ts index ff9bc1c..b3e3b91 100644 --- a/packages/core/src/__tests__/query.test.ts +++ b/packages/core/src/__tests__/query.test.ts @@ -34,4 +34,81 @@ describe('normalizeQuery', () => { expect(nq.text).toBe('cat') expect(nq.limit).toBe(10) }) + + it('passes only the matching providerOptions entry to the provider query', () => { + const nq = normalizeQuery( + { + query: 'cat', + modalities: ['image'], + providerOptions: { + p: { orderBy: 'latest' }, + other: { orderBy: 'relevant' }, + }, + }, + provider(['keyword']), + ) + expect(nq.providerOptions).toEqual({ orderBy: 'latest' }) + }) + + it('passes only provider-supported controls to the provider query', () => { + const p: ReferenceProvider = { + id: 'p', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'media.minWidth'] }, + search: async () => [], + } + const nq = normalizeQuery( + { + query: 'cat', + modalities: ['image'], + controls: { + orientation: 'landscape', + color: 'blue', + media: { minWidth: 1200, minHeight: 800 }, + }, + }, + p, + ) + expect(nq.controls).toEqual({ orientation: 
'landscape', media: { minWidth: 1200 } }) + }) + + it('maps legacy filters into controls for compatibility', () => { + const p: ReferenceProvider = { + id: 'p', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'color', 'language'] }, + search: async () => [], + } + const nq = normalizeQuery( + { + query: 'cat', + modalities: ['image'], + filters: { orientation: 'portrait', color: 'red', language: 'en-US' }, + }, + p, + ) + expect(nq.controls).toEqual({ orientation: 'portrait', color: 'red', language: 'en-US' }) + }) + + it('prefers primary controls over conflicting legacy filters when normalizing controls', () => { + const p: ReferenceProvider = { + id: 'p', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'color', 'language'] }, + search: async () => [], + } + const nq = normalizeQuery( + { + query: 'cat', + modalities: ['image'], + filters: { orientation: 'portrait', color: 'red', language: 'en-US' }, + controls: { orientation: 'landscape', color: 'blue', language: 'fr' }, + }, + p, + ) + expect(nq.controls).toEqual({ orientation: 'landscape', color: 'blue', language: 'fr' }) + }) }) diff --git a/packages/core/src/client.ts b/packages/core/src/client.ts index b831dd8..7f98f3f 100644 --- a/packages/core/src/client.ts +++ b/packages/core/src/client.ts @@ -6,9 +6,17 @@ import type { Intent, Verdict } from './evaluate-use' import { evaluateUse } from './evaluate-use' import type { Attribution } from './attribution' import { buildAttribution } from './attribution' -import type { ReferenceProvider, ProviderContext, KeyValueCache, SearchFilters } from './provider' +import type { + ReferenceProvider, + ProviderContext, + KeyValueCache, + SearchFilters, + SearchControls, + SearchControlKey, + ProviderOptionsById, +} from './provider' import { mergeReferences, type MergeOptions } from './merge' -import { normalizeQuery } from './query' +import { mergeSearchControls, 
normalizeQuery, requestedControlKeys, supportedControlKeys, unsupportedControlKeys } from './query' export interface RefkitOptions { providers: ReferenceProvider[] @@ -23,10 +31,56 @@ export interface ProviderError { error: unknown } +export interface ProviderSearchStatus { + providerId: string + status: 'fulfilled' | 'failed' | 'skipped' + returned?: number + accepted?: number + rejected?: number + reason?: 'unsupported-modality' + error?: string +} + +export interface SearchGateMeta { + intent: Intent + before: number + after: number + dropped: number +} + +export interface SearchControlsMeta { + requested: SearchControlKey[] + appliedByProvider: Record + ignoredByProvider: Record +} + +export interface SearchMeta { + query: string + modalities: Modality[] + limit: number + poolFactor: number + fetchLimit: number + appliedFilters?: SearchFilters + controls?: SearchControlsMeta + providerOptions?: string[] + providers: ProviderSearchStatus[] + gate?: SearchGateMeta + warnings: string[] +} + +export interface SearchResult { + references: Reference[] + meta: SearchMeta +} + export interface SearchInput { query: string modalities: Modality[] filters?: SearchFilters + controls?: SearchControls + /** Provider-specific search controls keyed by provider id. Core routes only the + * matching entry to each provider; providers whitelist what they translate. */ + providerOptions?: ProviderOptionsById limit?: number /** Overfetch this many × `limit` candidates per provider before merge/rerank/gate, * then narrow to `limit` — a wider pool means better dedup + ranking. 
Default 4 @@ -42,6 +96,7 @@ export interface SearchInput { export interface RefkitClient { search(input: SearchInput): Promise + searchWithMeta(input: SearchInput): Promise evaluateUse(ref: Reference, intent: Intent, ctx?: { userJurisdiction?: string }): Verdict buildAttribution(ref: Reference): Attribution readonly providers: readonly ReferenceProvider[] @@ -51,12 +106,18 @@ const DEFAULT_LIMIT = 30 const DEFAULT_POOL_FACTOR = 4 const MAX_POOL_LIMIT = 100 // never ask a single source for more than this, even at high limits +function errorSummary(error: unknown): string { + if (error instanceof Error) return error.message + if (typeof error === 'string') return error + return 'unknown error' +} + export function createRefkit(options: RefkitOptions): RefkitClient { if (!options.providers || options.providers.length === 0) { throw new Error('createRefkit: at least one provider is required') } - async function search(input: SearchInput): Promise { + async function searchInternal(input: SearchInput): Promise { const doFetch = options.fetch ?? globalThis.fetch if (typeof doFetch !== 'function') { throw new Error('createRefkit: no fetch available — pass options.fetch') @@ -76,10 +137,28 @@ export function createRefkit(options: RefkitOptions): RefkitClient { // Overfetch a wider candidate pool per provider, then narrow to `limit` after // merge/rerank/gate — you can't rank or dedup candidates you never fetched. const fetchLimit = Math.max(limit, Math.min(Math.ceil(limit * poolFactor), MAX_POOL_LIMIT)) + const requestedControlsSource = mergeSearchControls(input.controls, input.filters) + const requestedControls = requestedControlKeys(requestedControlsSource) + const controlsMeta = requestedControls.length > 0 ? 
{ + requested: requestedControls, + appliedByProvider: Object.fromEntries(options.providers.map(p => [p.id, supportedControlKeys(p, requestedControlsSource)])), + ignoredByProvider: Object.fromEntries(options.providers.map(p => [p.id, unsupportedControlKeys(p, requestedControlsSource)])), + } : undefined + const statusByProvider = new Map() + for (const p of options.providers) { + if (!chosen.includes(p)) statusByProvider.set(p.id, { providerId: p.id, status: 'skipped', reason: 'unsupported-modality' }) + } const settled = await Promise.allSettled( chosen.map(p => p.search( - normalizeQuery({ query: input.query, modalities: input.modalities, filters: input.filters, limit: fetchLimit }, p), + normalizeQuery({ + query: input.query, + modalities: input.modalities, + filters: input.filters, + controls: input.controls, + providerOptions: input.providerOptions, + limit: fetchLimit, + }, p), ctx, ), ), @@ -99,9 +178,17 @@ export function createRefkit(options: RefkitOptions): RefkitClient { input.onProviderError?.({ providerId: provider.id, error }) } } + statusByProvider.set(provider.id, { + providerId: provider.id, + status: 'fulfilled', + returned: res.value.length, + accepted: valid.length, + rejected: res.value.length - valid.length, + }) perSource.push(valid) } else { input.onProviderError?.({ providerId: provider.id, error: res.reason }) + statusByProvider.set(provider.id, { providerId: provider.id, status: 'failed', error: errorSummary(res.reason) }) } }) @@ -121,15 +208,43 @@ export function createRefkit(options: RefkitOptions): RefkitClient { if (input.rerank) { refs = await input.rerank({ query: input.query, refs, signal: input.signal ?? 
options.signal }) } + const beforeGate = refs.length + let gate: SearchGateMeta | undefined if (input.gateFor) { const intent = input.gateFor refs = refs.filter(r => evaluateUse(r.rights, intent).decision.startsWith('allowed')) + gate = { intent, before: beforeGate, after: refs.length, dropped: beforeGate - refs.length } + } + const references = refs.slice(0, limit) + const warnings: string[] = [] + const failedCount = [...statusByProvider.values()].filter(s => s.status === 'failed').length + if (failedCount > 0) warnings.push(`${failedCount} provider(s) failed; returning partial results.`) + if (gate && gate.dropped > 0) warnings.push(`${gate.dropped} result(s) dropped by ${gate.intent} gate.`) + return { + references, + meta: { + query: input.query, + modalities: input.modalities, + limit, + poolFactor, + fetchLimit, + ...(input.filters ? { appliedFilters: input.filters } : {}), + ...(controlsMeta ? { controls: controlsMeta } : {}), + ...(input.providerOptions ? { providerOptions: Object.keys(input.providerOptions) } : {}), + providers: options.providers.map(p => statusByProvider.get(p.id) ?? { providerId: p.id, status: 'skipped', reason: 'unsupported-modality' }), + ...(gate ? { gate } : {}), + warnings, + }, } - return refs.slice(0, limit) + } + + async function search(input: SearchInput): Promise { + return (await searchInternal(input)).references } return { search, + searchWithMeta: searchInternal, evaluateUse: (ref, intent, ctx) => evaluateUse(ref.rights, intent, ctx), buildAttribution: ref => buildAttribution({ diff --git a/packages/core/src/dedup.ts b/packages/core/src/dedup.ts index 30dc244..2f1c06d 100644 --- a/packages/core/src/dedup.ts +++ b/packages/core/src/dedup.ts @@ -21,6 +21,9 @@ export function hammingDistance(a: string, b: string): number { export interface DedupeOptions { /** Max hamming distance between perceptual hashes to treat as duplicates. Default 0 (off). 
*/ hashThreshold?: number + /** Host-supplied duplicate predicate for precomputed fingerprints/embeddings. + * Core never fetches or decodes media; the hook compares existing Reference data. */ + isDuplicate?: (candidate: Reference, existing: Reference) => boolean } // Collapse duplicates, keeping the highest-relevance representative. Two passes: @@ -58,6 +61,22 @@ export function dedupeReferences(refs: Reference[], opts: DedupeOptions = {}): R } if (merged) continue + if (opts.isDuplicate) { + for (let i = 0; i < kept.length; i++) { + const k = kept[i] + if (opts.isDuplicate(ref, k)) { + if (ref.relevance > k.relevance) { + byUrl.delete(canonicalizeUrl(k.canonicalUrl)) + byUrl.set(canonicalizeUrl(ref.canonicalUrl), i) + kept[i] = ref + } + merged = true + break + } + } + } + if (merged) continue + byUrl.set(url, kept.length) kept.push(ref) } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index b939514..eb091c3 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -29,10 +29,32 @@ export type { QueryFeature, NormalizedQuery, SearchFilters, + SearchControls, + SearchControlKey, + SearchSort, + SearchSafety, + SearchLicenseControls, + SearchMediaControls, + SearchCreatorControls, + SearchTextControls, + ProviderCapabilities, + ProviderOptionValue, + ProviderOptions, + ProviderOptionsById, KeyValueCache, } from './provider' export { normalizeQuery } from './query' export { createRefkit } from './client' -export type { RefkitClient, RefkitOptions, SearchInput, ProviderError } from './client' +export type { + RefkitClient, + RefkitOptions, + SearchInput, + SearchResult, + SearchMeta, + SearchControlsMeta, + SearchGateMeta, + ProviderSearchStatus, + ProviderError, +} from './client' export { lexicalReranker, tokenize } from './rerank' export type { Reranker, RerankInput, LexicalRerankOptions } from './rerank' diff --git a/packages/core/src/provider.ts b/packages/core/src/provider.ts index 90e8c9a..c68bdd3 100644 --- 
a/packages/core/src/provider.ts +++ b/packages/core/src/provider.ts @@ -9,16 +9,84 @@ export type QueryFeature = | 'author' | 'language' +export type SearchSort = 'relevance' | 'latest' | 'popular' | 'interesting' +export type SearchSafety = 'strict' | 'moderate' | 'off' + +export interface SearchLicenseControls { + commercial?: boolean + modification?: boolean + allowUnknown?: boolean +} + +export interface SearchMediaControls { + kind?: 'photo' | 'illustration' | 'vector' | 'film' | 'animation' + size?: 'small' | 'medium' | 'large' + minWidth?: number + minHeight?: number + duration?: 'short' | 'medium' | 'long' +} + +export interface SearchCreatorControls { + id?: string + name?: string +} + +export interface SearchTextControls { + copyright?: 'public-domain' | 'copyrighted' | 'any' +} + +export interface SearchControls { + orientation?: 'landscape' | 'portrait' | 'square' + color?: string + language?: string + sort?: SearchSort + safety?: SearchSafety + license?: SearchLicenseControls + media?: SearchMediaControls + creator?: SearchCreatorControls + text?: SearchTextControls + page?: number +} + +export type SearchControlKey = + | 'orientation' + | 'color' + | 'language' + | 'sort' + | 'safety' + | 'license.commercial' + | 'license.modification' + | 'license.allowUnknown' + | 'media.kind' + | 'media.size' + | 'media.minWidth' + | 'media.minHeight' + | 'media.duration' + | 'creator.id' + | 'creator.name' + | 'text.copyright' + | 'page' + +export interface ProviderCapabilities { + controls: readonly SearchControlKey[] +} + export interface SearchFilters { color?: string orientation?: 'landscape' | 'portrait' | 'square' language?: string } +export type ProviderOptionValue = string | number | boolean | readonly string[] | undefined +export type ProviderOptions = Record +export type ProviderOptionsById = Record + export interface NormalizedQuery { text: string modalities: Modality[] filters?: SearchFilters + controls?: SearchControls + providerOptions?: 
ProviderOptions limit?: number } @@ -42,6 +110,7 @@ export interface ReferenceProvider { id: string modalities: Modality[] queryFeatures: QueryFeature[] + capabilities?: ProviderCapabilities search(query: NormalizedQuery, ctx: ProviderContext): Promise } diff --git a/packages/core/src/query.ts b/packages/core/src/query.ts index 421858f..5d90700 100644 --- a/packages/core/src/query.ts +++ b/packages/core/src/query.ts @@ -1,10 +1,105 @@ import type { Modality } from './modality' -import type { NormalizedQuery, ReferenceProvider, SearchFilters } from './provider' +import type { + NormalizedQuery, + ProviderOptionsById, + ReferenceProvider, + SearchControlKey, + SearchControls, + SearchFilters, +} from './provider' + +function controlsFromFilters(filters: SearchFilters | undefined): SearchControls { + if (!filters) return {} + return { + ...(filters.orientation ? { orientation: filters.orientation } : {}), + ...(filters.color ? { color: filters.color } : {}), + ...(filters.language ? { language: filters.language } : {}), + } +} + +export function mergeSearchControls(controls: SearchControls | undefined, filters: SearchFilters | undefined): SearchControls { + return { ...controlsFromFilters(filters), ...(controls ?? 
{}) } +} + +function hasControl(controls: SearchControls, key: SearchControlKey): boolean { + switch (key) { + case 'orientation': return controls.orientation !== undefined + case 'color': return controls.color !== undefined + case 'language': return controls.language !== undefined + case 'sort': return controls.sort !== undefined + case 'safety': return controls.safety !== undefined + case 'license.commercial': return controls.license?.commercial !== undefined + case 'license.modification': return controls.license?.modification !== undefined + case 'license.allowUnknown': return controls.license?.allowUnknown !== undefined + case 'media.kind': return controls.media?.kind !== undefined + case 'media.size': return controls.media?.size !== undefined + case 'media.minWidth': return controls.media?.minWidth !== undefined + case 'media.minHeight': return controls.media?.minHeight !== undefined + case 'media.duration': return controls.media?.duration !== undefined + case 'creator.id': return controls.creator?.id !== undefined + case 'creator.name': return controls.creator?.name !== undefined + case 'text.copyright': return controls.text?.copyright !== undefined + case 'page': return controls.page !== undefined + } +} + +function setControl(out: SearchControls, key: SearchControlKey, controls: SearchControls) { + switch (key) { + case 'orientation': out.orientation = controls.orientation; return + case 'color': out.color = controls.color; return + case 'language': out.language = controls.language; return + case 'sort': out.sort = controls.sort; return + case 'safety': out.safety = controls.safety; return + case 'license.commercial': out.license = { ...(out.license ?? {}), commercial: controls.license?.commercial }; return + case 'license.modification': out.license = { ...(out.license ?? {}), modification: controls.license?.modification }; return + case 'license.allowUnknown': out.license = { ...(out.license ?? 
{}), allowUnknown: controls.license?.allowUnknown }; return + case 'media.kind': out.media = { ...(out.media ?? {}), kind: controls.media?.kind }; return + case 'media.size': out.media = { ...(out.media ?? {}), size: controls.media?.size }; return + case 'media.minWidth': out.media = { ...(out.media ?? {}), minWidth: controls.media?.minWidth }; return + case 'media.minHeight': out.media = { ...(out.media ?? {}), minHeight: controls.media?.minHeight }; return + case 'media.duration': out.media = { ...(out.media ?? {}), duration: controls.media?.duration }; return + case 'creator.id': out.creator = { ...(out.creator ?? {}), id: controls.creator?.id }; return + case 'creator.name': out.creator = { ...(out.creator ?? {}), name: controls.creator?.name }; return + case 'text.copyright': out.text = { ...(out.text ?? {}), copyright: controls.text?.copyright }; return + case 'page': out.page = controls.page; return + } +} + +export function requestedControlKeys(controls: SearchControls): SearchControlKey[] { + const allControlKeys: SearchControlKey[] = [ + 'orientation', 'color', 'language', 'sort', 'safety', + 'license.commercial', 'license.modification', 'license.allowUnknown', + 'media.kind', 'media.size', 'media.minWidth', 'media.minHeight', 'media.duration', + 'creator.id', 'creator.name', 'text.copyright', 'page', + ] + return allControlKeys.filter(key => hasControl(controls, key)) +} + +export function supportedControlKeys(provider: ReferenceProvider, controls: SearchControls): SearchControlKey[] { + const caps = provider.capabilities?.controls ?? [] + return caps.filter(key => hasControl(controls, key)) +} + +export function unsupportedControlKeys(provider: ReferenceProvider, controls: SearchControls): SearchControlKey[] { + const requested = requestedControlKeys(controls) + const supported = new Set(provider.capabilities?.controls ?? 
[]) + return requested.filter(key => !supported.has(key)) +} + +export function normalizeControlsForProvider(input: { + controls?: SearchControls + filters?: SearchFilters +}, provider: ReferenceProvider): SearchControls | undefined { + const merged = mergeSearchControls(input.controls, input.filters) + const supported = supportedControlKeys(provider, merged) + if (supported.length === 0) return undefined + const out: SearchControls = {} + for (const key of supported) setControl(out, key, merged) + return out +} -// Build a per-provider NormalizedQuery: keep only the filters this provider's -// queryFeatures support (silently drop the rest — never error), and intersect modalities. export function normalizeQuery( - input: { query: string; modalities: Modality[]; filters?: SearchFilters; limit?: number }, + input: { query: string; modalities: Modality[]; filters?: SearchFilters; controls?: SearchControls; providerOptions?: ProviderOptionsById; limit?: number }, provider: ReferenceProvider, ): NormalizedQuery { const feats = new Set(provider.queryFeatures) @@ -13,10 +108,13 @@ export function normalizeQuery( if (input.filters?.orientation && feats.has('orientation')) filters.orientation = input.filters.orientation if (input.filters?.language && feats.has('language')) filters.language = input.filters.language const hasFilters = Object.keys(filters).length > 0 + const controls = normalizeControlsForProvider(input, provider) return { text: input.query, modalities: input.modalities.filter(m => provider.modalities.includes(m)), ...(hasFilters ? { filters } : {}), + ...(controls ? { controls } : {}), + ...(input.providerOptions?.[provider.id] ? { providerOptions: input.providerOptions[provider.id] } : {}), ...(input.limit !== undefined ? 
{ limit: input.limit } : {}), } } diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 59823d1..43aab0f 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -40,12 +40,29 @@ await serveStdio(createRefkit({ ## The `search_references` tool -Input: `{ query, modalities?, limit?, intent?, gateFor? }`. +Input: `{ query, modalities?, controls?, filters?, providerOptions?, explain?, limit?, intent?, gateFor? }`. +- `controls` — provider-neutral search controls such as `{ orientation, color, language, sort, safety, license, media }`; providers translate supported controls and report ignored controls when `explain: true`. - `intent` — annotate each result with a **use-verdict** for that intended use (no filtering). - `gateFor` — return only results whose license allows that intent. +- `filters` — compatibility alias for `controls.orientation`, `controls.color`, and `controls.language`. +- `explain` — include provider status, applied and ignored unified controls, warnings, and gate/drop metadata. +- `providerOptions` — typed provider-specific whitelisted controls keyed by provider id, for example: -Output: `{ references: [{ id, title?, modality, provider, canonicalUrl, license, thumbnail?, excerpt?, useVerdict?, attribution? }] }`. When `intent` (or `gateFor`) is set, each result carries `useVerdict { decision, reason, confidence }` and — if the license requires it — a ready-to-use `attribution` credit line. +```json +{ + "query": "forest path", + "modalities": ["image"], + "controls": { "orientation": "landscape", "color": "green", "safety": "strict" }, + "providerOptions": { + "unsplash": { "collections": ["abc", "def"], "page": 2 }, + "flickr": { "tags": ["forest", "path"], "tagMode": "all", "minTakenDate": "2020-01-01" }, + "brave": { "country": "US", "searchLang": "en" } + } +} +``` + +Output: `{ references: [{ id, title?, modality, provider, canonicalUrl, license, thumbnail?, excerpt?, useVerdict?, useExplanation?, attribution? }], meta? 
}`. When `intent` (or `gateFor`) is set, each result carries `useVerdict { decision, reason, confidence }`, a plain `useExplanation`, and — if the license requires it — a ready-to-use `attribution` credit line. When `explain: true`, `meta` includes per-provider `fulfilled` / `failed` / `skipped` status, applied/ignored control details, warnings, and gate/drop counts. > Results are references with a license id + source link — **not rights clearance, not legal advice**. `unknown` / `needs-review` results require the caller to verify the source's terms. diff --git a/packages/mcp/src/__tests__/mcp.test.ts b/packages/mcp/src/__tests__/mcp.test.ts index 61b7c3a..16192e2 100644 --- a/packages/mcp/src/__tests__/mcp.test.ts +++ b/packages/mcp/src/__tests__/mcp.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from 'vitest' import { Client } from '@modelcontextprotocol/sdk/client/index.js' import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js' -import { createRefkit } from '@refkit/core' +import { createRefkit, defineProvider } from '@refkit/core' import { openverse } from '@refkit/provider-openverse' import { createRefkitMcpServer } from '../index' import { defaultProviders } from '../cli' @@ -83,6 +83,145 @@ describe('@refkit/mcp', () => { expect(structured.references[0].attribution).toContain('CC-BY') await client.close() }) + + it('accepts filters and providerOptions for provider-specific search controls', async () => { + let seen: { filters?: unknown; providerOptions?: unknown } = {} + const fakeProvider = defineProvider({ + id: 'fake', + modalities: ['image'], + queryFeatures: ['keyword', 'orientation'], + search: async (q) => { + seen = { filters: q.filters, providerOptions: q.providerOptions } + return [] + }, + }) + const server = createRefkitMcpServer(createRefkit({ providers: [fakeProvider] })) + const [clientT, serverT] = InMemoryTransport.createLinkedPair() + const client = new Client({ name: 'test', version: '1.0.0' }) + await 
Promise.all([client.connect(clientT), server.connect(serverT)]) + await client.callTool({ + name: 'search_references', + arguments: { + query: 'sky', + modalities: ['image'], + filters: { orientation: 'landscape' }, + providerOptions: { fake: { sort: 'latest' } }, + }, + }) + expect(seen.filters).toEqual({ orientation: 'landscape' }) + expect(seen.providerOptions).toEqual({ sort: 'latest' }) + await client.close() + }) + + it('accepts unified controls and forwards them to core search', async () => { + let seen: unknown + const fakeProvider = defineProvider({ + id: 'fake', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation', 'color', 'safety'] }, + search: async (q) => { + seen = q.controls + return [] + }, + }) + const server = createRefkitMcpServer(createRefkit({ providers: [fakeProvider] })) + const [clientT, serverT] = InMemoryTransport.createLinkedPair() + const client = new Client({ name: 'test', version: '1.0.0' }) + await Promise.all([client.connect(clientT), server.connect(serverT)]) + await client.callTool({ + name: 'search_references', + arguments: { + query: 'sky', + modalities: ['image'], + controls: { orientation: 'landscape', color: 'blue', safety: 'strict' }, + }, + }) + expect(seen).toEqual({ orientation: 'landscape', color: 'blue', safety: 'strict' }) + await client.close() + }) + + it('includes control support metadata when explain is true', async () => { + const fakeProvider = defineProvider({ + id: 'fake', + modalities: ['image'], + queryFeatures: ['keyword'], + capabilities: { controls: ['orientation'] }, + search: async () => [], + }) + const server = createRefkitMcpServer(createRefkit({ providers: [fakeProvider] })) + const [clientT, serverT] = InMemoryTransport.createLinkedPair() + const client = new Client({ name: 'test', version: '1.0.0' }) + await Promise.all([client.connect(clientT), server.connect(serverT)]) + const res = await client.callTool({ + name: 'search_references', + arguments: { + 
query: 'sky', + modalities: ['image'], + controls: { orientation: 'landscape', color: 'blue' }, + explain: true, + }, + }) + const structured = res.structuredContent as { + meta?: { + controls?: { + requested: string[] + appliedByProvider: Record<string, string[]> + ignoredByProvider: Record<string, string[]> + } + } + } + expect(structured.meta?.controls).toEqual({ + requested: ['orientation', 'color'], + appliedByProvider: { fake: ['orientation'] }, + ignoredByProvider: { fake: ['color'] }, + }) + await client.close() + }) + + it('returns meta and use explanations when explain is true', async () => { + const good = defineProvider({ + id: 'good', + modalities: ['image'], + queryFeatures: ['keyword'], + search: async () => [{ + id: 'good-1', + modality: 'image', + title: 'credit me', + source: { providerId: 'good', sourceUrl: 'https://good/1' }, + canonicalUrl: 'https://good/1', + rights: { license: 'CC-BY', rehostPolicy: 'cache-allowed', raw: { sourceTerms: 'terms', sourceUrl: 'https://good/1' } }, + verifiedAt: '2026-06-22T00:00:00.000Z', + relevance: 1, + }], + }) + const bad = defineProvider({ + id: 'bad', + modalities: ['image'], + queryFeatures: ['keyword'], + search: async () => { throw new Error('offline') }, + }) + const server = createRefkitMcpServer(createRefkit({ providers: [good, bad] })) + const [clientT, serverT] = InMemoryTransport.createLinkedPair() + const client = new Client({ name: 'test', version: '1.0.0' }) + await Promise.all([client.connect(clientT), server.connect(serverT)]) + + const res = await client.callTool({ + name: 'search_references', + arguments: { query: 'credit', modalities: ['image'], intent: 'commercial-product', explain: true }, + }) + const structured = res.structuredContent as { + references: Array<{ useExplanation?: string }> + meta?: { providers: Array<{ providerId: string; status: string; error?: string }>; warnings: string[] } + } + expect(structured.references[0].useExplanation).toContain('allowed-with-attribution') +
expect(structured.meta?.providers).toEqual([ + { providerId: 'good', status: 'fulfilled', returned: 1, accepted: 1, rejected: 0 }, + { providerId: 'bad', status: 'failed', error: 'offline' }, + ]) + expect(structured.meta?.warnings).toContain('1 provider(s) failed; returning partial results.') + await client.close() + }) }) describe('defaultProviders (zero-config CLI wiring)', () => { diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 3e15ded..e780092 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -2,10 +2,68 @@ import { readFileSync } from 'node:fs' import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js' import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js' import { z } from 'zod' -import type { RefkitClient, Reference, Verdict, Attribution } from '@refkit/core' +import type { RefkitClient, Reference, Verdict, Attribution, SearchFilters, SearchControls, SearchControlKey, ProviderOptionsById, SearchMeta } from '@refkit/core' const MODALITIES = ['image', 'video', 'audio', 'text'] as const const INTENTS = ['internal-moodboard', 'commercial-product', 'ai-generation-input', 'redistribution'] as const +const ORIENTATIONS = ['landscape', 'portrait', 'square'] as const +const SEARCH_CONTROL_KEYS = [ + 'orientation', + 'color', + 'language', + 'sort', + 'safety', + 'license.commercial', + 'license.modification', + 'license.allowUnknown', + 'media.kind', + 'media.size', + 'media.minWidth', + 'media.minHeight', + 'media.duration', + 'creator.id', + 'creator.name', + 'text.copyright', + 'page', +] as const satisfies readonly SearchControlKey[] + +const filtersSchema = z.object({ + color: z.string().optional(), + orientation: z.enum(ORIENTATIONS).optional(), + language: z.string().optional(), +}) +const searchControlKeySchema = z.enum(SEARCH_CONTROL_KEYS) + +const searchControlsSchema = z.object({ + orientation: z.enum(ORIENTATIONS).optional(), + color: z.string().optional(), + language: 
z.string().optional(), + sort: z.enum(['relevance', 'latest', 'popular', 'interesting']).optional(), + safety: z.enum(['strict', 'moderate', 'off']).optional(), + license: z.object({ + commercial: z.boolean().optional(), + modification: z.boolean().optional(), + allowUnknown: z.boolean().optional(), + }).optional(), + media: z.object({ + kind: z.enum(['photo', 'illustration', 'vector', 'film', 'animation']).optional(), + size: z.enum(['small', 'medium', 'large']).optional(), + minWidth: z.number().int().nonnegative().optional(), + minHeight: z.number().int().nonnegative().optional(), + duration: z.enum(['short', 'medium', 'long']).optional(), + }).optional(), + creator: z.object({ + id: z.string().optional(), + name: z.string().optional(), + }).optional(), + text: z.object({ + copyright: z.enum(['public-domain', 'copyrighted', 'any']).optional(), + }).optional(), + page: z.number().int().positive().optional(), +}) + +const providerOptionValueSchema = z.union([z.string(), z.number(), z.boolean(), z.array(z.string())]) +const providerOptionsSchema = z.record(z.string(), z.record(z.string(), providerOptionValueSchema)) // Reported in the MCP initialize handshake. Read the real version (the dist sits // next to package.json, which npm always ships) instead of a hardcoded placeholder. @@ -33,9 +91,12 @@ function toAgentRef(r: Reference, assessment?: { verdict: Verdict; attribution: } if (!assessment) return base const { verdict, attribution } = assessment + const reason = verdict.reasons.join('; ') + const useExplanation = `${verdict.decision}: ${reason || 'license facts allow this use'}${attribution.required && attribution.text ? ` Attribution required: ${attribution.text}` : ''}` return { ...base, useVerdict: { decision: verdict.decision, reason: verdict.reasons.join('; '), confidence: verdict.confidence }, + useExplanation, ...(attribution.required && attribution.text ? 
{ attribution: attribution.text } : {}), } } @@ -53,9 +114,41 @@ const agentRefSchema = z.object({ .object({ decision: z.string(), reason: z.string(), confidence: z.string() }) .optional() .describe('present when `intent` (or `gateFor`) is set: may this be used for that intent, and how confident'), + useExplanation: z.string().optional().describe('plain-language use verdict summary for agents'), attribution: z.string().optional().describe('ready-to-use credit line; present when the license requires attribution'), }) +const searchMetaSchema: z.ZodType<SearchMeta> = z.object({ + query: z.string(), + modalities: z.array(z.enum(MODALITIES)), + limit: z.number(), + poolFactor: z.number(), + fetchLimit: z.number(), + appliedFilters: filtersSchema.optional(), + controls: z.object({ + requested: z.array(searchControlKeySchema), + appliedByProvider: z.record(z.string(), z.array(searchControlKeySchema)), + ignoredByProvider: z.record(z.string(), z.array(searchControlKeySchema)), + }).optional(), + providerOptions: z.array(z.string()).optional(), + providers: z.array(z.object({ + providerId: z.string(), + status: z.enum(['fulfilled', 'failed', 'skipped']), + returned: z.number().optional(), + accepted: z.number().optional(), + rejected: z.number().optional(), + reason: z.enum(['unsupported-modality']).optional(), + error: z.string().optional(), + })), + gate: z.object({ + intent: z.enum(INTENTS), + before: z.number(), + after: z.number(), + dropped: z.number(), + }).optional(), + warnings: z.array(z.string()), +}) + /** Wrap a configured RefkitClient as an MCP server exposing `search_references`. */ export function createRefkitMcpServer(refkit: RefkitClient): McpServer { const server = new McpServer({ name: 'refkit', version: VERSION }) @@ -72,14 +165,28 @@ export function createRefkitMcpServer(refkit: RefkitClient): McpServer { inputSchema: { query: z.string().describe('what to search for, e.g. 
"cyberpunk alley at night"'), modalities: z.array(z.enum(MODALITIES)).optional().describe('default ["image"]'), + filters: filtersSchema.optional().describe('compatibility alias for controls.orientation, controls.color, and controls.language'), + controls: searchControlsSchema.optional().describe('provider-neutral search controls; providers translate supported controls and report ignored controls in explain metadata'), + providerOptions: providerOptionsSchema.optional().describe('provider-specific search controls keyed by provider id; each provider whitelists supported keys'), + explain: z.boolean().optional().describe('include provider status, applied and ignored controls, warnings, and gate/drop metadata'), limit: z.number().int().positive().optional(), intent: z.enum(INTENTS).optional().describe('annotate each result with a use-verdict for this intended use (no filtering)'), gateFor: z.enum(INTENTS).optional().describe('only return results whose license allows this intended use'), }, - outputSchema: { references: z.array(agentRefSchema) }, + outputSchema: { references: z.array(agentRefSchema), meta: searchMetaSchema.optional() }, }, - async ({ query, modalities, limit, intent, gateFor }) => { - const refs = await refkit.search({ query, modalities: modalities ?? ['image'], limit, gateFor }) + async ({ query, modalities, filters, controls, providerOptions, explain, limit, intent, gateFor }) => { + const searchInput = { + query, + modalities: modalities ?? ['image'], + filters: filters as SearchFilters | undefined, + controls: controls as SearchControls | undefined, + providerOptions: providerOptions as ProviderOptionsById | undefined, + limit, + gateFor, + } + const result = explain ? await refkit.searchWithMeta(searchInput) : { references: await refkit.search(searchInput), meta: undefined } + const refs = result.references const assessIntent = intent ?? 
gateFor const references = refs.map(r => assessIntent @@ -88,7 +195,7 @@ export function createRefkitMcpServer(refkit: RefkitClient): McpServer { ) return { content: [{ type: 'text', text: `${references.length} reference(s) for "${query}".` }], - structuredContent: { references }, + structuredContent: { references, ...(result.meta ? { meta: result.meta } : {}) }, } }, ) diff --git a/packages/provider-artic/src/__tests__/artic.test.ts b/packages/provider-artic/src/__tests__/artic.test.ts index eb5f848..ffbc5cb 100644 --- a/packages/provider-artic/src/__tests__/artic.test.ts +++ b/packages/provider-artic/src/__tests__/artic.test.ts @@ -32,4 +32,32 @@ describe('artic provider', () => { const refs = await artic().search({ text: 'x', modalities: ['image'] }, ctxWith({ data: [FIXTURE.data[0]] })) expect(refs[0].preview?.url).toContain('https://www.artic.edu/iiif/2/') }) + + it('forwards documented ArtIC artwork search options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters<typeof fetch>[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ data: [] }), { status: 200 }) + }) as typeof fetch, + } + await artic().search({ + text: 'lion', + modalities: ['image'], + providerOptions: { + sort: 'timestamp:desc', + from: 20, + size: 8, + facets: ['artist_title', 'style_titles'], + fields: ['id', 'title', 'image_id', 'is_public_domain', 'artist_display', 'date_display'], + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('sort')).toBe('timestamp:desc') + expect(url.searchParams.get('from')).toBe('20') + expect(url.searchParams.get('size')).toBe('8') + expect(url.searchParams.get('facets')).toBe('artist_title,style_titles') + expect(url.searchParams.get('fields')).toBe('id,title,image_id,is_public_domain,artist_display,date_display') + expect(url.searchParams.get('query[term][is_public_domain]')).toBe('true') + }) }) diff --git a/packages/provider-artic/src/index.ts 
b/packages/provider-artic/src/index.ts index 9562670..a1a3be7 100644 --- a/packages/provider-artic/src/index.ts +++ b/packages/provider-artic/src/index.ts @@ -15,6 +15,14 @@ interface ArticResponse { config?: { iiif_url?: string } } +export interface ArticSearchOptions { + sort?: string + from?: number + size?: number + facets?: string | readonly string[] + fields?: string | readonly string[] +} + // AIC's artist_display packs name + nationality + dates across lines; keep the first line. function artistName(display: string | null): string | undefined { if (!display) return undefined @@ -46,18 +54,50 @@ function toReference(a: ArticArtwork, iiifUrl: string): Reference | null { } } +function setIfString(url: URL, key: string, value: unknown) { + if (typeof value !== 'string' || !value) return + url.searchParams.set(key, value) +} + +function setIfNonNegativeInt(url: URL, key: string, value: unknown) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) return + url.searchParams.set(key, String(value)) +} + +function setStringList(url: URL, key: string, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set(key, value) + if (Array.isArray(value) && value.every(v => typeof v === 'string')) url.searchParams.set(key, value.join(',')) +} + +function articFields(value: unknown): string { + const fields = new Set(['id', 'title', 'image_id', 'is_public_domain', 'artist_display']) + if (typeof value === 'string') { + for (const item of value.split(',')) if (item.trim()) fields.add(item.trim()) + } + if (Array.isArray(value) && value.every(v => typeof v === 'string')) { + for (const item of value) if (item) fields.add(item) + } + return Array.from(fields).join(',') +} + export function artic() { return defineProvider({ id: 'artic', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: [] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise<Reference[]> { const url = new 
URL('https://api.artic.edu/api/v1/artworks/search') url.searchParams.set('q', q.text) + const opts = q.providerOptions as ArticSearchOptions | undefined // relevance hint — toReference is authoritative on is_public_domain url.searchParams.set('query[term][is_public_domain]', 'true') - url.searchParams.set('fields', 'id,title,image_id,is_public_domain,artist_display') + url.searchParams.set('fields', articFields(opts?.fields)) url.searchParams.set('limit', String(q.limit ?? 20)) + setIfString(url, 'sort', opts?.sort) + setIfNonNegativeInt(url, 'from', opts?.from) + setIfNonNegativeInt(url, 'size', opts?.size) + setStringList(url, 'facets', opts?.facets) const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`artic search failed: ${res.status}`) const json = (await res.json()) as ArticResponse diff --git a/packages/provider-brave/src/__tests__/brave.test.ts b/packages/provider-brave/src/__tests__/brave.test.ts index 503d1d1..878c211 100644 --- a/packages/provider-brave/src/__tests__/brave.test.ts +++ b/packages/provider-brave/src/__tests__/brave.test.ts @@ -52,4 +52,78 @@ describe('brave provider', () => { expect(evaluateUse(refs[0].rights, 'commercial-product').decision).toBe('needs-review') expect(evaluateUse(refs[0].rights, 'ai-generation-input').decision).toBe('needs-review') }) + + it('maps unified safety controls to Brave safesearch', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await brave({ token: 't' }).search({ text: 'cat', modalities: ['image'], controls: { safety: 'off' } }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('safesearch')).toBe('off') + }) + + it('forwards documented Brave image search options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: 
Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await brave({ token: 't' }).search({ + text: 'cat', + modalities: ['image'], + providerOptions: { + country: 'JP', + searchLang: 'ja', + count: 99, + safesearch: 'off', + spellcheck: false, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('country')).toBe('JP') + expect(url.searchParams.get('search_lang')).toBe('ja') + expect(url.searchParams.get('count')).toBe('99') + expect(url.searchParams.get('safesearch')).toBe('off') + expect(url.searchParams.get('spellcheck')).toBe('false') + }) + + it('lets per-query strict safety override a factory off default', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await brave({ token: 't', safesearch: 'off' }).search({ + text: 'cat', + modalities: ['image'], + controls: { safety: 'strict' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('safesearch')).toBe('strict') + }) + + it('lets per-query off safety override a factory strict default', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await brave({ token: 't', safesearch: 'strict' }).search({ + text: 'cat', + modalities: ['image'], + controls: { safety: 'off' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('safesearch')).toBe('off') + }) }) diff --git a/packages/provider-brave/src/index.ts b/packages/provider-brave/src/index.ts index 628aee7..0d0ff24 100644 --- a/packages/provider-brave/src/index.ts +++ b/packages/provider-brave/src/index.ts @@ -1,6 +1,6 @@ import { 
defineProvider, referenceId, - type Reference, type RightsRecord, type NormalizedQuery, type ProviderContext, + type Reference, type RightsRecord, type NormalizedQuery, type ProviderContext, type SearchSafety, } from '@refkit/core' export interface BraveConfig { @@ -9,6 +9,14 @@ export interface BraveConfig { safesearch?: 'strict' | 'off' } +export interface BraveImageSearchOptions { + country?: string + searchLang?: string + count?: number + safesearch?: 'strict' | 'off' + spellcheck?: boolean +} + interface BraveImageResult { title: string url: string // the source webpage (canonical link) @@ -18,6 +26,28 @@ interface BraveImageResult { } interface BraveResponse { results: BraveImageResult[] } +function braveSafeSearch(control: SearchSafety | undefined, fallback: BraveConfig['safesearch']): 'strict' | 'off' { + if (control === 'off') return 'off' + if (control === 'strict' || control === 'moderate') return 'strict' + return fallback ?? 'strict' +} + +function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string' || !value) return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + +function setIfPositiveInt(url: URL, key: string, value: unknown, max?: number) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return + url.searchParams.set(key, String(max ? 
Math.min(value, max) : value)) +} + +function setIfBoolean(url: URL, key: string, value: unknown) { + if (typeof value !== 'boolean') return + url.searchParams.set(key, String(value)) +} + function toReference(r: BraveImageResult): Reference { const rights: RightsRecord = { // open web → no license metadata → evaluateUse returns needs-review (never auto-allowed) @@ -45,11 +75,18 @@ export function brave(config: BraveConfig) { id: 'brave', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: ['safety'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise<Reference[]> { const url = new URL('https://api.search.brave.com/res/v1/images/search') url.searchParams.set('q', q.text) url.searchParams.set('count', String(Math.min(q.limit ?? 50, 200))) - url.searchParams.set('safesearch', config.safesearch ?? 'strict') + url.searchParams.set('safesearch', braveSafeSearch(q.controls?.safety, config.safesearch)) + const opts = q.providerOptions as BraveImageSearchOptions | undefined + setIfString(url, 'country', opts?.country) + setIfString(url, 'search_lang', opts?.searchLang) + setIfPositiveInt(url, 'count', opts?.count, 200) + setIfString(url, 'safesearch', opts?.safesearch, ['strict', 'off']) + setIfBoolean(url, 'spellcheck', opts?.spellcheck) const res = await ctx.fetch(url.toString(), { headers: { 'X-Subscription-Token': config.token, Accept: 'application/json' }, signal: ctx.signal, diff --git a/packages/provider-flickr/src/__tests__/flickr.test.ts b/packages/provider-flickr/src/__tests__/flickr.test.ts index 7da71e4..f7e67e5 100644 --- a/packages/provider-flickr/src/__tests__/flickr.test.ts +++ b/packages/provider-flickr/src/__tests__/flickr.test.ts @@ -68,4 +68,136 @@ describe('flickr provider', () => { flickr({ apiKey: 'k' }).search({ text: 'x', modalities: ['image'] }, ctxWith({ stat: 'fail', code: 100, message: 'Invalid API Key' })), ).rejects.toThrow(/flickr search error/) }) + + it('forwards documented Flickr-specific search 
options', 
async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await flickr({ apiKey: 'k' }).search({ + text: 'sunset', + modalities: ['image'], + providerOptions: { + licenseFilter: '4,5', + sort: 'interestingness-desc', + safeSearch: 1, + tags: ['bay', 'sunset'], + tagMode: 'all', + userId: '99@N00', + minUploadDate: '2024-01-01', + maxUploadDate: '2024-12-31', + minTakenDate: '2023-01-01', + maxTakenDate: '2023-12-31', + bbox: '-122.6,37.6,-122.3,37.9', + accuracy: 11, + machineTags: ['dc:title="sunset"', 'geo:city=san-francisco'], + machineTagMode: 'any', + groupId: '123@N00', + woeId: '2487956', + placeId: 'abc123', + hasGeo: true, + geoContext: 2, + lat: '37.7749', + lon: '-122.4194', + radius: 10, + radiusUnits: 'km', + isCommons: true, + inGallery: true, + isGetty: false, + extras: ['description', 'tags'], + page: 2, + perPage: 50, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license')).toBe('4,5') + expect(url.searchParams.get('sort')).toBe('interestingness-desc') + expect(url.searchParams.get('safe_search')).toBe('1') + expect(url.searchParams.get('tags')).toBe('bay,sunset') + expect(url.searchParams.get('tag_mode')).toBe('all') + expect(url.searchParams.get('user_id')).toBe('99@N00') + expect(url.searchParams.get('min_upload_date')).toBe('2024-01-01') + expect(url.searchParams.get('max_upload_date')).toBe('2024-12-31') + expect(url.searchParams.get('min_taken_date')).toBe('2023-01-01') + expect(url.searchParams.get('max_taken_date')).toBe('2023-12-31') + expect(url.searchParams.get('bbox')).toBe('-122.6,37.6,-122.3,37.9') + expect(url.searchParams.get('accuracy')).toBe('11') + expect(url.searchParams.get('machine_tags')).toBe('dc:title="sunset",geo:city=san-francisco') + expect(url.searchParams.get('machine_tag_mode')).toBe('any') + 
expect(url.searchParams.get('group_id')).toBe('123@N00') + expect(url.searchParams.get('woe_id')).toBe('2487956') + expect(url.searchParams.get('place_id')).toBe('abc123') + expect(url.searchParams.get('has_geo')).toBe('1') + expect(url.searchParams.get('geo_context')).toBe('2') + expect(url.searchParams.get('lat')).toBe('37.7749') + expect(url.searchParams.get('lon')).toBe('-122.4194') + expect(url.searchParams.get('radius')).toBe('10') + expect(url.searchParams.get('radius_units')).toBe('km') + expect(url.searchParams.get('is_commons')).toBe('1') + expect(url.searchParams.get('in_gallery')).toBe('1') + expect(url.searchParams.get('is_getty')).toBe('0') + expect(url.searchParams.get('extras')).toContain('description') + expect(url.searchParams.get('extras')).toContain('license') + expect(url.searchParams.get('page')).toBe('2') + expect(url.searchParams.get('per_page')).toBe('50') + }) + + it('maps unified controls to documented Flickr search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await flickr({ apiKey: 'k' }).search({ + text: 'sunset', + modalities: ['image'], + controls: { + sort: 'interesting', + safety: 'strict', + license: { commercial: true, modification: true }, + creator: { id: '99@N00' }, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('sort')).toBe('interestingness-desc') + expect(url.searchParams.get('safe_search')).toBe('1') + expect(url.searchParams.get('license')).toBe('4,5,9,10,11,12') + expect(url.searchParams.get('user_id')).toBe('99@N00') + }) + + it('lets explicit Flickr providerOptions override equivalent unified controls', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 
200 }) + }) as typeof fetch, + } + await flickr({ apiKey: 'k' }).search({ + text: 'sunset', + modalities: ['image'], + controls: { + sort: 'interesting', + safety: 'strict', + license: { commercial: true, modification: true }, + creator: { id: 'control-user' }, + }, + providerOptions: { + licenseFilter: '4,5', + sort: 'date-taken-desc', + safeSearch: 3, + userId: 'option-user', + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license')).toBe('4,5') + expect(url.searchParams.get('sort')).toBe('date-taken-desc') + expect(url.searchParams.get('safe_search')).toBe('3') + expect(url.searchParams.get('user_id')).toBe('option-user') + }) }) diff --git a/packages/provider-flickr/src/index.ts b/packages/provider-flickr/src/index.ts index 4624eaf..85a2c0d 100644 --- a/packages/provider-flickr/src/index.ts +++ b/packages/provider-flickr/src/index.ts @@ -1,6 +1,6 @@ import { defineProvider, referenceId, - type Reference, type RightsRecord, type LicenseId, + type Reference, type RightsRecord, type LicenseId, type SearchLicenseControls, type NormalizedQuery, type ProviderContext, } from '@refkit/core' @@ -11,6 +11,38 @@ export interface FlickrConfig { licenseFilter?: string } +export interface FlickrSearchOptions { + licenseFilter?: string + sort?: 'date-posted-asc' | 'date-posted-desc' | 'date-taken-asc' | 'date-taken-desc' | 'interestingness-desc' | 'interestingness-asc' | 'relevance' + safeSearch?: 1 | 2 | 3 + tags?: string | readonly string[] + tagMode?: 'any' | 'all' + userId?: string + minUploadDate?: string | number + maxUploadDate?: string | number + minTakenDate?: string | number + maxTakenDate?: string | number + bbox?: string + accuracy?: number + machineTags?: string | readonly string[] + machineTagMode?: 'any' | 'all' + groupId?: string + woeId?: string + placeId?: string + hasGeo?: boolean + geoContext?: 0 | 1 | 2 + lat?: string + lon?: string + radius?: number + radiusUnits?: 'mi' | 'km' + isCommons?: boolean + inGallery?: boolean + 
isGetty?: boolean + extras?: string | readonly string[] + page?: number + perPage?: number +} + // Flickr numeric license id → our LicenseId (+ CC version). See // flickr.photos.licenses.getInfo. All Rights Reserved (0) and every NC/ND // variant map to 'proprietary' (→ denied for commercial/AI use). @@ -56,6 +88,78 @@ interface FlickrPhoto { } interface FlickrResponse { photos?: { photo: FlickrPhoto[] }; stat: string } +function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string') return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + +function setIfSafeSearch(url: URL, value: unknown) { + if (value !== 1 && value !== 2 && value !== 3) return + url.searchParams.set('safe_search', String(value)) +} + +function setTags(url: URL, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set('tags', value) + if (Array.isArray(value) && value.every(v => typeof v === 'string')) url.searchParams.set('tags', value.join(',')) +} + +function setStringOrNumber(url: URL, key: string, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set(key, value) + if (typeof value === 'number' && Number.isFinite(value)) url.searchParams.set(key, String(value)) +} + +function setStringList(url: URL, key: string, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set(key, value) + if (Array.isArray(value) && value.every(v => typeof v === 'string')) url.searchParams.set(key, value.join(',')) +} + +function setIfInt(url: URL, key: string, value: unknown, options?: { min?: number; max?: number }) { + if (typeof value !== 'number' || !Number.isInteger(value)) return + if (options?.min !== undefined && value < options.min) return + if (options?.max !== undefined && value > options.max) return + url.searchParams.set(key, String(value)) +} + +function setBooleanFlag(url: URL, key: string, value: unknown) { + if (typeof value 
!== 'boolean') return + url.searchParams.set(key, value ? '1' : '0') +} + +function flickrExtras(value: unknown): string { + const required = ['license', 'owner_name', 'url_t', 'url_m', 'url_l'] + const extras = new Set(required) + if (typeof value === 'string') { + for (const item of value.split(',')) if (item.trim()) extras.add(item.trim()) + } + if (Array.isArray(value) && value.every(v => typeof v === 'string')) { + for (const item of value) if (item) extras.add(item) + } + return Array.from(extras).join(',') +} + +function flickrLicenseForControls(license: SearchLicenseControls | undefined): string | undefined { + if (!license) return undefined + if (license.allowUnknown) return DEFAULT_LICENSE_FILTER + if (license.commercial && license.modification) return '4,5,9,10,11,12' + if (license.commercial) return '4,5,8,9,10,11,12' + return undefined +} + +function flickrSort(sort: string | undefined): string | undefined { + if (sort === 'interesting') return 'interestingness-desc' + if (sort === 'latest') return 'date-posted-desc' + if (sort === 'relevance') return 'relevance' + return undefined +} + +function flickrSafeSearch(safety: string | undefined): 1 | 2 | 3 | undefined { + if (safety === 'strict') return 1 + if (safety === 'moderate') return 2 + if (safety === 'off') return 3 + return undefined +} + function toReference(p: FlickrPhoto): Reference { const { license, version } = mapFlickrLicense(p.license) const canonicalUrl = `https://www.flickr.com/photos/${p.owner}/${p.id}` @@ -91,17 +195,46 @@ export function flickr(config: FlickrConfig) { id: 'flickr', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: ['sort', 'safety', 'license.commercial', 'license.modification', 'license.allowUnknown', 'creator.id'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { + const opts = q.providerOptions as FlickrSearchOptions | undefined const url = new URL('https://api.flickr.com/services/rest/') url.searchParams.set('method', 
'flickr.photos.search') url.searchParams.set('api_key', config.apiKey) url.searchParams.set('text', q.text) - url.searchParams.set('license', config.licenseFilter ?? DEFAULT_LICENSE_FILTER) + url.searchParams.set('license', opts?.licenseFilter ?? flickrLicenseForControls(q.controls?.license) ?? config.licenseFilter ?? DEFAULT_LICENSE_FILTER) url.searchParams.set('content_type', '1') // photos only (no screenshots/other) url.searchParams.set('media', 'photos') url.searchParams.set('sort', 'relevance') - url.searchParams.set('extras', 'license,owner_name,url_t,url_m,url_l') + setIfString(url, 'sort', opts?.sort ?? flickrSort(q.controls?.sort), ['date-posted-asc', 'date-posted-desc', 'date-taken-asc', 'date-taken-desc', 'interestingness-desc', 'interestingness-asc', 'relevance']) + setIfSafeSearch(url, opts?.safeSearch ?? flickrSafeSearch(q.controls?.safety)) + setTags(url, opts?.tags) + setIfString(url, 'tag_mode', opts?.tagMode, ['any', 'all']) + setIfString(url, 'user_id', opts?.userId ?? 
q.controls?.creator?.id) + setStringOrNumber(url, 'min_upload_date', opts?.minUploadDate) + setStringOrNumber(url, 'max_upload_date', opts?.maxUploadDate) + setStringOrNumber(url, 'min_taken_date', opts?.minTakenDate) + setStringOrNumber(url, 'max_taken_date', opts?.maxTakenDate) + setIfString(url, 'bbox', opts?.bbox) + setIfInt(url, 'accuracy', opts?.accuracy, { min: 1, max: 16 }) + setStringList(url, 'machine_tags', opts?.machineTags) + setIfString(url, 'machine_tag_mode', opts?.machineTagMode, ['any', 'all']) + setIfString(url, 'group_id', opts?.groupId) + setIfString(url, 'woe_id', opts?.woeId) + setIfString(url, 'place_id', opts?.placeId) + setBooleanFlag(url, 'has_geo', opts?.hasGeo) + setIfInt(url, 'geo_context', opts?.geoContext, { min: 0, max: 2 }) + setIfString(url, 'lat', opts?.lat) + setIfString(url, 'lon', opts?.lon) + setStringOrNumber(url, 'radius', opts?.radius) + setIfString(url, 'radius_units', opts?.radiusUnits, ['mi', 'km']) + setBooleanFlag(url, 'is_commons', opts?.isCommons) + setBooleanFlag(url, 'in_gallery', opts?.inGallery) + setBooleanFlag(url, 'is_getty', opts?.isGetty) + url.searchParams.set('extras', flickrExtras(opts?.extras)) + setIfInt(url, 'page', opts?.page, { min: 1 }) url.searchParams.set('per_page', String(q.limit ?? 
20)) + setIfInt(url, 'per_page', opts?.perPage, { min: 1, max: 500 }) url.searchParams.set('format', 'json') url.searchParams.set('nojsoncallback', '1') const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) diff --git a/packages/provider-gutendex/src/__tests__/gutendex.test.ts b/packages/provider-gutendex/src/__tests__/gutendex.test.ts index 1707f6c..bf1e82e 100644 --- a/packages/provider-gutendex/src/__tests__/gutendex.test.ts +++ b/packages/provider-gutendex/src/__tests__/gutendex.test.ts @@ -46,6 +46,60 @@ describe('copyrightToLicense', () => { }) describe('gutendex provider', () => { + it('maps unified text controls to Gutendex search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await gutendex().search({ + text: 'great', + modalities: ['text'], + controls: { language: 'en', text: { copyright: 'public-domain' }, page: 2 }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('languages')).toBe('en') + expect(url.searchParams.get('copyright')).toBe('false') + expect(url.searchParams.get('page')).toBe('2') + }) + + it('forwards documented Gutendex search options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await gutendex().search({ + text: 'great', + modalities: ['text'], + providerOptions: { + authorYearStart: 1800, + authorYearEnd: 1899, + copyright: ['false', 'null'], + ids: ['1400', '84'], + languages: ['en', 'fr'], + mimeType: 'text/html', + sort: 'ascending', + topic: 'children', + page: 4, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('author_year_start')).toBe('1800') + 
expect(url.searchParams.get('author_year_end')).toBe('1899') + expect(url.searchParams.get('copyright')).toBe('false,null') + expect(url.searchParams.get('ids')).toBe('1400,84') + expect(url.searchParams.get('languages')).toBe('en,fr') + expect(url.searchParams.get('mime_type')).toBe('text/html') + expect(url.searchParams.get('sort')).toBe('ascending') + expect(url.searchParams.get('topic')).toBe('children') + expect(url.searchParams.get('page')).toBe('4') + }) + it('filters non-Text media and maps the rest to text References', async () => { const refs = await gutendex().search({ text: 'great expectations', modalities: ['text'] }, ctxWith(FIXTURE)) expect(refs).toHaveLength(2) // the Sound record is filtered out diff --git a/packages/provider-gutendex/src/index.ts b/packages/provider-gutendex/src/index.ts index 3f9f8eb..bd71c92 100644 --- a/packages/provider-gutendex/src/index.ts +++ b/packages/provider-gutendex/src/index.ts @@ -9,6 +9,18 @@ export interface GutendexConfig { userAgent?: string } +export interface GutendexSearchOptions { + authorYearStart?: number + authorYearEnd?: number + copyright?: 'true' | 'false' | 'null' | readonly ('true' | 'false' | 'null')[] + ids?: string | readonly string[] + languages?: string | readonly string[] + mimeType?: string + sort?: 'ascending' | 'descending' | 'popular' + topic?: string + page?: number +} + interface GutendexAuthor { name: string; birth_year: number | null; death_year: number | null } interface GutendexResult { id: number @@ -56,14 +68,56 @@ function toReference(r: GutendexResult): Reference { } } +function setIfInt(url: URL, key: string, value: unknown) { + if (typeof value !== 'number' || !Number.isInteger(value)) return + url.searchParams.set(key, String(value)) +} + +function setIfPositiveInt(url: URL, key: string, value: unknown) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return + url.searchParams.set(key, String(value)) +} + +function setIfString(url: URL, key: string, 
value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string' || !value) return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + +function setIfStringList(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value === 'string' && value) { + if (allowed && !value.split(',').every(v => allowed.includes(v))) return + url.searchParams.set(key, value) + } + if (Array.isArray(value) && value.every(v => typeof v === 'string')) { + if (allowed && !value.every(v => allowed.includes(v))) return + url.searchParams.set(key, value.join(',')) + } +} + export function gutendex(config: GutendexConfig = {}) { return defineProvider({ id: 'gutendex', modalities: ['text'], queryFeatures: ['keyword'], + capabilities: { controls: ['language', 'text.copyright', 'page'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://gutendex.com/books/') url.searchParams.set('search', q.text) + if (q.controls?.language) url.searchParams.set('languages', q.controls.language) + if (q.controls?.text?.copyright === 'public-domain') url.searchParams.set('copyright', 'false') + if (q.controls?.text?.copyright === 'copyrighted') url.searchParams.set('copyright', 'true') + if (q.controls?.page) url.searchParams.set('page', String(q.controls.page)) + const opts = q.providerOptions as GutendexSearchOptions | undefined + setIfInt(url, 'author_year_start', opts?.authorYearStart) + setIfInt(url, 'author_year_end', opts?.authorYearEnd) + setIfStringList(url, 'copyright', opts?.copyright, ['true', 'false', 'null']) + setIfStringList(url, 'ids', opts?.ids) + setIfStringList(url, 'languages', opts?.languages) + setIfString(url, 'mime_type', opts?.mimeType) + setIfString(url, 'sort', opts?.sort, ['ascending', 'descending', 'popular']) + setIfString(url, 'topic', opts?.topic) + setIfPositiveInt(url, 'page', opts?.page) const res = await ctx.fetch(url.toString(), { headers: { 
'User-Agent': config.userAgent ?? 'refkit (+https://github.com/MyPrototypeWhat/refkit)' }, signal: ctx.signal, diff --git a/packages/provider-met/src/__tests__/met.test.ts b/packages/provider-met/src/__tests__/met.test.ts index 7b84a3c..46f2662 100644 --- a/packages/provider-met/src/__tests__/met.test.ts +++ b/packages/provider-met/src/__tests__/met.test.ts @@ -46,4 +46,46 @@ describe('met provider', () => { const refs = await met().search({ text: 'zzz', modalities: ['image'] }, ctxRouting({ total: 0, objectIDs: null }, {})) expect(refs).toEqual([]) }) + + it('forwards documented Met search options', async () => { + let searchUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + const u = String(input) + if (u.includes('/search')) { + searchUrl = u + return new Response(JSON.stringify({ total: 0, objectIDs: null }), { status: 200 }) + } + return new Response('null', { status: 404 }) + }) as typeof fetch, + } + await met().search({ + text: 'wheat', + modalities: ['image'], + providerOptions: { + isHighlight: true, + title: true, + tags: true, + departmentId: 11, + isOnView: true, + artistOrCulture: true, + medium: 'Oil Paintings', + geoLocation: 'France', + dateBegin: 1700, + dateEnd: 1800, + }, + }, ctx) + const url = new URL(searchUrl) + expect(url.searchParams.get('isHighlight')).toBe('true') + expect(url.searchParams.get('title')).toBe('true') + expect(url.searchParams.get('tags')).toBe('true') + expect(url.searchParams.get('departmentId')).toBe('11') + expect(url.searchParams.get('isOnView')).toBe('true') + expect(url.searchParams.get('artistOrCulture')).toBe('true') + expect(url.searchParams.get('medium')).toBe('Oil Paintings') + expect(url.searchParams.get('geoLocation')).toBe('France') + expect(url.searchParams.get('dateBegin')).toBe('1700') + expect(url.searchParams.get('dateEnd')).toBe('1800') + expect(url.searchParams.get('hasImages')).toBe('true') + }) }) diff --git a/packages/provider-met/src/index.ts 
b/packages/provider-met/src/index.ts index 6195f0a..a5ba383 100644 --- a/packages/provider-met/src/index.ts +++ b/packages/provider-met/src/index.ts @@ -9,6 +9,19 @@ export interface MetConfig { maxObjects?: number } +export interface MetSearchOptions { + isHighlight?: boolean + title?: boolean + tags?: boolean + departmentId?: number + isOnView?: boolean + artistOrCulture?: boolean + medium?: string + geoLocation?: string + dateBegin?: number + dateEnd?: number +} + const BASE = 'https://collectionapi.metmuseum.org/public/collection/v1' interface MetSearchResponse { total: number; objectIDs: number[] | null } @@ -51,15 +64,42 @@ function toReference(o: MetObject): Reference | null { } } +function setIfBoolean(url: URL, key: string, value: unknown) { + if (typeof value !== 'boolean') return + url.searchParams.set(key, String(value)) +} + +function setIfInt(url: URL, key: string, value: unknown) { + if (typeof value !== 'number' || !Number.isInteger(value)) return + url.searchParams.set(key, String(value)) +} + +function setIfString(url: URL, key: string, value: unknown) { + if (typeof value !== 'string' || !value) return + url.searchParams.set(key, value) +} + export function met(config: MetConfig = {}) { return defineProvider({ id: 'met', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: [] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const searchUrl = new URL(`${BASE}/search`) searchUrl.searchParams.set('q', q.text) searchUrl.searchParams.set('hasImages', 'true') + const opts = q.providerOptions as MetSearchOptions | undefined + setIfBoolean(searchUrl, 'isHighlight', opts?.isHighlight) + setIfBoolean(searchUrl, 'title', opts?.title) + setIfBoolean(searchUrl, 'tags', opts?.tags) + setIfInt(searchUrl, 'departmentId', opts?.departmentId) + setIfBoolean(searchUrl, 'isOnView', opts?.isOnView) + setIfBoolean(searchUrl, 'artistOrCulture', opts?.artistOrCulture) + setIfString(searchUrl, 'medium', opts?.medium) + 
setIfString(searchUrl, 'geoLocation', opts?.geoLocation) + setIfInt(searchUrl, 'dateBegin', opts?.dateBegin) + setIfInt(searchUrl, 'dateEnd', opts?.dateEnd) const res = await ctx.fetch(searchUrl.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`met search failed: ${res.status}`) const { objectIDs } = (await res.json()) as MetSearchResponse diff --git a/packages/provider-openverse/src/__tests__/openverse.test.ts b/packages/provider-openverse/src/__tests__/openverse.test.ts index 92ec9cf..ef57715 100644 --- a/packages/provider-openverse/src/__tests__/openverse.test.ts +++ b/packages/provider-openverse/src/__tests__/openverse.test.ts @@ -39,6 +39,111 @@ describe('mapOpenverseLicense', () => { }) describe('openverse provider', () => { + it('preserves the default Openverse license_type when both unified flags are enabled', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverse().search({ + text: 'sky', + modalities: ['image'], + controls: { license: { commercial: true, modification: true } }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license_type')).toBe('commercial,modification') + }) + + it('maps allowUnknown license control to Openverse license_type=all', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverse().search({ + text: 'sky', + modalities: ['image'], + controls: { license: { allowUnknown: true } }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license_type')).toBe('all') + }) + + it('maps a single unified commercial license flag to Openverse license_type=commercial', async () => { + let 
calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverse().search({ + text: 'sky', + modalities: ['image'], + controls: { license: { commercial: true, modification: false } }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license_type')).toBe('commercial') + }) + + it('forwards documented Openverse image search options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverse().search({ + text: 'sky', + modalities: ['image'], + providerOptions: { + source: ['flickr', 'rawpixel'], + excludedSource: 'thingiverse', + tags: ['blue', 'sky'], + license: ['cc0', 'by'], + licenseType: 'all-cc', + filterDead: false, + extension: ['jpg', 'png'], + mature: true, + sortBy: 'indexed_on', + sortDir: 'desc', + authority: true, + authorityBoost: 2.5, + includeSensitiveResults: true, + category: ['photograph', 'illustration'], + aspectRatio: 'wide', + size: 'large', + page: 3, + pageSize: 17, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('q')).toBeNull() + expect(url.searchParams.get('source')).toBe('flickr,rawpixel') + expect(url.searchParams.get('excluded_source')).toBe('thingiverse') + expect(url.searchParams.get('tags')).toBe('blue,sky') + expect(url.searchParams.get('license')).toBe('cc0,by') + expect(url.searchParams.get('license_type')).toBe('all-cc') + expect(url.searchParams.get('filter_dead')).toBe('false') + expect(url.searchParams.get('extension')).toBe('jpg,png') + expect(url.searchParams.get('mature')).toBe('true') + expect(url.searchParams.get('unstable__sort_by')).toBe('indexed_on') + 
expect(url.searchParams.get('unstable__sort_dir')).toBe('desc') + expect(url.searchParams.get('unstable__authority')).toBe('true') + expect(url.searchParams.get('unstable__authority_boost')).toBe('2.5') + expect(url.searchParams.get('unstable__include_sensitive_results')).toBe('true') + expect(url.searchParams.get('category')).toBe('photograph,illustration') + expect(url.searchParams.get('aspect_ratio')).toBe('wide') + expect(url.searchParams.get('size')).toBe('large') + expect(url.searchParams.get('page')).toBe('3') + expect(url.searchParams.get('page_size')).toBe('17') + }) + it('maps results to normalized References with correct provenance', async () => { const refs = await openverse().search({ text: 'sky', modalities: ['image'] }, ctxWith(FIXTURE)) expect(refs).toHaveLength(2) @@ -106,6 +211,55 @@ describe('openverseAudio provider', () => { expect(evaluateUse(r.rights, 'commercial-product').decision).toBe('allowed-with-attribution') }) + it('shares the license_type helper with audio searches', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverseAudio().search({ + text: 'piano', + modalities: ['audio'], + controls: { license: { allowUnknown: true } }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('license_type')).toBe('all') + }) + + it('forwards documented Openverse audio search options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ results: [] }), { status: 200 }) + }) as typeof fetch, + } + await openverseAudio().search({ + text: 'piano', + modalities: ['audio'], + providerOptions: { + creator: 'benpm', + source: 'freesound', + category: 'music', + length: 'short', + page: 2, + pageSize: 9, + }, + }, 
ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('q')).toBeNull() + expect(url.searchParams.get('creator')).toBe('benpm') + expect(url.searchParams.get('source')).toBe('freesound') + expect(url.searchParams.get('category')).toBe('music') + expect(url.searchParams.get('length')).toBe('short') + expect(url.searchParams.get('page')).toBe('2') + expect(url.searchParams.get('page_size')).toBe('9') + expect(url.searchParams.get('aspect_ratio')).toBeNull() + expect(url.searchParams.get('size')).toBeNull() + }) + it('a by-nc audio item maps to proprietary and is denied for commercial use (moat)', async () => { const NC = { results: [{ ...AUDIO.results[0], license: 'by-nc', license_version: '3.0' }] } const refs = await openverseAudio().search({ text: 'x', modalities: ['audio'] }, ctxWith(NC)) diff --git a/packages/provider-openverse/src/index.ts b/packages/provider-openverse/src/index.ts index e78fbbc..e20b79a 100644 --- a/packages/provider-openverse/src/index.ts +++ b/packages/provider-openverse/src/index.ts @@ -9,6 +9,38 @@ export interface OpenverseConfig { token?: string } +export interface OpenverseSearchOptions { + page?: number + pageSize?: number + source?: string | readonly string[] + excludedSource?: string | readonly string[] + tags?: string | readonly string[] + title?: string | readonly string[] + creator?: string | readonly string[] + collection?: 'tag' | 'source' | 'creator' + collectionTag?: string + license?: string | readonly string[] + licenseType?: 'all' | 'all-cc' | 'commercial' | 'modification' + filterDead?: boolean + extension?: string | readonly string[] + mature?: boolean + sortBy?: 'relevance' | 'indexed_on' + sortDir?: 'desc' | 'asc' + authority?: boolean + authorityBoost?: number + includeSensitiveResults?: boolean + category?: string | readonly string[] +} + +export interface OpenverseImageSearchOptions extends OpenverseSearchOptions { + aspectRatio?: string | readonly string[] + size?: string | readonly string[] +} + 
/** Audio-specific Openverse options layered on top of the shared search options. */
export interface OpenverseAudioSearchOptions extends OpenverseSearchOptions {
  /** Forwarded as the `length` query parameter; a list is comma-joined. */
  length?: string | readonly string[]
}

/**
 * Translates the unified license controls into an Openverse `license_type` value.
 *
 * `allowUnknown` widens the hint to `all`; otherwise the requested restrictions
 * are comma-joined, defaulting to `commercial,modification` when none is set.
 */
function openverseLicenseType(license: import('@refkit/core').SearchLicenseControls | undefined): string {
  if (license?.allowUnknown) return 'all'
  const types: string[] = []
  if (license?.commercial) types.push('commercial')
  if (license?.modification) types.push('modification')
  return types.length > 0 ? types.join(',') : 'commercial,modification'
}

/** Sets `key` only when `value` is an integer >= 1. */
function setIfPositiveInt(url: URL, key: string, value: unknown) {
  if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return
  url.searchParams.set(key, String(value))
}

/** Sets `key` only when `value` is a non-empty string (and, if given, listed in `allowed`). */
function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) {
  if (typeof value !== 'string' || !value) return
  if (allowed && !allowed.includes(value)) return
  url.searchParams.set(key, value)
}

/**
 * Normalises a "string or list of strings" option into its query-parameter form.
 *
 * @returns the string itself, or the comma-joined list, or `undefined` when the
 * value is empty, not a string/array, or contains any non-string or empty entry.
 */
function stringListParam(value: unknown): string | undefined {
  if (typeof value === 'string') return value.length > 0 ? value : undefined
  if (!Array.isArray(value) || value.length === 0) return undefined
  const allValid = value.every(entry => typeof entry === 'string' && entry.length > 0)
  return allValid ? value.join(',') : undefined
}

/** Sets `key` when `value` normalises to a usable string list; otherwise leaves the URL untouched. */
function setIfStringList(url: URL, key: string, value: unknown) {
  const param = stringListParam(value)
  if (param !== undefined) url.searchParams.set(key, param)
}

/** Sets `key` to `"true"`/`"false"` only when `value` is a boolean. */
function setIfBoolean(url: URL, key: string, value: unknown) {
  if (typeof value !== 'boolean') return
  url.searchParams.set(key, String(value))
}

/** Sets `key` only when `value` is a finite number inside the optional inclusive `[min, max]` range. */
function setIfNumber(url: URL, key: string, value: unknown, options?: { min?: number; max?: number }) {
  if (typeof value !== 'number' || !Number.isFinite(value)) return
  if (options?.min !== undefined && value < options.min) return
  if (options?.max !== undefined && value > options.max) return
  url.searchParams.set(key, String(value))
}

/**
 * Reports whether `value` would actually be written by `setIfStringList`.
 *
 * Both functions share `stringListParam`, so a list that is rejected (for
 * example one containing an empty entry) no longer counts as "present".
 */
function hasStringList(value: unknown): boolean {
  return stringListParam(value) !== undefined
}

/**
 * Applies the whitelisted Openverse provider options to `url`.
 *
 * Unsupported or malformed values are ignored. When a field search (`tags`,
 * `title`, `creator`) or a collection is requested, the free-text `q`
 * parameter is removed from the URL.
 */
function applyOpenverseSearchOptions(url: URL, opts: OpenverseSearchOptions | undefined) {
  if (!opts) return
  setIfPositiveInt(url, 'page', opts.page)
  setIfPositiveInt(url, 'page_size', opts.pageSize)
  setIfStringList(url, 'source', opts.source)
  setIfStringList(url, 'excluded_source', opts.excludedSource)

  // Only drop `q` when at least one field parameter will really be sent;
  // otherwise the request would go out with no search criterion at all.
  const fieldSearch = hasStringList(opts.tags) || hasStringList(opts.title) || hasStringList(opts.creator)
  if (fieldSearch) url.searchParams.delete('q')
  setIfStringList(url, 'tags', opts.tags)
  setIfStringList(url, 'title', opts.title)
  setIfStringList(url, 'creator', opts.creator)

  if (opts.collection === 'tag' || opts.collection === 'source' || opts.collection === 'creator') {
    url.searchParams.delete('q')
    setIfString(url, 'unstable__collection', opts.collection, ['tag', 'source', 'creator'])
  }
  setIfString(url, 'unstable__tag', opts.collectionTag)
  setIfStringList(url, 'license', opts.license)
  setIfString(url, 'license_type', opts.licenseType, ['all', 'all-cc', 'commercial', 'modification'])
  setIfBoolean(url, 'filter_dead', opts.filterDead)
  setIfStringList(url, 'extension', opts.extension)
  setIfBoolean(url, 'mature', opts.mature)
  setIfString(url, 'unstable__sort_by', opts.sortBy, ['relevance', 'indexed_on'])
  setIfString(url, 'unstable__sort_dir', opts.sortDir, ['desc', 'asc'])
  setIfBoolean(url, 'unstable__authority', opts.authority)
  setIfNumber(url, 'unstable__authority_boost', opts.authorityBoost, { min: 0, max: 10 })
  setIfBoolean(url, 'unstable__include_sensitive_results', opts.includeSensitiveResults)
  setIfStringList(url, 'category', opts.category)
}
['license.commercial', 'license.modification', 'license.allowUnknown'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://api.openverse.org/v1/images/') url.searchParams.set('q', q.text) - url.searchParams.set('license_type', 'commercial,modification') // performance/relevance hint only — the AUTHORITATIVE rights gate is mapOpenverseLicense below, not this filter + url.searchParams.set('license_type', openverseLicenseType(q.controls?.license)) // performance/relevance hint only — the AUTHORITATIVE rights gate is mapOpenverseLicense below, not this filter url.searchParams.set('page_size', String(q.limit ?? 20)) + const opts = q.providerOptions as OpenverseImageSearchOptions | undefined + applyOpenverseSearchOptions(url, opts) + setIfStringList(url, 'aspect_ratio', opts?.aspectRatio) + setIfStringList(url, 'size', opts?.size) const headers: Record = {} if (config.token) headers.Authorization = `Bearer ${config.token}` const res = await ctx.fetch(url.toString(), { headers, signal: ctx.signal }) @@ -143,11 +250,15 @@ export function openverseAudio(config: OpenverseConfig = {}) { id: 'openverse-audio', modalities: ['audio'], queryFeatures: ['keyword'], + capabilities: { controls: ['license.commercial', 'license.modification', 'license.allowUnknown'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://api.openverse.org/v1/audio/') url.searchParams.set('q', q.text) - url.searchParams.set('license_type', 'commercial,modification') // relevance hint; mapOpenverseLicense authoritative + url.searchParams.set('license_type', openverseLicenseType(q.controls?.license)) // relevance hint; mapOpenverseLicense authoritative url.searchParams.set('page_size', String(q.limit ?? 
20)) + const opts = q.providerOptions as OpenverseAudioSearchOptions | undefined + applyOpenverseSearchOptions(url, opts) + setIfStringList(url, 'length', opts?.length) const headers: Record = {} if (config.token) headers.Authorization = `Bearer ${config.token}` const res = await ctx.fetch(url.toString(), { headers, signal: ctx.signal }) diff --git a/packages/provider-pexels/src/__tests__/pexels.test.ts b/packages/provider-pexels/src/__tests__/pexels.test.ts index 5dbc646..9576f6a 100644 --- a/packages/provider-pexels/src/__tests__/pexels.test.ts +++ b/packages/provider-pexels/src/__tests__/pexels.test.ts @@ -28,6 +28,70 @@ describe('pexels provider', () => { expect(r.thumbnail?.url).toBe('https://images.pexels.com/photos/3573351/x?h=200') expect(r.visual).toEqual({ width: 3066, height: 3968, dominantColors: ['#374824'] }) }) + + it('forwards documented photo search filters and Pexels-specific options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await pexels({ apiKey: 'k' }).search({ + text: 'trees', + modalities: ['image'], + filters: { orientation: 'portrait', color: '#ffffff', language: 'zh-CN' }, + providerOptions: { orientation: 'landscape', color: 'red', size: 'large', locale: 'fr-FR', page: 2, perPage: 11 }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('landscape') + expect(url.searchParams.get('color')).toBe('red') + expect(url.searchParams.get('locale')).toBe('fr-FR') + expect(url.searchParams.get('size')).toBe('large') + expect(url.searchParams.get('page')).toBe('2') + expect(url.searchParams.get('per_page')).toBe('11') + }) + + it('maps unified controls to documented Pexels photo search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = 
String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await pexels({ apiKey: 'k' }).search({ + text: 'trees', + modalities: ['image'], + controls: { orientation: 'portrait', color: '#ffffff', language: 'zh-CN', media: { size: 'large' }, page: 2 }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('portrait') + expect(url.searchParams.get('color')).toBe('#ffffff') + expect(url.searchParams.get('locale')).toBe('zh-CN') + expect(url.searchParams.get('size')).toBe('large') + expect(url.searchParams.get('page')).toBe('2') + }) + + it('keeps primary controls ahead of conflicting legacy filters in mixed migration calls', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await pexels({ apiKey: 'k' }).search({ + text: 'trees', + modalities: ['image'], + filters: { orientation: 'landscape', color: '#000000', language: 'en-US' }, + controls: { orientation: 'portrait', color: '#ffffff', language: 'zh-CN' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('portrait') + expect(url.searchParams.get('color')).toBe('#ffffff') + expect(url.searchParams.get('locale')).toBe('zh-CN') + }) }) describe('pexelsVideo provider', () => { @@ -59,4 +123,68 @@ describe('pexelsVideo provider', () => { expect(r.visual).toEqual({ width: 2560, height: 1440 }) expect(evaluateUse(r.rights, 'commercial-product').decision).toBe('allowed') // pexels license is commercial-OK }) + + it('forwards documented video search filters and Pexels-specific options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(VIDEO_FIXTURE), { status: 200 }) + }) as typeof fetch, + } 
+ await pexelsVideo({ apiKey: 'k' }).search({ + text: 'cat', + modalities: ['video'], + filters: { orientation: 'landscape', language: 'en-US' }, + providerOptions: { orientation: 'portrait', size: 'medium', locale: 'fr-FR', page: 3, perPage: 12, color: 'red' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('portrait') + expect(url.searchParams.get('locale')).toBe('fr-FR') + expect(url.searchParams.get('size')).toBe('medium') + expect(url.searchParams.get('page')).toBe('3') + expect(url.searchParams.get('per_page')).toBe('12') + expect(url.searchParams.get('color')).toBeNull() + }) + + it('maps unified controls to documented Pexels video search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(VIDEO_FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await pexelsVideo({ apiKey: 'k' }).search({ + text: 'cat', + modalities: ['video'], + controls: { orientation: 'landscape', language: 'en-US', media: { size: 'medium' }, page: 3 }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('landscape') + expect(url.searchParams.get('locale')).toBe('en-US') + expect(url.searchParams.get('size')).toBe('medium') + expect(url.searchParams.get('page')).toBe('3') + }) + + it('does not forward color controls to the Pexels video endpoint', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(VIDEO_FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await pexelsVideo({ apiKey: 'k' }).search({ + text: 'cat', + modalities: ['video'], + controls: { orientation: 'landscape', color: '#ffffff', language: 'en-US', media: { size: 'medium' }, page: 3 }, + }, ctx) + const url = new URL(calledUrl) + 
/** Provider-specific Pexels search options accepted through `providerOptions`. */
export interface PexelsSearchOptions {
  orientation?: 'landscape' | 'portrait' | 'square'
  color?: string
  size?: 'large' | 'medium' | 'small'
  locale?: string
  page?: number
  /** Forwarded as `per_page`, capped at 80. */
  perPage?: number
}

/** Sets `key` only when `value` is a non-empty string (and, if given, listed in `allowed`). */
function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) {
  // Empty strings are skipped so they cannot overwrite a value set by an earlier layer with `key=`.
  if (typeof value !== 'string' || value.length === 0) return
  if (allowed && !allowed.includes(value)) return
  url.searchParams.set(key, value)
}

/** Sets `key` only when `value` is an integer >= 1, clamped to `max` when a cap is supplied. */
function setIfPositiveInt(url: URL, key: string, value: unknown, max?: number) {
  if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return
  url.searchParams.set(key, String(max === undefined ? value : Math.min(value, max)))
}

/**
 * Returns the legacy filter value only when the matching unified control is
 * absent, so `controls` always wins in mixed migration calls.
 */
function useLegacyFilter<T>(control: T | undefined, legacy: T | undefined): T | undefined {
  return control === undefined ? legacy : undefined
}

/**
 * Writes Pexels search parameters onto `url` in three layers, later layers
 * overriding earlier ones: unified `controls`, legacy `filters` (only where
 * the control is undefined), then whitelisted `providerOptions`.
 *
 * @param options.allowColor when not `true`, no `color` parameter is written
 * from any layer (the video provider calls this without it).
 */
function applyPexelsSearchParams(url: URL, q: NormalizedQuery, options?: { allowColor?: boolean }) {
  const allowColor = options?.allowColor === true
  const { controls, filters } = q

  // Layer 1: provider-neutral controls, validated like provider options.
  setIfString(url, 'orientation', controls?.orientation)
  if (allowColor) setIfString(url, 'color', controls?.color)
  setIfString(url, 'locale', controls?.language)
  setIfString(url, 'size', controls?.media?.size)
  setIfPositiveInt(url, 'page', controls?.page)

  // Layer 2: legacy filters fill in only where no control was supplied.
  setIfString(url, 'orientation', useLegacyFilter(controls?.orientation, filters?.orientation))
  if (allowColor) setIfString(url, 'color', useLegacyFilter(controls?.color, filters?.color))
  setIfString(url, 'locale', useLegacyFilter(controls?.language, filters?.language))

  // Layer 3: explicit provider options take precedence over everything above.
  const opts = q.providerOptions as PexelsSearchOptions | undefined
  setIfString(url, 'orientation', opts?.orientation, ['landscape', 'portrait', 'square'])
  if (allowColor) setIfString(url, 'color', opts?.color)
  setIfString(url, 'size', opts?.size, ['large', 'medium', 'small'])
  setIfString(url, 'locale', opts?.locale)
  setIfPositiveInt(url, 'page', opts?.page)
  setIfPositiveInt(url, 'per_page', opts?.perPage, 80)
}
URL('https://api.pexels.com/v1/search') url.searchParams.set('query', q.text) url.searchParams.set('per_page', String(Math.min(q.limit ?? 15, 80))) + applyPexelsSearchParams(url, q, { allowColor: true }) const res = await ctx.fetch(url.toString(), { headers: { Authorization: config.apiKey }, signal: ctx.signal }) if (!res.ok) throw new Error(`pexels search failed: ${res.status}`) const json = (await res.json()) as PexelsResponse @@ -104,11 +151,13 @@ export function pexelsVideo(config: PexelsConfig) { return defineProvider({ id: 'pexels-video', modalities: ['video'], - queryFeatures: ['keyword'], + queryFeatures: ['keyword', 'orientation', 'language'], + capabilities: { controls: ['orientation', 'language', 'media.size', 'page'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://api.pexels.com/videos/search') url.searchParams.set('query', q.text) url.searchParams.set('per_page', String(Math.min(q.limit ?? 15, 80))) + applyPexelsSearchParams(url, q) const res = await ctx.fetch(url.toString(), { headers: { Authorization: config.apiKey }, signal: ctx.signal }) if (!res.ok) throw new Error(`pexels video search failed: ${res.status}`) const json = (await res.json()) as PexelsVideoResponse diff --git a/packages/provider-pixabay/src/__tests__/pixabay.test.ts b/packages/provider-pixabay/src/__tests__/pixabay.test.ts index 6309c1e..4cc8875 100644 --- a/packages/provider-pixabay/src/__tests__/pixabay.test.ts +++ b/packages/provider-pixabay/src/__tests__/pixabay.test.ts @@ -31,6 +31,92 @@ describe('pixabay provider', () => { expect(r.thumbnail).toEqual({ url: 'https://cdn.pixabay.com/photo/flower-195893_150.jpg', width: 150, height: 84 }) expect(r.visual).toEqual({ width: 4000, height: 2250 }) }) + + it('forwards documented image search filters and Pixabay-specific options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { calledUrl = String(input); return new 
Response(JSON.stringify(FIXTURE), { status: 200 }) }) as typeof fetch, + } + await pixabay({ key: 'SECRET' }).search({ + text: 'flowers', + modalities: ['image'], + filters: { orientation: 'landscape', color: 'blue', language: 'de' }, + providerOptions: { + imageType: 'illustration', + orientation: 'vertical', + category: 'nature', + minWidth: 1200, + minHeight: 800, + colors: ['blue', 'transparent'], + safesearch: true, + order: 'latest', + editorsChoice: true, + lang: 'fr', + id: '195893', + page: 4, + perPage: 33, + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('vertical') + expect(url.searchParams.get('colors')).toBe('blue,transparent') + expect(url.searchParams.get('lang')).toBe('fr') + expect(url.searchParams.get('id')).toBe('195893') + expect(url.searchParams.get('image_type')).toBe('illustration') + expect(url.searchParams.get('category')).toBe('nature') + expect(url.searchParams.get('min_width')).toBe('1200') + expect(url.searchParams.get('min_height')).toBe('800') + expect(url.searchParams.get('safesearch')).toBe('true') + expect(url.searchParams.get('order')).toBe('latest') + expect(url.searchParams.get('editors_choice')).toBe('true') + expect(url.searchParams.get('page')).toBe('4') + expect(url.searchParams.get('per_page')).toBe('33') + }) + + it('maps unified controls to documented Pixabay image search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { calledUrl = String(input); return new Response(JSON.stringify(FIXTURE), { status: 200 }) }) as typeof fetch, + } + await pixabay({ key: 'SECRET' }).search({ + text: 'flowers', + modalities: ['image'], + controls: { + orientation: 'landscape', + color: 'blue', + language: 'de', + sort: 'latest', + safety: 'strict', + media: { kind: 'illustration', minWidth: 1200, minHeight: 800 }, + }, + }, ctx) + const url = new URL(calledUrl) + 
expect(url.searchParams.get('orientation')).toBe('horizontal') + expect(url.searchParams.get('colors')).toBe('blue') + expect(url.searchParams.get('lang')).toBe('de') + expect(url.searchParams.get('image_type')).toBe('illustration') + expect(url.searchParams.get('min_width')).toBe('1200') + expect(url.searchParams.get('min_height')).toBe('800') + expect(url.searchParams.get('safesearch')).toBe('true') + expect(url.searchParams.get('order')).toBe('latest') + }) + + it('keeps primary controls ahead of conflicting legacy filters in mixed migration calls', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { calledUrl = String(input); return new Response(JSON.stringify(FIXTURE), { status: 200 }) }) as typeof fetch, + } + await pixabay({ key: 'SECRET' }).search({ + text: 'flowers', + modalities: ['image'], + filters: { orientation: 'portrait', color: 'red', language: 'en' }, + controls: { orientation: 'landscape', color: 'blue', language: 'de' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('horizontal') + expect(url.searchParams.get('colors')).toBe('blue') + expect(url.searchParams.get('lang')).toBe('de') + }) }) describe('pixabayVideo provider', () => { @@ -64,4 +150,41 @@ describe('pixabayVideo provider', () => { expect(r.visual).toEqual({ width: 1920, height: 1080 }) expect(evaluateUse(r.rights, 'commercial-product').decision).toBe('allowed') // pixabay license is commercial-OK }) + + it('forwards documented video search filters and Pixabay-specific options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { calledUrl = String(input); return new Response(JSON.stringify(VIDEO_FIXTURE), { status: 200 }) }) as typeof fetch, + } + await pixabayVideo({ key: 'SECRET' }).search({ + text: 'flowers', + modalities: ['video'], + filters: { language: 'fr' }, + providerOptions: { + videoType: 'animation', + 
/** Provider-specific options for the Pixabay image endpoint, accepted through `providerOptions`. */
export interface PixabayImageSearchOptions {
  lang?: string
  id?: string
  imageType?: 'all' | 'photo' | 'illustration' | 'vector'
  orientation?: 'all' | 'horizontal' | 'vertical'
  category?: string
  minWidth?: number
  minHeight?: number
  /** One colour name, a comma-separated string, or a list; forwarded comma-joined as `colors`. */
  colors?: string | readonly string[]
  safesearch?: boolean
  order?: 'popular' | 'latest'
  editorsChoice?: boolean
  page?: number
  /** Forwarded as `per_page`; values below 3 are ignored and values above 200 are capped. */
  perPage?: number
}

/** Provider-specific options for the Pixabay video endpoint, accepted through `providerOptions`. */
export interface PixabayVideoSearchOptions {
  lang?: string
  id?: string
  videoType?: 'all' | 'film' | 'animation'
  category?: string
  minWidth?: number
  minHeight?: number
  safesearch?: boolean
  order?: 'popular' | 'latest'
  editorsChoice?: boolean
  page?: number
  /** Forwarded as `per_page`; values below 3 are ignored and values above 200 are capped. */
  perPage?: number
}

/** Sets `key` only when `value` is a non-empty string (and, if given, listed in `allowed`). */
function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) {
  // Empty strings are skipped so they cannot replace an earlier value with `key=`.
  if (typeof value !== 'string' || value.length === 0) return
  if (allowed && !allowed.includes(value)) return
  url.searchParams.set(key, value)
}

/**
 * Sets `key` to a comma-joined list built from a comma-separated string or an
 * array of strings.
 *
 * Nothing is written when the value is empty, contains a non-string or empty
 * entry, or (when `allowed` is given) contains an entry outside the whitelist.
 */
function setIfStringList(url: URL, key: string, value: unknown, allowed?: readonly string[]) {
  let entries: string[]
  if (typeof value === 'string') {
    entries = value.split(',')
  } else if (Array.isArray(value)) {
    const strings = value.filter((entry): entry is string => typeof entry === 'string')
    if (strings.length !== value.length) return
    entries = strings
  } else {
    return
  }
  // An empty array (or '' / 'a,,b') would otherwise produce an empty or malformed parameter.
  if (entries.length === 0 || entries.some(entry => entry.length === 0)) return
  if (allowed && !entries.every(entry => allowed.includes(entry))) return
  url.searchParams.set(key, entries.join(','))
}

/** Sets `key` only when `value` is an integer >= 0. */
function setIfNonNegativeInt(url: URL, key: string, value: unknown) {
  if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) return
  url.searchParams.set(key, String(value))
}

/**
 * Sets `key` only when `value` is an integer of at least `options.min`
 * (default 1); the written value is capped at `options.max` when supplied.
 */
function setIfPositiveInt(url: URL, key: string, value: unknown, options?: { min?: number; max?: number }) {
  const floor = options?.min ?? 1
  if (typeof value !== 'number' || !Number.isInteger(value) || value < floor) return
  const capped = options?.max === undefined ? value : Math.min(value, options.max)
  url.searchParams.set(key, String(capped))
}

/** Sets `key` to `"true"`/`"false"` only when `value` is a boolean. */
function setIfBoolean(url: URL, key: string, value: unknown) {
  if (typeof value !== 'boolean') return
  url.searchParams.set(key, String(value))
}

/**
 * Returns the legacy filter value only when the matching unified control is
 * absent, so `controls` always wins in mixed migration calls.
 */
function useLegacyFilter<T>(control: T | undefined, legacy: T | undefined): T | undefined {
  return control === undefined ? legacy : undefined
}

/**
 * Maps the unified orientation vocabulary onto Pixabay's:
 * `landscape` -> `horizontal`, `portrait` -> `vertical`; anything else yields `undefined`.
 */
function pixabayOrientation(orientation: string | undefined): string | undefined {
  switch (orientation) {
    case 'landscape':
      return 'horizontal'
    case 'portrait':
      return 'vertical'
    default:
      return undefined
  }
}
20, 3), 200))) + if (q.controls?.language) url.searchParams.set('lang', q.controls.language) + if (q.controls?.color) url.searchParams.set('colors', q.controls.color) + const controlsOrientation = pixabayOrientation(q.controls?.orientation) + if (controlsOrientation) url.searchParams.set('orientation', controlsOrientation) + if (q.controls?.sort === 'latest' || q.controls?.sort === 'popular') url.searchParams.set('order', q.controls.sort) + if (q.controls?.safety === 'strict') url.searchParams.set('safesearch', 'true') + if (q.controls?.safety === 'off') url.searchParams.set('safesearch', 'false') + if (q.controls?.media?.kind === 'photo' || q.controls?.media?.kind === 'illustration' || q.controls?.media?.kind === 'vector') { + url.searchParams.set('image_type', q.controls.media.kind) + } + if (q.controls?.media?.minWidth !== undefined) url.searchParams.set('min_width', String(q.controls.media.minWidth)) + if (q.controls?.media?.minHeight !== undefined) url.searchParams.set('min_height', String(q.controls.media.minHeight)) + const legacyLanguage = useLegacyFilter(q.controls?.language, q.filters?.language) + if (legacyLanguage) url.searchParams.set('lang', legacyLanguage) + const legacyColor = useLegacyFilter(q.controls?.color, q.filters?.color) + if (legacyColor) url.searchParams.set('colors', legacyColor) + const orientation = pixabayOrientation(useLegacyFilter(q.controls?.orientation, q.filters?.orientation)) + if (orientation) url.searchParams.set('orientation', orientation) + const opts = q.providerOptions as PixabayImageSearchOptions | undefined + setIfString(url, 'lang', opts?.lang) + setIfString(url, 'id', opts?.id) + setIfString(url, 'image_type', opts?.imageType, ['all', 'photo', 'illustration', 'vector']) + setIfString(url, 'orientation', opts?.orientation, ['all', 'horizontal', 'vertical']) + setIfString(url, 'category', opts?.category) + setIfNonNegativeInt(url, 'min_width', opts?.minWidth) + setIfNonNegativeInt(url, 'min_height', opts?.minHeight) + 
setIfStringList(url, 'colors', opts?.colors, ['grayscale', 'transparent', 'red', 'orange', 'yellow', 'green', 'turquoise', 'blue', 'lilac', 'pink', 'white', 'gray', 'black', 'brown']) + setIfBoolean(url, 'safesearch', opts?.safesearch) + setIfString(url, 'order', opts?.order, ['popular', 'latest']) + setIfBoolean(url, 'editors_choice', opts?.editorsChoice) + setIfPositiveInt(url, 'page', opts?.page) + setIfPositiveInt(url, 'per_page', opts?.perPage, { min: 3, max: 200 }) const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`pixabay search failed: ${res.status}`) const json = (await res.json()) as PixabayResponse @@ -102,12 +208,36 @@ export function pixabayVideo(config: PixabayConfig) { return defineProvider({ id: 'pixabay-video', modalities: ['video'], - queryFeatures: ['keyword'], + queryFeatures: ['keyword', 'language'], + capabilities: { controls: ['language', 'sort', 'safety', 'media.kind', 'media.minWidth', 'media.minHeight'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://pixabay.com/api/videos/') url.searchParams.set('key', config.key) url.searchParams.set('q', q.text) url.searchParams.set('per_page', String(Math.min(Math.max(q.limit ?? 
20, 3), 200))) + if (q.controls?.language) url.searchParams.set('lang', q.controls.language) + if (q.controls?.sort === 'latest' || q.controls?.sort === 'popular') url.searchParams.set('order', q.controls.sort) + if (q.controls?.safety === 'strict') url.searchParams.set('safesearch', 'true') + if (q.controls?.safety === 'off') url.searchParams.set('safesearch', 'false') + if (q.controls?.media?.kind === 'film' || q.controls?.media?.kind === 'animation') { + url.searchParams.set('video_type', q.controls.media.kind) + } + if (q.controls?.media?.minWidth !== undefined) url.searchParams.set('min_width', String(q.controls.media.minWidth)) + if (q.controls?.media?.minHeight !== undefined) url.searchParams.set('min_height', String(q.controls.media.minHeight)) + const legacyLanguage = useLegacyFilter(q.controls?.language, q.filters?.language) + if (legacyLanguage) url.searchParams.set('lang', legacyLanguage) + const opts = q.providerOptions as PixabayVideoSearchOptions | undefined + setIfString(url, 'lang', opts?.lang) + setIfString(url, 'id', opts?.id) + setIfString(url, 'video_type', opts?.videoType, ['all', 'film', 'animation']) + setIfString(url, 'category', opts?.category) + setIfNonNegativeInt(url, 'min_width', opts?.minWidth) + setIfNonNegativeInt(url, 'min_height', opts?.minHeight) + setIfBoolean(url, 'safesearch', opts?.safesearch) + setIfString(url, 'order', opts?.order, ['popular', 'latest']) + setIfBoolean(url, 'editors_choice', opts?.editorsChoice) + setIfPositiveInt(url, 'page', opts?.page) + setIfPositiveInt(url, 'per_page', opts?.perPage, { min: 3, max: 200 }) const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`pixabay video search failed: ${res.status}`) const json = (await res.json()) as PixabayVideoResponse diff --git a/packages/provider-poetrydb/src/__tests__/poetrydb.test.ts b/packages/provider-poetrydb/src/__tests__/poetrydb.test.ts index 3d48e38..c889897 100644 --- 
/** Provider-specific PoetryDB options accepted through `providerOptions`. */
export interface PoetryDbSearchOptions {
  /** Input field name(s) forming the first path segment; defaults to `lines`. */
  inputFields?: string | readonly string[]
  /** One search term per input field; defaults to the query text. */
  searchTerms?: string | readonly string[]
  /** When true, `:abs` is appended after the search terms. */
  matchExact?: boolean
  /** Output field name(s) forming the last path segment. */
  outputFields?: string | readonly string[]
  /** Appended as a `poemcount` input field unless `inputFields` already names it. */
  poemCount?: number
  /** Appended as a `random` input field unless `inputFields` already names it. */
  random?: number
}

/**
 * Normalises a "string or list of strings" option against a whitelist.
 *
 * @returns a fresh array of the accepted values, or `[]` when the value is
 * missing, not a string/array, or contains anything outside `allowed`.
 */
function stringList(value: unknown, allowed: readonly string[]): string[] {
  if (typeof value === 'string') return allowed.includes(value) ? [value] : []
  if (!Array.isArray(value)) return []
  const accepted = value.every(entry => typeof entry === 'string' && allowed.includes(entry))
  return accepted ? Array.from(value) : []
}

/** Normalises search terms to a list of non-empty strings, or `[]` when any entry is invalid. */
function searchTerms(value: unknown): string[] {
  if (typeof value === 'string') return value ? [value] : []
  if (!Array.isArray(value)) return []
  const accepted = value.every(entry => typeof entry === 'string' && entry.length > 0)
  return accepted ? Array.from(value) : []
}

/**
 * Builds the PoetryDB request URL for a query.
 *
 * Without options this is the plain `/lines/<text>` keyword route. With
 * options, the input fields and search terms are resolved first (defaulting to
 * `lines` and the query text), and only then are `poemCount` / `random`
 * appended as extra field/term pairs. Resolving defaults first means adding a
 * count never discards the caller's field, term or query text.
 *
 * If the number of fields and terms supplied by the caller does not match, the
 * keyword route is returned instead.
 */
function poetrydbUrl(text: string, options: PoetryDbSearchOptions | undefined): string {
  const keywordUrl = `https://poetrydb.org/lines/${encodeURIComponent(text)}`
  if (!options) return keywordUrl

  const requestedFields = stringList(options.inputFields, ['author', 'title', 'lines', 'linecount', 'poemcount', 'random'])
  const requestedTerms = searchTerms(options.searchTerms)
  const fields = requestedFields.length > 0 ? requestedFields : ['lines']
  const terms = requestedTerms.length > 0 ? requestedTerms : [text]
  if (fields.length !== terms.length) return keywordUrl

  // Count-style fields are appended as a matched field/term pair, so they keep the lists aligned.
  const counts = [['poemcount', options.poemCount], ['random', options.random]] as const
  for (const [field, count] of counts) {
    if (count === undefined || !Number.isInteger(count) || count <= 0) continue
    if (fields.includes(field)) continue
    fields.push(field)
    terms.push(String(count))
  }

  const encodedTerms = terms.map(term => encodeURIComponent(term)).join(';')
  const exact = options.matchExact ? ':abs' : ''

  // Unless `all` is requested, any explicit output selection is widened to the
  // four fields declared on PoetryDbPoem so mapped results stay complete.
  const requestedOutput = stringList(options.outputFields, ['author', 'title', 'lines', 'linecount', 'all'])
  const output = requestedOutput.length === 0 || requestedOutput.includes('all')
    ? requestedOutput
    : ['author', 'title', 'lines', 'linecount']
  const outputSegment = output.length > 0 ? `/${output.join(',')}` : ''

  return `https://poetrydb.org/${fields.join(',')}/${encodedTerms}${exact}${outputSegment}`
}
expect(url.searchParams.get('type')).toBe('all') + expect(url.searchParams.get('row_group')).toBe('archives') + expect(url.searchParams.get('fq')).toBe('online_media_type:"Images" AND media_usage:"CC0" AND topic:"Cats"') + }) }) diff --git a/packages/provider-smithsonian/src/index.ts b/packages/provider-smithsonian/src/index.ts index 8dee5d3..ec90dc2 100644 --- a/packages/provider-smithsonian/src/index.ts +++ b/packages/provider-smithsonian/src/index.ts @@ -8,6 +8,15 @@ export interface SmithsonianConfig { apiKey: string } +export interface SmithsonianSearchOptions { + start?: number + rows?: number + sort?: 'id' | 'newest' | 'updated' | 'random' + type?: 'edanmdm' | 'ead_collection' | 'ead_component' | 'all' + rowGroup?: 'objects' | 'archives' + filterQuery?: string +} + interface SiMedia { type?: string; content?: string; thumbnail?: string; usage?: { access?: string } } interface SiRow { id: string @@ -53,11 +62,23 @@ function toReference(row: SiRow): Reference | null { } } +function setIfNonNegativeInt(url: URL, key: string, value: unknown, max?: number) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) return + url.searchParams.set(key, String(max ? Math.min(value, max) : value)) +} + +function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string' || !value) return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + export function smithsonian(config: SmithsonianConfig) { return defineProvider({ id: 'smithsonian', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: [] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://api.si.edu/openaccess/api/v1.0/search') url.searchParams.set('api_key', config.apiKey) @@ -65,6 +86,15 @@ export function smithsonian(config: SmithsonianConfig) { url.searchParams.set('rows', String(q.limit ?? 
20)) // bias toward CC0 image records; toReference stays authoritative per media url.searchParams.set('fq', 'online_media_type:"Images" AND media_usage:"CC0"') + const opts = q.providerOptions as SmithsonianSearchOptions | undefined + setIfNonNegativeInt(url, 'start', opts?.start) + setIfNonNegativeInt(url, 'rows', opts?.rows, 1000) + setIfString(url, 'sort', opts?.sort, ['id', 'newest', 'updated', 'random']) + setIfString(url, 'type', opts?.type, ['edanmdm', 'ead_collection', 'ead_component', 'all']) + setIfString(url, 'row_group', opts?.rowGroup, ['objects', 'archives']) + if (opts?.filterQuery) { + url.searchParams.set('fq', `${url.searchParams.get('fq')} AND ${opts.filterQuery}`) + } const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`smithsonian search failed: ${res.status}`) const json = (await res.json()) as SiResponse diff --git a/packages/provider-unsplash/src/__tests__/unsplash.test.ts b/packages/provider-unsplash/src/__tests__/unsplash.test.ts index c0c5674..d262968 100644 --- a/packages/provider-unsplash/src/__tests__/unsplash.test.ts +++ b/packages/provider-unsplash/src/__tests__/unsplash.test.ts @@ -36,4 +36,70 @@ describe('unsplash provider', () => { const refs = await unsplash({ accessKey: 'k' }).search({ text: 'x', modalities: ['image'] }, ctxWith(f)) expect(refs[0].title).toBe('coffee cup') }) + + it('forwards documented search filters and Unsplash-specific options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await unsplash({ accessKey: 'k' }).search({ + text: 'coffee', + modalities: ['image'], + filters: { color: 'blue', orientation: 'square', language: 'zh-Hans' }, + providerOptions: { orderBy: 'latest', contentFilter: 'high', collections: ['abc', 'def'], page: 3, perPage: 12 }, + }, ctx) + const url = new 
URL(calledUrl) + expect(url.searchParams.get('color')).toBe('blue') + expect(url.searchParams.get('orientation')).toBe('squarish') + expect(url.searchParams.get('lang')).toBe('zh-Hans') + expect(url.searchParams.get('order_by')).toBe('latest') + expect(url.searchParams.get('content_filter')).toBe('high') + expect(url.searchParams.get('collections')).toBe('abc,def') + expect(url.searchParams.get('page')).toBe('3') + expect(url.searchParams.get('per_page')).toBe('12') + }) + + it('maps unified controls to documented Unsplash search params', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await unsplash({ accessKey: 'k' }).search({ + text: 'coffee', + modalities: ['image'], + controls: { orientation: 'square', color: 'blue', language: 'zh-Hans', sort: 'latest', safety: 'strict' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('orientation')).toBe('squarish') + expect(url.searchParams.get('color')).toBe('blue') + expect(url.searchParams.get('lang')).toBe('zh-Hans') + expect(url.searchParams.get('order_by')).toBe('latest') + expect(url.searchParams.get('content_filter')).toBe('high') + }) + + it('keeps primary controls ahead of conflicting legacy filters in mixed migration calls', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify(FIXTURE), { status: 200 }) + }) as typeof fetch, + } + await unsplash({ accessKey: 'k' }).search({ + text: 'coffee', + modalities: ['image'], + filters: { color: 'red', orientation: 'portrait', language: 'en-US' }, + controls: { color: 'blue', orientation: 'square', language: 'zh-Hans' }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('color')).toBe('blue') + 
expect(url.searchParams.get('orientation')).toBe('squarish') + expect(url.searchParams.get('lang')).toBe('zh-Hans') + }) }) diff --git a/packages/provider-unsplash/src/index.ts b/packages/provider-unsplash/src/index.ts index 6aaaf5a..1565f3a 100644 --- a/packages/provider-unsplash/src/index.ts +++ b/packages/provider-unsplash/src/index.ts @@ -5,6 +5,15 @@ import { export interface UnsplashConfig { accessKey: string } +export interface UnsplashSearchOptions { + orderBy?: 'latest' | 'relevant' + contentFilter?: 'low' | 'high' + collections?: string | readonly string[] + lang?: string + page?: number + perPage?: number +} + interface UnsplashResult { id: string description: string | null @@ -18,6 +27,26 @@ interface UnsplashResult { } interface UnsplashResponse { results: UnsplashResult[] } +function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string') return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + +function setCollections(url: URL, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set('collections', value) + if (Array.isArray(value) && value.every(v => typeof v === 'string')) url.searchParams.set('collections', value.join(',')) +} + +function setIfPositiveInt(url: URL, key: string, value: unknown, max?: number) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return + url.searchParams.set(key, String(max ? Math.min(value, max) : value)) +} + +function useLegacyFilter(control: T | undefined, legacy: T | undefined): T | undefined { + return control === undefined ? 
legacy : undefined +} + function toReference(r: UnsplashResult): Reference { const rights: RightsRecord = { license: 'unsplash', @@ -44,11 +73,34 @@ export function unsplash(config: UnsplashConfig) { return defineProvider({ id: 'unsplash', modalities: ['image'], - queryFeatures: ['keyword'], + queryFeatures: ['keyword', 'color', 'orientation', 'language'], + capabilities: { controls: ['orientation', 'color', 'language', 'sort', 'safety'] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://api.unsplash.com/search/photos') url.searchParams.set('query', q.text) url.searchParams.set('per_page', String(Math.min(q.limit ?? 10, 30))) // Unsplash hard-caps per_page at 30; default kept low for free-tier rate limits + const controls = q.controls + if (controls?.color) url.searchParams.set('color', controls.color) + if (controls?.orientation) url.searchParams.set('orientation', controls.orientation === 'square' ? 'squarish' : controls.orientation) + if (controls?.language) url.searchParams.set('lang', controls.language) + if (controls?.sort === 'latest' || controls?.sort === 'relevance') { + url.searchParams.set('order_by', controls.sort === 'relevance' ? 'relevant' : controls.sort) + } + if (controls?.safety === 'strict') url.searchParams.set('content_filter', 'high') + if (controls?.safety === 'moderate') url.searchParams.set('content_filter', 'low') + const legacyColor = useLegacyFilter(controls?.color, q.filters?.color) + if (legacyColor) url.searchParams.set('color', legacyColor) + const legacyOrientation = useLegacyFilter(controls?.orientation, q.filters?.orientation) + if (legacyOrientation) url.searchParams.set('orientation', legacyOrientation === 'square' ? 
'squarish' : legacyOrientation) + const legacyLanguage = useLegacyFilter(controls?.language, q.filters?.language) + if (legacyLanguage) url.searchParams.set('lang', legacyLanguage) + const opts = q.providerOptions as UnsplashSearchOptions | undefined + setIfString(url, 'order_by', opts?.orderBy, ['latest', 'relevant']) + setIfString(url, 'content_filter', opts?.contentFilter, ['low', 'high']) + setCollections(url, opts?.collections) + setIfString(url, 'lang', opts?.lang) + setIfPositiveInt(url, 'page', opts?.page) + setIfPositiveInt(url, 'per_page', opts?.perPage, 30) const res = await ctx.fetch(url.toString(), { headers: { Authorization: `Client-ID ${config.accessKey}`, 'Accept-Version': 'v1' }, signal: ctx.signal, diff --git a/packages/provider-wikimedia-commons/src/__tests__/wikimedia-commons.test.ts b/packages/provider-wikimedia-commons/src/__tests__/wikimedia-commons.test.ts index be1c5cb..65744e4 100644 --- a/packages/provider-wikimedia-commons/src/__tests__/wikimedia-commons.test.ts +++ b/packages/provider-wikimedia-commons/src/__tests__/wikimedia-commons.test.ts @@ -90,6 +90,53 @@ describe('wikimedia-commons provider', () => { expect(refs.map(r => r.title)).toEqual(['Cat playing with a lizard', 'Felis catus-cat on snow']) }) + it('forwards documented Wikimedia generator and imageinfo options', async () => { + let calledUrl = '' + const ctx: ProviderContext = { + fetch: (async (input: Parameters[0]) => { + calledUrl = String(input) + return new Response(JSON.stringify({ query: { pages: {} } }), { status: 200 }) + }) as typeof fetch, + } + await wikimediaCommons().search({ + text: 'cat', + modalities: ['image'], + providerOptions: { + gsrnamespace: '14', + gsrlimit: 7, + gsroffset: 20, + gsrqiprofile: 'classic', + gsrqdprofile: 'perfield_builder', + gsrwhat: 'title', + gsrinfo: ['totalhits'], + gsrprop: ['snippet', 'timestamp'], + gsrinterwiki: true, + gsrenablerewrites: false, + gsrsort: 'last_edit_desc', + iiprop: ['commonmetadata'], + iiurlwidth: 640, + 
iiextmetadatafilter: ['License', 'Artist'], + }, + }, ctx) + const url = new URL(calledUrl) + expect(url.searchParams.get('gsrnamespace')).toBe('6') + expect(url.searchParams.get('gsrlimit')).toBe('7') + expect(url.searchParams.get('gsroffset')).toBe('20') + expect(url.searchParams.get('gsrqiprofile')).toBe('classic') + expect(url.searchParams.get('gsrqdprofile')).toBe('perfield_builder') + expect(url.searchParams.get('gsrwhat')).toBe('title') + expect(url.searchParams.get('gsrinfo')).toBe('totalhits') + expect(url.searchParams.get('gsrprop')).toBe('snippet|timestamp') + expect(url.searchParams.get('gsrinterwiki')).toBe('true') + expect(url.searchParams.get('gsrenablerewrites')).toBe('false') + expect(url.searchParams.get('gsrsort')).toBe('last_edit_desc') + expect(url.searchParams.get('iiprop')).toContain('url') + expect(url.searchParams.get('iiprop')).toContain('extmetadata') + expect(url.searchParams.get('iiprop')).toContain('commonmetadata') + expect(url.searchParams.get('iiurlwidth')).toBe('640') + expect(url.searchParams.get('iiextmetadatafilter')).toBe('License|Artist') + }) + it('falls back to the file name when ObjectName carries structured-data label markup', async () => { const QS = { query: { pages: { '1': { diff --git a/packages/provider-wikimedia-commons/src/index.ts b/packages/provider-wikimedia-commons/src/index.ts index d8944d7..6f05721 100644 --- a/packages/provider-wikimedia-commons/src/index.ts +++ b/packages/provider-wikimedia-commons/src/index.ts @@ -9,6 +9,22 @@ export interface WikimediaCommonsConfig { thumbWidth?: number } +export interface WikimediaCommonsSearchOptions { + gsrlimit?: number + gsroffset?: number + gsrqiprofile?: string + gsrqdprofile?: string + gsrwhat?: 'nearmatch' | 'text' | 'title' + gsrinfo?: string | readonly string[] + gsrprop?: string | readonly string[] + gsrinterwiki?: boolean + gsrenablerewrites?: boolean + gsrsort?: string + iiprop?: string | readonly string[] + iiurlwidth?: number + iiextmetadatafilter?: string 
| readonly string[] +} + // Map a Wikimedia Commons extmetadata `License` code (e.g. "cc-by-sa-4.0", "cc0", // "pd-old") to our LicenseId + CC version. NC/ND variants → 'proprietary'; anything // unrecognized — including non-free / fair-use files — → 'unknown' (strict-deny → @@ -100,11 +116,49 @@ function toReference(page: CommonsPage): Reference | null { } } +function setIfString(url: URL, key: string, value: unknown, allowed?: readonly string[]) { + if (typeof value !== 'string' || !value) return + if (allowed && !allowed.includes(value)) return + url.searchParams.set(key, value) +} + +function setPipeList(url: URL, key: string, value: unknown) { + if (typeof value === 'string' && value) url.searchParams.set(key, value) + if (Array.isArray(value) && value.every(v => typeof v === 'string')) url.searchParams.set(key, value.join('|')) +} + +function setIfNonNegativeInt(url: URL, key: string, value: unknown) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) return + url.searchParams.set(key, String(value)) +} + +function setIfPositiveInt(url: URL, key: string, value: unknown, max?: number) { + if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) return + url.searchParams.set(key, String(max ? 
Math.min(value, max) : value)) +} + +function setIfBoolean(url: URL, key: string, value: unknown) { + if (typeof value !== 'boolean') return + url.searchParams.set(key, String(value)) +} + +function commonsImageInfoProps(value: unknown): string { + const props = new Set(['url', 'mime', 'size', 'extmetadata']) + if (typeof value === 'string') { + for (const item of value.split('|')) if (item.trim()) props.add(item.trim()) + } + if (Array.isArray(value) && value.every(v => typeof v === 'string')) { + for (const item of value) if (item) props.add(item) + } + return Array.from(props).join('|') +} + export function wikimediaCommons(config: WikimediaCommonsConfig = {}) { return defineProvider({ id: 'wikimedia-commons', modalities: ['image'], queryFeatures: ['keyword'], + capabilities: { controls: [] }, async search(q: NormalizedQuery, ctx: ProviderContext): Promise { const url = new URL('https://commons.wikimedia.org/w/api.php') url.searchParams.set('action', 'query') @@ -116,6 +170,20 @@ export function wikimediaCommons(config: WikimediaCommonsConfig = {}) { url.searchParams.set('prop', 'imageinfo') url.searchParams.set('iiprop', 'url|mime|size|extmetadata') url.searchParams.set('iiurlwidth', String(config.thumbWidth ?? 
1024)) + const opts = q.providerOptions as WikimediaCommonsSearchOptions | undefined + setIfPositiveInt(url, 'gsrlimit', opts?.gsrlimit, 500) + setIfNonNegativeInt(url, 'gsroffset', opts?.gsroffset) + setIfString(url, 'gsrqiprofile', opts?.gsrqiprofile) + setIfString(url, 'gsrqdprofile', opts?.gsrqdprofile) + setIfString(url, 'gsrwhat', opts?.gsrwhat, ['nearmatch', 'text', 'title']) + setPipeList(url, 'gsrinfo', opts?.gsrinfo) + setPipeList(url, 'gsrprop', opts?.gsrprop) + setIfBoolean(url, 'gsrinterwiki', opts?.gsrinterwiki) + setIfBoolean(url, 'gsrenablerewrites', opts?.gsrenablerewrites) + setIfString(url, 'gsrsort', opts?.gsrsort) + url.searchParams.set('iiprop', commonsImageInfoProps(opts?.iiprop)) + setIfPositiveInt(url, 'iiurlwidth', opts?.iiurlwidth) + setPipeList(url, 'iiextmetadatafilter', opts?.iiextmetadatafilter) const res = await ctx.fetch(url.toString(), { signal: ctx.signal }) if (!res.ok) throw new Error(`wikimedia-commons search failed: ${res.status}`) const json = (await res.json()) as CommonsResponse