diff --git a/__tests__/routes/public/recommend.ts b/__tests__/routes/public/recommend.ts
new file mode 100644
index 0000000000..61c353f65e
--- /dev/null
+++ b/__tests__/routes/public/recommend.ts
@@ -0,0 +1,210 @@
+import request from 'supertest';
+import nock from 'nock';
+import { setupPublicApiTests, createTokenForUser } from './helpers';
+
+const state = setupPublicApiTests();
+
+afterEach(() => {
+  nock.cleanAll();
+});
+
+/**
+ * Registers a nock interceptor for POST /v1/search on localhost:7600 that
+ * answers with the given post ids. The reply uses 200 because a 204
+ * (No Content) response must not carry the JSON body the tests rely on.
+ */
+const nockMimir = (postIds: string[]) => {
+  nock('http://localhost:7600')
+    .post('/v1/search')
+    .reply(
+      200,
+      JSON.stringify({
+        result: postIds.map((postId) => ({ postId })),
+      }),
+    );
+};
+
+describe('GET /public/v1/recommend/keyword', () => {
+  it('should return posts matching mimir results with correct fields', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    nockMimir(['p1', 'p2']);
+
+    const { body, headers } = await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'javascript' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(headers['x-daily-experimental']).toBeDefined();
+    expect(body.data).toHaveLength(2);
+    expect(body.data).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          id: 'p1',
+          title: 'P1',
+          url: 'http://p1.com',
+          image: 'https://daily.dev/image.jpg',
+          type: 'article',
+          tags: ['javascript', 'webdev'],
+          source: expect.objectContaining({
+            id: 'a',
+            name: 'A',
+            handle: 'a',
+          }),
+        }),
+        expect.objectContaining({
+          id: 'p2',
+          title: 'P2',
+          url: 'http://p2.com',
+          source: expect.objectContaining({
+            id: 'b',
+            name: 'B',
+          }),
+        }),
+      ]),
+    );
+    expect(body.pagination).toMatchObject({
+      hasNextPage: expect.any(Boolean),
+      cursor: expect.any(String),
+    });
+  });
+
+  it('should respect limit parameter', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    // Mimir receives limit=2 and returns 2 results
+    nockMimir(['p1', 'p2']);
+
+    const { body } = await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'typescript', limit: 2 })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(body.data).toHaveLength(2);
+    expect(body.data[0]).toMatchObject({ id: 'p1' });
+    expect(body.data[1]).toMatchObject({ id: 'p2' });
+  });
+
+  it('should pass time filter to search', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    nockMimir(['p1']);
+
+    const { body, headers } = await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'react', time: 'month' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(headers['x-daily-experimental']).toBeDefined();
+    expect(body.data).toHaveLength(1);
+    expect(body.data[0]).toMatchObject({
+      id: 'p1',
+      title: 'P1',
+    });
+  });
+
+  it('should not return private posts', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    // p6 is private
+    nockMimir(['p1', 'p6']);
+
+    const { body } = await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'test' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(body.data).toHaveLength(1);
+    expect(body.data[0].id).toBe('p1');
+  });
+
+  it('should return empty data when no mimir results', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    nockMimir([]);
+
+    const { body } = await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'nonexistenttopic' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(body.data).toEqual([]);
+  });
+
+  it('should require authentication', async () => {
+    await request(state.app.server)
+      .get('/public/v1/recommend/keyword')
+      .query({ q: 'test' })
+      .expect(401);
+  });
+});
+
+describe('GET /public/v1/recommend/semantic', () => {
+  it('should return posts matching mimir results with correct fields', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    nockMimir(['p1', 'p2']);
+
+    const { body, headers } = await request(state.app.server)
+      .get('/public/v1/recommend/semantic')
+      .query({ q: 'how do I make my chatbot remember things' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(headers['x-daily-experimental']).toBeDefined();
+    expect(body.data).toHaveLength(2);
+    expect(body.data).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          id: 'p1',
+          title: 'P1',
+          url: 'http://p1.com',
+          tags: ['javascript', 'webdev'],
+          source: expect.objectContaining({ id: 'a', name: 'A' }),
+        }),
+        expect.objectContaining({
+          id: 'p2',
+          title: 'P2',
+          url: 'http://p2.com',
+          source: expect.objectContaining({ id: 'b', name: 'B' }),
+        }),
+      ]),
+    );
+    expect(body.pagination).toBeUndefined();
+  });
+
+  it('should respect limit parameter', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    // Mimir receives limit=2 and returns 2 results
+    nockMimir(['p1', 'p2']);
+
+    const { body } = await request(state.app.server)
+      .get('/public/v1/recommend/semantic')
+      .query({ q: 'what is the best vector database', limit: 2 })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(body.data).toHaveLength(2);
+    expect(body.data[0]).toMatchObject({ id: 'p1' });
+    expect(body.data[1]).toMatchObject({ id: 'p2' });
+  });
+
+  it('should return empty data when no mimir results', async () => {
+    const token = await createTokenForUser(state.con, '5');
+    nockMimir([]);
+
+    const { body } = await request(state.app.server)
+      .get('/public/v1/recommend/semantic')
+      .query({ q: 'nonexistenttopic' })
+      .set('Authorization', `Bearer ${token}`)
+      .expect(200);
+
+    expect(body.data).toEqual([]);
+  });
+
+  it('should require authentication', async () => {
+    await request(state.app.server)
+      .get('/public/v1/recommend/semantic')
+      .query({ q: 'test' })
+      .expect(401);
+  });
+});
diff --git a/src/routes/public/index.ts b/src/routes/public/index.ts
index a3e6d37106..3282cb331d 100644
--- a/src/routes/public/index.ts
+++ b/src/routes/public/index.ts
@@ -13,6 +13,7 @@ import profileRoutes from './profile';
 import stackRoutes from './stack';
 import experiencesRoutes from './experiences';
 import tagsRoutes from './tags';
+import recommendRoutes from './recommend';
 import { commonSchemas } from './schemas';
 import { PUBLIC_API_PREFIX } from '../../common/constants';
 
@@ -194,4 +195,5 @@ export default async function (
   await fastify.register(stackRoutes, { prefix: '/profile/stack' });
   await fastify.register(experiencesRoutes, { prefix: '/profile/experiences' });
   await fastify.register(tagsRoutes, { prefix: '/tags' });
+  await fastify.register(recommendRoutes, { prefix: '/recommend' });
 }
diff --git a/src/routes/public/recommend.ts b/src/routes/public/recommend.ts
new file mode 100644
index 0000000000..d4510f01dd
--- /dev/null
+++ b/src/routes/public/recommend.ts
@@ -0,0 +1,231 @@
+import type { FastifyInstance } from 'fastify';
+import { executeGraphql } from './graphqlExecutor';
+import type { FeedConnection, PostNode } from './common';
+import {
+  parseLimit,
+  ensureDbConnection,
+  POST_NODE_FIELDS,
+  PAGE_INFO_FIELDS,
+} from './common';
+
+const EXPERIMENTAL_HEADER = 'x-daily-experimental';
+const EXPERIMENTAL_WARNING =
+  'This endpoint is experimental and may be removed or changed without notice.';
+
+const RECOMMEND_MAX_LIMIT = 20;
+
+const KEYWORD_SEARCH_QUERY = `
+  query PublicApiRecommendKeyword($query: String!, $first: Int, $after: String, $time: SearchTime) {
+    searchPosts(query: $query, first: $first, after: $after, time: $time) {
+      edges {
+        node {
+          ${POST_NODE_FIELDS}
+        }
+      }
+      ${PAGE_INFO_FIELDS}
+    }
+  }
+`;
+
+const SEMANTIC_SEARCH_QUERY = `
+  query PublicApiRecommendSemantic($query: String!, $first: Int, $time: SearchTime) {
+    searchPosts(query: $query, first: $first, time: $time) {
+      edges {
+        node {
+          ${POST_NODE_FIELDS}
+        }
+      }
+      ${PAGE_INFO_FIELDS}
+    }
+  }
+`;
+
+type SearchPostsResponse = {
+  searchPosts: FeedConnection<PostNode>;
+};
+
+// Maps public `time` query values to the GraphQL SearchTime values.
+const TIME_MAP: Record<string, string> = {
+  day: 'Today',
+  week: 'LastSevenDays',
+  month: 'LastThirtyDays',
+  year: 'ThisYear',
+  all: 'AllTime',
+};
+
+/**
+ * Registers the experimental recommend routes (`/keyword` and `/semantic`).
+ * Both run the `searchPosts` GraphQL query and set the experimental header;
+ * only `/keyword` accepts a cursor and returns pagination info.
+ */
+export default async function (fastify: FastifyInstance): Promise<void> {
+  // Option 1: Keyword-based recommendation
+  // Best for: extracted technical terms, specific technology names
+  fastify.get<{
+    Querystring: { q: string; limit?: string; cursor?: string; time?: string };
+  }>(
+    '/keyword',
+    {
+      schema: {
+        description:
+          '[EXPERIMENTAL] Recommend articles by keyword search. Best when the query contains specific technical terms (e.g. "RAG", "pgvector", "LangChain"). Returns posts with engagement signals for LLM consumption. This endpoint may be removed or changed without notice.',
+        tags: ['recommend'],
+        querystring: {
+          type: 'object',
+          required: ['q'],
+          properties: {
+            q: {
+              type: 'string',
+              description:
+                'Search query — keywords or technical terms (e.g. "RAG vs fine-tuning", "vector database comparison")',
+              minLength: 1,
+            },
+            limit: {
+              type: 'integer',
+              default: 10,
+              maximum: 20,
+              minimum: 1,
+              description:
+                'Number of articles to return (1-20, default 10). Kept small for LLM context efficiency.',
+            },
+            cursor: {
+              type: 'string',
+              description: 'Pagination cursor from previous response',
+            },
+            time: {
+              type: 'string',
+              enum: ['day', 'week', 'month', 'year', 'all'],
+              description:
+                'Time range filter — use "month" or "year" for recent content, "all" for comprehensive results',
+            },
+          },
+        },
+        response: {
+          200: {
+            type: 'object',
+            properties: {
+              data: { type: 'array', items: { $ref: 'FeedPost#' } },
+              pagination: { $ref: 'Pagination#' },
+            },
+          },
+          400: { $ref: 'Error#' },
+          401: { $ref: 'Error#' },
+          429: { $ref: 'RateLimitError#' },
+        },
+      },
+    },
+    async (request, reply) => {
+      const { q, time } = request.query;
+      const limit = parseLimit(request.query.limit, RECOMMEND_MAX_LIMIT);
+      const { cursor } = request.query;
+      const con = ensureDbConnection(fastify.con);
+
+      reply.header(EXPERIMENTAL_HEADER, EXPERIMENTAL_WARNING);
+
+      return executeGraphql(
+        con,
+        {
+          query: KEYWORD_SEARCH_QUERY,
+          variables: {
+            query: q,
+            first: limit,
+            after: cursor ?? null,
+            time: time ? TIME_MAP[time] : null,
+          },
+        },
+        (json) => {
+          const result = json as unknown as SearchPostsResponse;
+          return {
+            data: result.searchPosts.edges.map(({ node }) => node),
+            pagination: {
+              hasNextPage: result.searchPosts.pageInfo.hasNextPage,
+              cursor: result.searchPosts.pageInfo.endCursor,
+            },
+          };
+        },
+        request,
+        reply,
+      );
+    },
+  );
+
+  // Option 2: Semantic recommendation via Mimir
+  // Best for: natural language questions, vague queries from non-technical users
+  // Uses the same underlying Mimir search but framed for single-shot LLM consumption
+  fastify.get<{
+    Querystring: { q: string; limit?: string; time?: string };
+  }>(
+    '/semantic',
+    {
+      schema: {
+        description:
+          '[EXPERIMENTAL] Recommend articles by semantic search. Uses AI-powered matching to find articles for natural language questions. Better for non-technical queries like "how do I make my chatbot remember things?" This endpoint may be removed or changed without notice.',
+        tags: ['recommend'],
+        querystring: {
+          type: 'object',
+          required: ['q'],
+          properties: {
+            q: {
+              type: 'string',
+              description:
+                'Natural language question or topic (e.g. "how do I make my chatbot remember previous conversations?", "what is the best way to handle authentication in a Next.js app?")',
+              minLength: 1,
+            },
+            limit: {
+              type: 'integer',
+              default: 10,
+              maximum: 20,
+              minimum: 1,
+              description:
+                'Number of articles to return (1-20, default 10). Kept small for LLM context efficiency.',
+            },
+            time: {
+              type: 'string',
+              enum: ['day', 'week', 'month', 'year', 'all'],
+              description:
+                'Time range filter — use "month" or "year" for recent content, "all" for comprehensive results',
+            },
+          },
+        },
+        response: {
+          200: {
+            type: 'object',
+            properties: {
+              data: { type: 'array', items: { $ref: 'FeedPost#' } },
+            },
+          },
+          400: { $ref: 'Error#' },
+          401: { $ref: 'Error#' },
+          429: { $ref: 'RateLimitError#' },
+        },
+      },
+    },
+    async (request, reply) => {
+      const { q, time } = request.query;
+      const limit = parseLimit(request.query.limit, RECOMMEND_MAX_LIMIT);
+      const con = ensureDbConnection(fastify.con);
+
+      reply.header(EXPERIMENTAL_HEADER, EXPERIMENTAL_WARNING);
+
+      return executeGraphql(
+        con,
+        {
+          query: SEMANTIC_SEARCH_QUERY,
+          variables: {
+            query: q,
+            first: limit,
+            time: time ? TIME_MAP[time] : null,
+          },
+        },
+        (json) => {
+          const result = json as unknown as SearchPostsResponse;
+          return {
+            data: result.searchPosts.edges.map(({ node }) => node),
+          };
+        },
+        request,
+        reply,
+      );
+    },
+  );
+}