Skip to content

Commit 9d30050

Browse files
committed
feat(docs): add accurate Architecture and Contributing pages
The Architecture page includes: a complete tech stack breakdown (backend + frontend); data flow diagrams for the indexing and search pipelines; backend services documentation; a frontend structure overview; an MCP server explanation; a database schema overview; and emphasis on the Bun requirement for the frontend. The Contributing page includes: prerequisites with accurate versions; local setup instructions (using Bun, not npm); test commands; code style guidelines; the PR process; and a project structure reference.
1 parent 1e0d075 commit 9d30050

4 files changed

Lines changed: 609 additions & 3 deletions

File tree

frontend/src/App.tsx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import { QuickStartPage } from './pages/QuickStartPage';
1414
import { SemanticSearchPage, DependencyAnalysisPage, ImpactPredictionPage, CodeStyleAnalysisPage } from './pages/features';
1515
import { DockerSetupPage, SelfHostingPage } from './pages/deployment';
1616
import { APIOverviewPage, APIRepositoriesPage, APISearchPage, APIAnalysisPage } from './pages/api';
17+
import { ArchitecturePage } from './pages/ArchitecturePage';
18+
import { ContributingPage } from './pages/ContributingPage';
1719
import { GitHubCallbackPage } from './pages/GitHubCallbackPage';
1820
import { ScrollToTop } from './components/ScrollToTop';
1921

@@ -94,9 +96,9 @@ function AppRoutes() {
9496
<Route path="/docs/api/search" element={<APISearchPage />} />
9597
<Route path="/docs/api/analysis" element={<APIAnalysisPage />} />
9698

97-
{/* Placeholder routes for future docs pages */}
98-
<Route path="/docs/architecture" element={<DocsHomePage />} />
99-
<Route path="/docs/contributing/*" element={<DocsHomePage />} />
99+
{/* Contributing pages */}
100+
<Route path="/docs/architecture" element={<ArchitecturePage />} />
101+
<Route path="/docs/contributing" element={<ContributingPage />} />
100102
<Route
101103
path="/github/callback"
102104
element={

frontend/src/components/docs/DocsSidebar.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ const navigation: NavSection[] = [
7272
title: 'Contributing',
7373
items: [
7474
{ title: 'Architecture', href: '/docs/architecture', icon: <GitBranch className="w-4 h-4" /> },
75+
{ title: 'Development Setup', href: '/docs/contributing', icon: <Code className="w-4 h-4" /> },
7576
],
7677
},
7778
]
Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
import {
2+
DocsLayout,
3+
DocsCodeBlock,
4+
DocsCallout,
5+
DocsPagination,
6+
TimeEstimate,
7+
TOCItem
8+
} from '@/components/docs'
9+
10+
// Table-of-contents entries for the Architecture page. Each `id` is the anchor
// of the matching <h2> rendered by ArchitecturePage, so the two must stay in
// sync. Every entry is a top-level (level 2) section.
const tocItems: TOCItem[] = (
  [
    ['overview', 'Overview'],
    ['tech-stack', 'Tech Stack'],
    ['data-flow', 'Data Flow'],
    ['backend-services', 'Backend Services'],
    ['frontend-structure', 'Frontend Structure'],
    ['mcp-server', 'MCP Server'],
    ['database-schema', 'Database Schema'],
  ] as const
).map(([id, title]): TOCItem => ({ id, title, level: 2 }))
19+
20+
/** One row of a tech-stack table: the role, the technology filling it, and why. */
interface TechStackRow {
  component: string
  technology: string
  purpose: string
}

/** Backend technologies, in display order. */
const BACKEND_STACK: readonly TechStackRow[] = [
  { component: 'Framework', technology: 'FastAPI', purpose: 'Async REST API with automatic OpenAPI docs' },
  { component: 'Runtime', technology: 'Python 3.11+', purpose: 'Required for tree-sitter bindings' },
  { component: 'Code Parsing', technology: 'Tree-sitter', purpose: 'AST extraction for Python, JS, TS' },
  { component: 'Embeddings', technology: 'OpenAI text-embedding-3-small', purpose: '1536-dim vectors for semantic search' },
  { component: 'Summaries', technology: 'GPT-4o-mini', purpose: 'Natural language code descriptions' },
  { component: 'Vector DB', technology: 'Pinecone', purpose: 'Serverless vector storage and search' },
  { component: 'Cache', technology: 'Redis', purpose: 'Query caching, rate limiting' },
  { component: 'Database', technology: 'Supabase (PostgreSQL)', purpose: 'User data, repo metadata, API keys' },
  { component: 'Auth', technology: 'Supabase Auth', purpose: 'JWT-based authentication' },
  { component: 'Reranking', technology: 'Cohere (optional)', purpose: 'Improves search result ordering' },
]

/** Frontend technologies, in display order. */
const FRONTEND_STACK: readonly TechStackRow[] = [
  { component: 'Framework', technology: 'React 18', purpose: 'UI framework' },
  { component: 'Language', technology: 'TypeScript', purpose: 'Type safety' },
  { component: 'Build Tool', technology: 'Vite', purpose: 'Fast dev server and bundling' },
  { component: 'Package Manager', technology: 'Bun', purpose: 'Fast installs (do NOT use npm)' },
  { component: 'Styling', technology: 'Tailwind CSS', purpose: 'Utility-first CSS' },
  { component: 'Components', technology: 'shadcn/ui + Radix', purpose: 'Accessible component primitives' },
  { component: 'Data Fetching', technology: 'TanStack Query', purpose: 'Caching, background refetch' },
  { component: 'Routing', technology: 'React Router v7', purpose: 'Client-side navigation' },
  { component: 'Graph Visualization', technology: 'React Flow + Dagre', purpose: 'Dependency graph rendering' },
]

/** Renders a Component / Technology / Purpose table for the given rows. */
function TechStackTable({ rows }: { rows: readonly TechStackRow[] }) {
  return (
    <div className="overflow-x-auto mb-6">
      <table className="w-full text-sm">
        <thead>
          <tr className="border-b border-white/10">
            <th className="text-left py-2 text-gray-400 font-medium">Component</th>
            <th className="text-left py-2 text-gray-400 font-medium">Technology</th>
            <th className="text-left py-2 text-gray-400 font-medium">Purpose</th>
          </tr>
        </thead>
        <tbody className="text-gray-300">
          {rows.map((row, index) => {
            // Every row except the last gets a divider underneath it.
            const isLastRow = index === rows.length - 1
            return (
              <tr key={row.component} className={isLastRow ? undefined : 'border-b border-white/5'}>
                <td className="py-2">{row.component}</td>
                <td className="py-2 font-mono text-blue-400">{row.technology}</td>
                <td className="py-2">{row.purpose}</td>
              </tr>
            )
          })}
        </tbody>
      </table>
    </div>
  )
}

/** Inline code pill used for paths and identifiers inside body paragraphs. */
function InlineCode({ children }: { children: string }) {
  return <code className="px-1.5 py-0.5 bg-white/10 rounded text-sm">{children}</code>
}

/**
 * Docs page served at /docs/architecture: a contributor-facing overview of the
 * repository layout, tech stack, indexing/search pipelines, backend services,
 * frontend structure, MCP server, and database schema.
 *
 * The `id` on each <h2> is the anchor the table of contents (`tocItems`) links
 * to, so heading ids and `tocItems` must be changed together.
 */
export function ArchitecturePage() {
  return (
    <DocsLayout toc={tocItems}>
      <div className="mb-8 pb-8 border-b border-white/10">
        <div className="flex items-center gap-3 mb-4">
          <TimeEstimate minutes={10} />
        </div>
        <h1 className="text-4xl font-bold text-white mb-4">Architecture</h1>
        <p className="text-xl text-gray-400">
          How OpenCodeIntel is built. Technical deep-dive for contributors.
        </p>
      </div>

      <h2 id="overview" className="text-2xl font-semibold text-white mt-12 mb-4">Overview</h2>

      <p className="text-gray-300 mb-6">
        OpenCodeIntel is a monorepo with three main components: a FastAPI backend,
        a React frontend, and a standalone MCP server. The backend handles code indexing,
        semantic search, and analysis. The frontend provides the dashboard UI. The MCP
        server exposes tools to AI assistants like Claude.
      </p>

      <DocsCodeBlock language="text">
        {`opencodeintel/
├── backend/ # FastAPI API server (Python 3.11+)
├── frontend/ # React dashboard (TypeScript, Vite, Bun)
├── mcp-server/ # MCP protocol server (Python)
├── supabase/ # Database migrations
├── docs/ # Additional documentation
└── docker-compose.yml`}
      </DocsCodeBlock>

      <h2 id="tech-stack" className="text-2xl font-semibold text-white mt-12 mb-4">Tech Stack</h2>

      <h3 className="text-lg font-medium text-white mt-6 mb-3">Backend</h3>
      <TechStackTable rows={BACKEND_STACK} />

      <h3 className="text-lg font-medium text-white mt-6 mb-3">Frontend</h3>
      <TechStackTable rows={FRONTEND_STACK} />

      <DocsCallout type="warning" title="Important">
        The frontend uses <strong>Bun</strong> exclusively. Never use npm or yarn.
        Always run <code>bun install</code>, not <code>npm install</code>.
      </DocsCallout>

      <h2 id="data-flow" className="text-2xl font-semibold text-white mt-12 mb-4">Data Flow</h2>

      <h3 className="text-lg font-medium text-white mt-6 mb-3">Indexing Pipeline</h3>
      <p className="text-gray-300 mb-4">When a repository is added:</p>

      <DocsCodeBlock language="text">
        {`1. Clone repo to backend/repos/{uuid}/
2. Walk file tree, filter by language (Python, JS, TS)
3. For each file:
a. Parse with Tree-sitter → Extract functions/classes
b. Generate summary with GPT-4o-mini
c. Create embedding with text-embedding-3-small
d. Store in Pinecone with metadata
4. Build dependency graph from import statements
5. Cache graph in Redis
6. Update repo status in Supabase`}
      </DocsCodeBlock>

      <h3 className="text-lg font-medium text-white mt-6 mb-3">Search Pipeline</h3>
      <DocsCodeBlock language="text">
        {`1. Receive query from user/MCP
2. Check Redis cache for identical query
3. If miss:
a. Embed query with text-embedding-3-small
b. Query Pinecone for top-k similar chunks
c. (Optional) Rerank with Cohere
d. Cache results in Redis (5 min TTL)
4. Return formatted results`}
      </DocsCodeBlock>

      <h2 id="backend-services" className="text-2xl font-semibold text-white mt-12 mb-4">Backend Services</h2>

      <p className="text-gray-300 mb-4">
        Key services in <InlineCode>backend/services/</InlineCode>:
      </p>

      <div className="space-y-4 mb-6">
        <ServiceCard
          name="indexer_optimized.py"
          description="Main indexing engine. Handles file parsing, embedding generation, and Pinecone upserts. Uses batch processing for performance."
        />
        <ServiceCard
          name="dependency_analyzer.py"
          description="Extracts import statements using Tree-sitter. Builds directed graph of file dependencies. Identifies hub files and circular dependencies."
        />
        <ServiceCard
          name="style_analyzer.py"
          description="Analyzes coding conventions: naming patterns (snake_case vs camelCase), async usage, type hint coverage, common imports."
        />
        <ServiceCard
          name="dna_extractor.py"
          description="Generates 'codebase DNA' - architectural patterns, auth conventions, database patterns, error handling. Used to help AI write consistent code."
        />
        <ServiceCard
          name="search_v2/"
          description="Hybrid search implementation. Combines BM25 keyword search with semantic embeddings. Optional Cohere reranking."
        />
        <ServiceCard
          name="cache.py"
          description="Redis wrapper for caching search results, dependency graphs, and style analysis."
        />
      </div>

      <h3 className="text-lg font-medium text-white mt-6 mb-3">API Routes</h3>
      <p className="text-gray-300 mb-4">
        Routes in <InlineCode>backend/routes/</InlineCode>.
        All prefixed with <InlineCode>/api/v1</InlineCode>:
      </p>

      <DocsCodeBlock language="text">
        {`repos.py → /api/v1/repos/* # CRUD for repositories
search.py → /api/v1/search # Semantic search
analysis.py → /api/v1/repos/{id}/* # Dependencies, impact, style, DNA
auth.py → /api/v1/auth/* # Login, signup, session
github.py → /api/v1/github/* # GitHub OAuth, repo import
health.py → /health # Health check (no prefix)`}
      </DocsCodeBlock>

      <h2 id="frontend-structure" className="text-2xl font-semibold text-white mt-12 mb-4">Frontend Structure</h2>

      <DocsCodeBlock language="text">
        {`frontend/src/
├── components/
│ ├── ui/ # shadcn/ui primitives
│ ├── docs/ # Documentation components
│ ├── dashboard/ # Dashboard-specific components
│ ├── landing/ # Landing page components
│ └── DependencyGraph/ # React Flow graph components
├── pages/ # Route components
├── contexts/ # React contexts (AuthContext)
├── hooks/ # Custom hooks
├── services/ # API client
├── lib/ # Utilities
└── config/ # App configuration`}
      </DocsCodeBlock>

      <h2 id="mcp-server" className="text-2xl font-semibold text-white mt-12 mb-4">MCP Server</h2>

      <p className="text-gray-300 mb-4">
        The MCP server (<InlineCode>mcp-server/server.py</InlineCode>)
        is a standalone Python process that exposes 7 tools to AI assistants:
      </p>

      <DocsCodeBlock language="python">
        {`# Tools exposed via MCP protocol
search_code # Semantic code search
list_repositories # List indexed repos
get_dependency_graph # File dependency graph
analyze_code_style # Team coding conventions
analyze_impact # Change impact analysis
get_repository_insights # High-level repo metrics
get_codebase_dna # Architectural patterns`}
      </DocsCodeBlock>

      <p className="text-gray-300 mt-4 mb-4">
        The MCP server is a thin proxy - it forwards requests to the FastAPI backend
        and formats responses for AI consumption. It uses <InlineCode>httpx</InlineCode> for
        async HTTP calls and the <InlineCode>mcp</InlineCode> library
        for protocol handling.
      </p>

      <h2 id="database-schema" className="text-2xl font-semibold text-white mt-12 mb-4">Database Schema</h2>

      <p className="text-gray-300 mb-4">
        Supabase tables (see <InlineCode>supabase/migrations/</InlineCode>):
      </p>

      {/* SQL comments use `--`; `#` is not a comment marker in PostgreSQL. */}
      <DocsCodeBlock language="sql">
        {`-- Core tables in codeintel schema
codeintel.repositories -- Repo metadata, status, user_id
codeintel.api_keys -- User API keys for programmatic access
codeintel.user_limits -- Rate limits and quotas per user

-- Auth handled by Supabase Auth (auth.users)
-- Row Level Security (RLS) enforces user isolation`}
      </DocsCodeBlock>

      <DocsCallout type="info">
        Vector embeddings are stored in Pinecone, not PostgreSQL.
        This allows efficient similarity search at scale.
      </DocsCallout>

      <DocsPagination
        prev={{ title: 'Analysis API', href: '/docs/api/analysis' }}
        next={{ title: 'Contributing', href: '/docs/contributing' }}
      />
    </DocsLayout>
  )
}
333+
334+
/** Props for ServiceCard. */
interface ServiceCardProps {
  name: string
  description: string
}

/**
 * Bordered card showing a name in monospace above a short description.
 * ArchitecturePage uses it to list the backend service modules.
 */
function ServiceCard(props: ServiceCardProps) {
  const { name: heading, description: blurb } = props

  return (
    <div className="p-4 bg-white/[0.02] border border-white/10 rounded-lg">
      <h4 className="font-mono text-blue-400 mb-2">{heading}</h4>
      <p className="text-sm text-gray-400">{blurb}</p>
    </div>
  )
}

0 commit comments

Comments
 (0)