3 changes: 3 additions & 0 deletions .gitignore
@@ -6,3 +6,6 @@ node_modules
llm-server/.venv3
venv3/
llm-server\venv3
__pycache__/
*.pyc
*.pyo
Binary file removed llm-server/__pycache__/app.cpython-312.pyc
Binary file not shown.
38 changes: 26 additions & 12 deletions llm-server/app.py
@@ -51,12 +51,6 @@ def format(self, record):
"https://www.gitforme.tech"
]
CORS(app, origins=allowed_origins, supports_credentials=True)
@app.after_request
def apply_cors(response):
response.headers["Access-Control-Allow-Origin"] = "https://www.gitforme.tech"
response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
response.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
return response

app.config['PROPAGATE_EXCEPTIONS'] = True
app.config['DEBUG'] = True
@@ -71,10 +65,12 @@ def apply_cors(response):
logging.critical(f"Failed to load embedding model: {e}")
exit()

repo_cache = LRUCache(maxsize=5)
repo_cache = LRUCache(maxsize=20) # Increased from 5 to 20 cached repositories to improve hit rates and reduce GitHub API calls
global_api_call_times = deque()
GLOBAL_MAX_CALLS_PER_HOUR = 10
WINDOW_SECONDS = 3600
MAX_FILES_TO_PROCESS = 200 # Limit number of files to prevent memory issues
MAX_FILE_SIZE = 100000 # Max file size in bytes

def extract_owner_repo(repo_url: str):
if "github.com" in repo_url:
@@ -86,6 +82,9 @@ def extract_owner_repo(repo_url: str):
raise ValueError(f"Invalid GitHub repo format: {repo_url}. Expected 'owner/repo' or a GitHub URL.")
return parts[0], parts[1]

# Set of directories to skip for more efficient filtering
SKIP_DIRECTORIES = {'node_modules', 'vendor', 'dist', 'build', '__pycache__', '.git', 'venv', 'target', 'bin', 'obj'}

def summarize_code(file_path, code):
summary_lines = []
lines = code.splitlines()
@@ -148,12 +147,15 @@ async def get_relevant_context(repo_url, query):

files_to_fetch = [
f for f in tree_json.get("tree", [])
if f['type'] == 'blob' and not f['path'].startswith('.') and f['size'] < 100000
if f['type'] == 'blob'
and not f['path'].startswith('.')
and not any(skip_dir in f['path'].split('/') for skip_dir in SKIP_DIRECTORIES)
and f['size'] < MAX_FILE_SIZE
and f['path'].endswith((
'.py', '.js', '.ts', '.tsx', '.go', '.rs', '.java', '.cs', '.php', '.rb',
'.json', '.yml', '.yaml', 'Dockerfile', 'README.md', 'CONTRIBUTING.md'
))
]
][:MAX_FILES_TO_PROCESS] # Limit total files to process
if not files_to_fetch:
return None, "No relevant code or documentation files were found in this repository."
logging.info(f"Identified {len(files_to_fetch)} files to fetch content for.")
@@ -179,11 +181,23 @@ async def get_relevant_context(repo_url, query):
file_paths = list(file_summaries.keys())
code_chunks = list(file_summaries.values())

# Process embeddings in batches to reduce memory usage
embedding_start_time = time.time()
EMBEDDING_BATCH_SIZE = 50
all_embeddings = []

with torch.inference_mode():
encoded = EMBEDDING_TOKENIZER(code_chunks, padding=True, truncation=True, return_tensors='pt', max_length=512)
output = EMBEDDING_MODEL(**encoded)
embeddings = output.last_hidden_state.mean(dim=1).cpu().numpy().astype('float32')
for i in range(0, len(code_chunks), EMBEDDING_BATCH_SIZE):
batch = code_chunks[i:i + EMBEDDING_BATCH_SIZE]
encoded = EMBEDDING_TOKENIZER(batch, padding=True, truncation=True, return_tensors='pt', max_length=512)
output = EMBEDDING_MODEL(**encoded)
batch_embeddings = output.last_hidden_state.mean(dim=1).cpu().numpy().astype('float32')
all_embeddings.append(batch_embeddings)

if not all_embeddings:
return None, "No valid embeddings could be generated from the repository files."

embeddings = np.vstack(all_embeddings)
logging.info(f"Generated {len(embeddings)} embeddings in {time.time() - embedding_start_time:.2f}s.")

faiss_index_start_time = time.time()
124 changes: 70 additions & 54 deletions server/Controllers/GithubController.js
@@ -2,24 +2,9 @@ const axios = require('axios');
const User = require('../models/UserModel');
const redisClient = require('../util/RediaClient');
const { Octokit } = require("@octokit/rest");
const { createGithubApi } = require('../util/GithubApiHelper');


const createGithubApi = async (session) => {
const headers = { 'Accept': 'application/vnd.github.v3+json' };

if (session?.userId) {
const user = await User.findById(session.userId);
if (user?.githubAccessToken) {
headers['Authorization'] = `token ${user.githubAccessToken}`;
console.log(`Making authenticated GitHub API request for user ${user.username}.`);
return axios.create({ baseURL: 'https://api.github.com', headers });
}
}

console.log('Making unauthenticated GitHub API request (fallback).');
return axios.create({ baseURL: 'https://api.github.com', headers });
};

exports.getRepoTimeline = async (req, res) => {
const { username, reponame } = req.params;
const userId = req.session.userId || 'public';
@@ -40,28 +25,42 @@ exports.getRepoTimeline = async (req, res) => {
// 2. Fetch all tags
const { data: tagsData } = await githubApi.get(`/repos/${username}/${reponame}/tags`);

// 3. Fetch commits (limit to 500 using per_page and pagination)
// 3. Fetch commits (limit to 500 using per_page with parallel page fetches)
const commits = [];
let page = 1;
const maxCommits = 500;
const perPage = 100;
const maxPages = Math.ceil(maxCommits / perPage);

// Use Promise.all to fetch pages in parallel for better performance
const pagePromises = [];
for (let page = 1; page <= maxPages; page++) {
pagePromises.push(
githubApi.get(`/repos/${username}/${reponame}/commits`, {
params: { per_page: perPage, page },
}).catch(err => {
console.warn(`Failed to fetch page ${page}:`, err.message);
return { data: [] };
})
);
}

while (commits.length < 500) {
const { data: pageCommits } = await githubApi.get(`/repos/${username}/${reponame}/commits`, {
params: { per_page: perPage, page },
});
const pageResults = await Promise.all(pagePromises);
for (const { data: pageCommits } of pageResults) {
if (pageCommits.length === 0) break;
commits.push(...pageCommits);
if (pageCommits.length < perPage) break;
page++;
if (commits.length >= maxCommits) break;
}

// Trim to the exact limit in case the final page pushed us past maxCommits
const trimmedCommits = commits.slice(0, maxCommits);

// Map tags to SHAs
const tagMap = {};
for (const tag of tagsData) {
tagMap[tag.commit.sha] = tag.name;
}

const processedCommits = commits.map(commit => ({
const processedCommits = trimmedCommits.map(commit => ({
sha: commit.sha,
message: commit.commit.message,
author: {
@@ -108,31 +107,35 @@ exports.fetchCodeHotspots = async (req, res) => {
params: { per_page: 100 }
});

const commitDetailsPromises = commitsResponse.data.map(commit =>
githubApi.get(commit.url)
);
const commitDetails = await Promise.all(commitDetailsPromises);

const fileChurn = new Map();
commitDetails.forEach(commitDetail => {
if (commitDetail.data.files) {
commitDetail.data.files.forEach(file => {
fileChurn.set(file.filename, (fileChurn.get(file.filename) || 0) + 1);
});
}
});
// Limit concurrency to avoid overwhelming the API
const CONCURRENCY_LIMIT = 10;
const fileChurn = new Map();

for (let i = 0; i < commitsResponse.data.length; i += CONCURRENCY_LIMIT) {
const batch = commitsResponse.data.slice(i, i + CONCURRENCY_LIMIT);
const batchPromises = batch.map(commit => githubApi.get(commit.url));
const batchDetails = await Promise.all(batchPromises);

batchDetails.forEach(commitDetail => {
if (commitDetail.data.files) {
commitDetail.data.files.forEach(file => {
fileChurn.set(file.filename, (fileChurn.get(file.filename) || 0) + 1);
});
}
});
}

const hotspots = Array.from(fileChurn, ([path, churn]) => ({ path, churn }))
.sort((a, b) => b.churn - a.churn);
const hotspots = Array.from(fileChurn, ([path, churn]) => ({ path, churn }))
.sort((a, b) => b.churn - a.churn);

await redisClient.set(cacheKey, JSON.stringify(hotspots), { EX: 3600 });
res.json(hotspots);
await redisClient.set(cacheKey, JSON.stringify(hotspots), { EX: 3600 });
res.json(hotspots);

} catch (error) {
console.error("Error fetching code hotspots:", error.response?.data || error.message);
res.status(error.response?.status || 500).json({ message: "Error fetching code hotspots from GitHub." });
}
};
} catch (error) {
console.error("Error fetching code hotspots:", error.response?.data || error.message);
res.status(error.response?.status || 500).json({ message: "Error fetching code hotspots from GitHub." });
}
};

exports.fetchIssueTimeline = async (req, res) => {
const { username, reponame, issue_number } = req.params;
@@ -381,14 +384,27 @@ exports.fetchDeployments = async (req, res) => {
return res.json([]);
}

const statusPromises = deployments.map(deployment =>
githubApi.get(deployment.statuses_url).then(statusResponse => ({
...deployment,
statuses: statusResponse.data
}))
);
// Batch deployment status fetches with concurrency control
const CONCURRENCY_LIMIT = 10;
const deploymentsWithStatuses = [];

const deploymentsWithStatuses = await Promise.all(statusPromises);
for (let i = 0; i < deployments.length; i += CONCURRENCY_LIMIT) {
const batch = deployments.slice(i, i + CONCURRENCY_LIMIT);
const batchPromises = batch.map(deployment =>
githubApi.get(deployment.statuses_url)
.then(statusResponse => ({
...deployment,
statuses: statusResponse.data
}))
.catch(err => {
console.warn(`Failed to fetch status for deployment ${deployment.id}:`, err.message);
return { ...deployment, statuses: [] };
})
);

const batchResults = await Promise.all(batchPromises);
deploymentsWithStatuses.push(...batchResults);
}

const activeDeploymentUrls = new Map();
deploymentsWithStatuses.forEach(deployment => {
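
Note: the batch-and-await pattern introduced above now appears three times in this PR (code hotspots, deployments, and dependency health below). A minimal sketch of a shared helper it could be factored into, assuming plain Promise.all batching with no extra dependency; the name mapInBatches is hypothetical:

const mapInBatches = async (items, batchSize, mapper) => {
  const results = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    // Each batch runs in parallel; batches run back to back.
    results.push(...await Promise.all(batch.map(mapper)));
  }
  return results;
};

// Hypothetical usage for the hotspot fetch above:
// const commitDetails = await mapInBatches(commitsResponse.data, 10, commit => githubApi.get(commit.url));
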
60 changes: 26 additions & 34 deletions server/Controllers/InsightController.js
@@ -1,26 +1,7 @@
const axios = require('axios');
const User = require('../models/UserModel');
const redisClient = require('../util/RediaClient');

const createGithubApi = async (session) => {
const headers = { 'Accept': 'application/vnd.github.v3+json' };

if (session?.userId) {
try {
const user = await User.findById(session.userId);
if (user?.githubAccessToken) {
headers['Authorization'] = `token ${user.githubAccessToken}`;
console.log(`Making authenticated GitHub API request for user ${user.username}.`);
return axios.create({ baseURL: 'https://api.github.com', headers });
}
} catch (dbError) {
console.error("Error fetching user for authenticated API call:", dbError.message);
}
}

console.log('Making unauthenticated GitHub API request (fallback).');
return axios.create({ baseURL: 'https://api.github.com', headers });
};
const { createGithubApi } = require('../util/GithubApiHelper');

exports.fetchDependencyHealth = async (req, res) => {
const { username, reponame } = req.params;
@@ -59,22 +40,33 @@ exports.fetchDependencyHealth = async (req, res) => {
return res.json({ dependencies: [], summary: { total: 0, outdated: 0, deprecated: 0, licenses: [] } });
}

const dependencyPromises = Object.entries(dependencies).map(async ([name, version]) => {
try {
const npmResponse = await axios.get(`https://registry.npmjs.org/${name}`);
const latestVersion = npmResponse.data['dist-tags'].latest;
const license = npmResponse.data.license || 'N/A';
const isDeprecated = !!npmResponse.data.deprecated;
const isOutdated = latestVersion !== version.replace(/[\^~>=<]/g, '');
// Batch dependency checks with concurrency control to avoid overwhelming npm registry
const CONCURRENCY_LIMIT = 10;
const dependencyEntries = Object.entries(dependencies);
const healthReport = [];

for (let i = 0; i < dependencyEntries.length; i += CONCURRENCY_LIMIT) {
const batch = dependencyEntries.slice(i, i + CONCURRENCY_LIMIT);
const batchPromises = batch.map(async ([name, version]) => {
try {
const npmResponse = await axios.get(`https://registry.npmjs.org/${name}`, {
timeout: 5000 // Add timeout to prevent hanging
});
const latestVersion = npmResponse.data['dist-tags'].latest;
const license = npmResponse.data.license || 'N/A';
const isDeprecated = !!npmResponse.data.deprecated;
const isOutdated = latestVersion !== version.replace(/[\^~>=<]/g, '');

return { name, version, latestVersion, license, isOutdated, isDeprecated };
} catch (error) {
console.error(`Error fetching data for ${name}:`, error.message);
return { name, version, error: 'Package not found in npm registry' };
}
});
return { name, version, latestVersion, license, isOutdated, isDeprecated };
} catch (error) {
console.error(`Error fetching data for ${name}:`, error.message);
return { name, version, error: 'Package not found in npm registry' };
}
});

const healthReport = await Promise.all(dependencyPromises);
const batchResults = await Promise.all(batchPromises);
healthReport.push(...batchResults);
}

const summary = {
total: healthReport.length,
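
One caveat on the isOutdated check kept above: stripping the range operators and comparing strings flags "^1.2.0" as outdated as soon as the latest release is "1.2.5", even though the declared range still covers it. A sketch of a range-aware check using the semver package (an assumed extra dependency, not added by this PR):

const semver = require('semver'); // assumed dependency, not installed by this PR

// Flag a package only when the latest published version falls outside
// the declared range (e.g. "^1.2.0" vs. latest "2.0.0").
const isOutdated = (declaredRange, latestVersion) =>
  semver.validRange(declaredRange) && semver.valid(latestVersion)
    ? !semver.satisfies(latestVersion, declaredRange)
    : false; // unparseable input: do not flag
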
19 changes: 1 addition & 18 deletions server/api/githubApi.js
@@ -1,6 +1,7 @@
const axios = require('axios');
const redisClient = require('../util/RediaClient');
const User = require('../models/UserModel');
const { createGithubApi } = require('../util/GithubApiHelper');

const githubApi = axios.create({
baseURL: 'https://api.github.com',
@@ -68,21 +69,3 @@ exports.fetchRepoDetails = async (req, res) => {
}
};

const createGithubApi = async (session) => {
const headers = { Accept: 'application/vnd.github.v3+json' };

if (session?.userId) {
const user = await User.findById(session.userId);
if (user?.githubAccessToken) {
headers['Authorization'] = `token ${user.githubAccessToken}`;
console.log(
`Making authenticated GitHub API request for user ${user.username}.`
);
return axios.create({ baseURL: 'https://api.github.com', headers });
}
}

console.log('Making unauthenticated GitHub API request (fallback).');
return axios.create({ baseURL: 'https://api.github.com', headers });
};

29 changes: 29 additions & 0 deletions server/util/GithubApiHelper.js
@@ -0,0 +1,29 @@
const axios = require('axios');
const User = require('../models/UserModel');

/**
* Creates an authenticated or unauthenticated GitHub API client based on session
* @param {Object} session - Express session object containing userId
* @returns {Promise<Object>} Axios instance configured for GitHub API
*/
const createGithubApi = async (session) => {
const headers = { 'Accept': 'application/vnd.github.v3+json' };

if (session?.userId) {
try {
const user = await User.findById(session.userId);
if (user?.githubAccessToken) {
headers['Authorization'] = `token ${user.githubAccessToken}`;
console.log(`Making authenticated GitHub API request for user ${user.username}.`);
return axios.create({ baseURL: 'https://api.github.com', headers });
}
} catch (dbError) {
console.error("Error fetching user for authenticated API call:", dbError.message);
}
}

console.log('Making unauthenticated GitHub API request (fallback).');
return axios.create({ baseURL: 'https://api.github.com', headers });
};

module.exports = { createGithubApi };
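
For reference, a minimal sketch of how a controller consumes the extracted helper; the fetchRepoLanguages endpoint is illustrative, not part of this PR:

const { createGithubApi } = require('../util/GithubApiHelper');

// Hypothetical controller: uses the session's GitHub token when available,
// otherwise falls back to unauthenticated requests (lower rate limit).
exports.fetchRepoLanguages = async (req, res) => {
  const { username, reponame } = req.params;
  try {
    const githubApi = await createGithubApi(req.session);
    const { data } = await githubApi.get(`/repos/${username}/${reponame}/languages`);
    res.json(data);
  } catch (error) {
    console.error('Error fetching repo languages:', error.message);
    res.status(error.response?.status || 500).json({ message: 'Error fetching data from GitHub.' });
  }
};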