|
| 1 | +import { Injectable } from '@nestjs/common'; |
| 2 | +import si from 'systeminformation'; |
| 3 | +import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; |
| 4 | +import OpenAI from 'openai'; |
| 5 | +import { Presets, SingleBar } from 'cli-progress'; |
| 6 | +import yaml from 'js-yaml'; |
| 7 | +import { FileManagerService } from '@/file-manager/file-manager.service'; |
| 8 | +import { join } from 'path'; |
| 9 | +import { ModelsCliUsecases } from './models.cli.usecases'; |
| 10 | +import { spawn } from 'child_process'; |
| 11 | +import { BenchmarkConfig } from '../types/benchmark-config.interface'; |
| 12 | +import { CortexUsecases } from '@/usecases/cortex/cortex.usecases'; |
| 13 | +import { inspect } from 'util'; |
| 14 | +import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark'; |
| 15 | + |
/** Matches runs of whitespace; used to approximate token counts by splitting text into words. */
const WHITESPACE_RUN = /\s+/;

/** Minimal view of a system snapshot needed to compute resource deltas. */
interface ResourceSnapshot {
  // Both spellings are accepted: systeminformation v5 reports `currentLoad`,
  // while older releases reported `currentload`.
  cpu?: { currentLoad?: number; currentload?: number } | null;
  mem?: { used: number; total: number } | null;
}

/** CPU / memory change between two snapshots; `null` when it cannot be computed. */
interface ResourceChange {
  cpu: number | null;
  mem: number | null;
}

/** Measurements collected for one simulated user request. */
interface BenchmarkUserResult {
  tokens: BenchmarkConfig['api']['parameters']['max_tokens'];
  token_length: number;
  latency: number;
  resourceChange: ResourceChange;
  tpot: number;
  throughput: number;
  ttft: number | null;
}

/** Results of one benchmark round. */
interface BenchmarkRound {
  round: number;
  results: BenchmarkUserResult[];
  hardwareChanges: ResourceChange;
}

/**
 * CLI use case that benchmarks a model served through an OpenAI-compatible
 * endpoint and records latency, throughput and system-resource metrics.
 */
@Injectable()
export class BenchmarkCliUsecases {
  constructor(
    private readonly modelsCliUsecases: ModelsCliUsecases,
    private readonly cortexUsecases: CortexUsecases,
    private readonly fileService: FileManagerService,
  ) {}

  /** Benchmark configuration; assigned by `benchmark()` before any request is sent. */
  config: BenchmarkConfig;
  /** Client built from `config.api`; assigned by `benchmark()`. */
  openai?: OpenAI;

  /**
   * Benchmark and analyze the performance of a specific AI model using a variety of system resources.
   *
   * Loads the configuration, spawns `cortex serve`, starts Cortex and the
   * configured model, runs the benchmark rounds and then exits the process
   * with code 0. Any failure rejects the returned promise instead.
   */
  async benchmark() {
    this.config = await this.getBenchmarkConfig();

    // TODO: Using OpenAI client or Cortex client to benchmark?
    this.openai = new OpenAI({
      apiKey: this.config.api.api_key,
      baseURL: this.config.api.base_url,
      timeout: 20 * 1000,
    });

    // NOTE(review): the child has no 'error' listener, so a missing `cortex`
    // binary surfaces as an uncaught exception — confirm that is intended.
    spawn('cortex', ['serve'], {
      detached: false,
    });

    await this.cortexUsecases.startCortex();
    await this.modelsCliUsecases.startModel(this.config.api.parameters.model);
    await this.runBenchmarks();
    process.exit(0);
  }

  /**
   * Get the benchmark configuration.
   *
   * Reads `config.yaml` from the benchmark folder when it exists; otherwise
   * writes the default configuration to that path and returns the defaults.
   * @returns the benchmark configuration
   * @throws Error when the existing `config.yaml` does not contain a YAML mapping
   */
  private async getBenchmarkConfig() {
    const benchmarkFolder = await this.fileService.getBenchmarkPath();
    const configurationPath = join(benchmarkFolder, 'config.yaml');

    if (existsSync(configurationPath)) {
      const parsed = yaml.load(readFileSync(configurationPath, 'utf8'));
      // An empty or scalar YAML document would otherwise fail later with an
      // opaque "cannot read properties of undefined" error.
      if (parsed === null || typeof parsed !== 'object') {
        throw new Error(
          `Invalid benchmark configuration at ${configurationPath}: expected a YAML mapping`,
        );
      }
      return parsed as BenchmarkConfig;
    }

    // `recursive: true` makes this a no-op when the folder already exists.
    mkdirSync(benchmarkFolder, { recursive: true });
    writeFileSync(
      configurationPath,
      yaml.dump(defaultBenchmarkConfiguration),
      'utf8',
    );
    return defaultBenchmarkConfiguration;
  }

  /**
   * Get the system resources for benchmarking
   * using the systeminformation library.
   * @returns current CPU load, memory usage and GPU controllers
   */
  private async getSystemResources() {
    // The three probes are independent, so they are gathered in parallel.
    const [cpu, mem, graphics] = await Promise.all([
      si.currentLoad(),
      si.mem(),
      si.graphics(),
    ]);
    return { cpu, mem, gpu: graphics.controllers };
  }

  /**
   * Extract the CPU load figure from a snapshot, accepting either property spelling.
   * @param cpu the `cpu` part of a resource snapshot
   * @returns the load value, or null when it is absent or not a finite number
   */
  private readCpuLoad(cpu: ResourceSnapshot['cpu']): number | null {
    const load = cpu?.currentLoad ?? cpu?.currentload;
    return typeof load === 'number' && Number.isFinite(load) ? load : null;
  }

  /**
   * Get the resource change between two data points.
   *
   * CPU is the change relative to the starting load, in percent; memory is the
   * change in used memory as a percentage of total memory.
   * @param startData the start data point
   * @param endData the end data point
   * @returns the resource change; a field is null when it cannot be computed
   */
  private async getResourceChange(
    startData: ResourceSnapshot,
    endData: ResourceSnapshot,
  ): Promise<ResourceChange> {
    const startLoad = this.readCpuLoad(startData.cpu);
    const endLoad = this.readCpuLoad(endData.cpu);
    const startMem = startData.mem;
    const endMem = endData.mem;

    return {
      // A zero starting load would make the relative change undefined.
      cpu:
        startLoad !== null && endLoad !== null && startLoad !== 0
          ? ((endLoad - startLoad) / startLoad) * 100
          : null,
      mem:
        startMem && endMem && startMem.total > 0
          ? ((endMem.used - startMem.used) / startMem.total) * 100
          : null,
    };
  }

  /**
   * Benchmark a single user request using the OpenAI API.
   *
   * Streams one chat completion and measures total latency, time to first
   * token, an approximate token count and the system-resource change.
   * @returns the measurements, or null when the API call fails
   * @throws Error when called before `benchmark()` has created the client
   */
  private async benchmarkUser(): Promise<BenchmarkUserResult | null> {
    const client = this.openai;
    if (!client) {
      throw new Error('OpenAI client is not initialised; call benchmark() first');
    }

    const startResources = await this.getSystemResources();
    const start = Date.now();
    let tokenCount = 0;
    let firstTokenTime: number | null = null;

    try {
      const stream = await client.chat.completions.create({
        model: this.config.api.parameters.model,
        messages: this.config.api.parameters.messages,
        max_tokens: this.config.api.parameters.max_tokens,
        stream: true,
      });

      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content ?? '';
        // Chunks without text (role-only or final chunks) carry no tokens.
        if (!content) continue;
        if (firstTokenTime === null) {
          firstTokenTime = Date.now();
        }
        // Approximate tokens by whitespace-separated words; empty fragments
        // produced by leading/trailing whitespace are not counted.
        tokenCount += content.split(WHITESPACE_RUN).filter(Boolean).length;
      }
    } catch (error) {
      console.error('Error during API call:', error);
      return null;
    }

    const latency = Date.now() - start;
    const ttft = firstTokenTime !== null ? firstTokenTime - start : null;
    const endResources = await this.getSystemResources();
    const resourceChange = await this.getResourceChange(
      startResources,
      endResources,
    );

    return {
      tokens: this.config.api.parameters.max_tokens,
      token_length: tokenCount, // Dynamically calculated (approximate) token count
      latency,
      resourceChange,
      tpot: tokenCount ? latency / tokenCount : 0,
      // Guard against a zero-millisecond latency producing Infinity/NaN.
      throughput: latency > 0 ? tokenCount / (latency / 1000) : 0,
      ttft,
    };
  }

  /**
   * Calculate a percentile of the data using linear interpolation.
   * @param data the data to calculate percentiles for; null, undefined and
   *   non-finite entries are ignored
   * @param percentile the percentile to calculate (0-100)
   * @returns the percentile value, or null when no usable data points remain
   */
  private calculatePercentiles(
    data: Array<number | null | undefined>,
    percentile: number,
  ): number | null {
    const sorted = data
      .filter((x): x is number => typeof x === 'number' && Number.isFinite(x))
      .sort((a, b) => a - b);
    // Checked after filtering so an all-null series yields null, not undefined.
    if (sorted.length === 0) return null;

    const pos = (percentile / 100) * sorted.length;
    if (pos < 1) return sorted[0];
    if (pos >= sorted.length) return sorted[sorted.length - 1];

    const lower = sorted[Math.floor(pos) - 1];
    const upper = sorted[Math.ceil(pos) - 1];
    return lower + (upper - lower) * (pos - Math.floor(pos));
  }

  /**
   * Run the benchmarks.
   *
   * Executes `config.num_rounds` rounds (default 1); each round issues
   * `config.concurrency` user requests (default 1) at the same time. The
   * per-round results, p50/p75/p95 metrics and a final hardware snapshot are
   * written to `output.json` in the benchmark folder and printed to stdout.
   */
  private async runBenchmarks() {
    const allResults: BenchmarkRound[] = [];
    const rounds = this.config.num_rounds || 1;
    const concurrency = this.config.concurrency || 1;

    const bar = new SingleBar({}, Presets.shades_classic);
    bar.start(rounds, 0);

    try {
      for (let round = 1; round <= rounds; round++) {
        const hardwareBefore = await this.getSystemResources();

        // Launch all simulated users together so `concurrency` really means
        // simultaneous in-flight requests.
        const settled = await Promise.all(
          Array.from({ length: concurrency }, () => this.benchmarkUser()),
        );
        const roundResults = settled.filter(
          (result): result is BenchmarkUserResult => result !== null,
        );

        const hardwareAfter = await this.getSystemResources();
        const hardwareChanges = await this.getResourceChange(
          hardwareBefore,
          hardwareAfter,
        );

        allResults.push({
          round,
          results: roundResults,
          hardwareChanges,
        });

        bar.update(round);
      }
    } finally {
      // Always restore the terminal, even when a round throws.
      bar.stop();
    }

    const metrics: Record<
      'p50' | 'p75' | 'p95',
      Record<string, number | null>
    > = {
      p50: {},
      p75: {},
      p95: {},
    };
    const keys = ['latency', 'tpot', 'throughput', 'ttft'] as const;
    for (const key of keys) {
      const data = allResults.flatMap((r) => r.results.map((res) => res[key]));
      metrics.p50[key] = this.calculatePercentiles(data, 50);
      metrics.p75[key] = this.calculatePercentiles(data, 75);
      metrics.p95[key] = this.calculatePercentiles(data, 95);
    }

    const output = {
      hardware: await this.getSystemResources(),
      results: allResults,
      metrics,
    };

    const outputFilePath = join(
      await this.fileService.getBenchmarkPath(),
      'output.json',
    );
    fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);

    console.log(
      inspect(output, { showHidden: false, depth: null, colors: true }),
    );
  }
}
0 commit comments