This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 749cf0c
chore: auto load model on /chat/completions request (#900)
1 parent 9857448

14 files changed: +154 −30 lines

cortex-js/src/domain/abstracts/engine.abstract.ts

Lines changed: 31 additions & 1 deletion
@@ -3,6 +3,10 @@ import stream from 'stream';
 import { Model, ModelSettingParams } from '../../domain/models/model.interface';
 import { Extension } from './extension.abstract';
 
+/**
+ * This class should be extended by any class that represents an engine extension.
+ * It provides methods for loading and unloading models, and for making inference requests.
+ */
 export abstract class EngineExtension extends Extension {
   abstract onLoad(): void;
 
@@ -12,16 +16,42 @@ export abstract class EngineExtension extends Extension {
 
   initalized: boolean = false;
 
+  /**
+   * Makes an inference request to the engine.
+   * @param dto
+   * @param headers
+   */
   abstract inference(
     dto: any,
     headers: Record<string, string>,
   ): Promise<stream.Readable | any>;
 
+  /**
+   * Checks whether a model is currently running in the engine.
+   * Implementations should probe the model's run-time status, since a model
+   * can become corrupted at run time; return false if it is not running.
+   * @param modelId
+   */
+  async isModelRunning(modelId: string): Promise<boolean> {
+    return true;
+  }
+
+  /**
+   * Loads a model into the engine.
+   * Model settings such as `ngl` and `ctx_len` can be passed to the engine.
+   * Applicable to local engines only.
+   * @param model
+   * @param settingParams
+   */
   async loadModel(
     model: Model,
     settingParams?: ModelSettingParams,
   ): Promise<void> {}
 
+  /**
+   * Unloads a model from the engine.
+   * @param modelId
+   */
   async unloadModel(modelId: string): Promise<void> {}
-
 }
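
The new isModelRunning hook is what enables the auto-load behavior named in the commit title: a caller can probe the engine's run-time state and lazily (re)load a model before inferencing. A minimal sketch of that pattern, using a hypothetical ensureModelLoaded helper that is not part of this commit:

import { EngineExtension } from '@/domain/abstracts/engine.abstract';
import { Model, ModelSettingParams } from '@/domain/models/model.interface';

// Hypothetical helper illustrating the auto-load pattern this contract enables.
async function ensureModelLoaded(
  engine: EngineExtension,
  modelId: string,
  model: Model,
  settings?: ModelSettingParams,
): Promise<void> {
  // Probe run-time status first; the model may have crashed since it was loaded.
  if (!(await engine.isModelRunning(modelId))) {
    await engine.loadModel(model, settings);
  }
}

Because the base implementation returns true, engines that do not override the hook keep their existing behavior and are never re-loaded implicitly.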

cortex-js/src/domain/models/model.interface.ts

Lines changed: 9 additions & 0 deletions
@@ -40,6 +40,10 @@ export interface ModelSettingParams {
    * The number of layers to load onto the GPU for acceleration.
    */
   ngl?: number;
+
+  /**
+   * Whether to support embedding (legacy).
+   */
   embedding?: boolean;
 
   /**
@@ -117,6 +121,11 @@ export interface ModelSettingParams {
    * To enable mmap, default is true
    */
   use_mmap?: boolean;
+
+  /**
+   * The model type to use: `llm` or `embedding`; defaults to `llm` (latest llama.cpp update).
+   */
+  model_type?: string;
 }
 
 /**
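
With the new field, an embedding model can be declared explicitly when loading. A minimal sketch of a settings object; the values are illustrative, not defaults from this commit:

// Illustrative settings; values are examples only.
const embeddingSettings: ModelSettingParams = {
  ctx_len: 512,            // context length
  ngl: 33,                 // layers offloaded to the GPU
  embedding: true,         // legacy flag kept for compatibility
  model_type: 'embedding', // new field: 'llm' (default) or 'embedding'
};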

cortex-js/src/infrastructure/constants/cortex.ts

Lines changed: 3 additions & 1 deletion
@@ -7,6 +7,8 @@ export const defaultCortexJsPort = 1337;
 
 export const defaultCortexCppHost = '127.0.0.1';
 export const defaultCortexCppPort = 3929;
+
+export const defaultEmbeddingModel = 'nomic-embed-text-v1';
 // CORTEX CPP
 export const CORTEX_CPP_EMBEDDINGS_URL = (
   host: string = defaultCortexCppHost,
@@ -50,4 +52,4 @@ export const CUDA_DOWNLOAD_URL =
 
 export const telemetryServerUrl = 'https://telemetry.jan.ai';
 
-export const MIN_CUDA_VERSION = '12.3';
+export const MIN_CUDA_VERSION = '12.3';
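
The provider change further down consumes a CORTEX_CPP_MODELS_URL helper from this file. Its definition falls outside the diff hunks; assuming it follows the same pattern as CORTEX_CPP_EMBEDDINGS_URL above, it would look roughly like the sketch below. The endpoint path is a guess, not confirmed by this diff:

// Sketch only: the real definition and path are not shown in this commit.
export const CORTEX_CPP_MODELS_URL = (
  host: string = defaultCortexCppHost,
  port: number = defaultCortexCppPort,
): string => `http://${host}:${port}/inferences/server/models`;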

cortex-js/src/infrastructure/controllers/chat.controller.spec.ts

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@ import { DownloadManagerModule } from '@/infrastructure/services/download-manage
 import { EventEmitterModule } from '@nestjs/event-emitter';
 import { TelemetryModule } from '@/usecases/telemetry/telemetry.module';
 import { FileManagerModule } from '../services/file-manager/file-manager.module';
+import { ModelsModule } from '@/usecases/models/models.module';
 
 describe('ChatController', () => {
   let controller: ChatController;
@@ -25,6 +26,7 @@ describe('ChatController', () => {
         EventEmitterModule.forRoot(),
         TelemetryModule,
         FileManagerModule,
+        ModelsModule,
       ],
       controllers: [ChatController],
       providers: [ChatUsecases],

cortex-js/src/infrastructure/controllers/chat.controller.ts

Lines changed: 13 additions & 19 deletions
@@ -35,27 +35,21 @@ export class ChatController {
   ) {
     const { stream } = createChatDto;
 
-    if (stream) {
-      this.chatService
-        .inference(createChatDto, extractCommonHeaders(headers))
-        .then((stream) => {
+    this.chatService
+      .inference(createChatDto, extractCommonHeaders(headers))
+      .then((response) => {
+        if (stream) {
           res.header('Content-Type', 'text/event-stream');
-          stream.pipe(res);
-        })
-        .catch((error) =>
-          res.status(error.statusCode ?? 400).send(error.message),
-        );
-    } else {
-      res.header('Content-Type', 'application/json');
-      this.chatService
-        .inference(createChatDto, extractCommonHeaders(headers))
-        .then((response) => {
+          response.pipe(res);
+        } else {
+          res.header('Content-Type', 'application/json');
           res.json(response);
-        })
-        .catch((error) =>
-          res.status(error.statusCode ?? 400).send(error.message),
-        );
-    }
+        }
+      })
+      .catch((error) =>
+        res.status(error.statusCode ?? 400).send(error.message),
+      );
+
 
     this.telemetryUsecases.addEventToQueue({
       name: EventName.CHAT,
       modelId: createChatDto.model,
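
The refactor collapses the duplicated streaming and non-streaming branches into a single promise chain with one shared error handler; only the response writing differs. Per the commit title, the model is now auto-loaded on a /chat/completions request, which a usecase-level sketch might express as below. The startModel callback and the DTO shape are assumptions, not code from this diff:

import stream from 'stream';
import { EngineExtension } from '@/domain/abstracts/engine.abstract';

// Hypothetical sketch of the auto-load step implied by the commit title.
async function inferenceWithAutoLoad(
  engine: EngineExtension,
  dto: { model: string; stream?: boolean },
  headers: Record<string, string>,
  startModel: (modelId: string) => Promise<void>, // assumed loader hook
): Promise<stream.Readable | any> {
  // Lazily (re)load the model if the engine reports it is not running.
  if (!(await engine.isModelRunning(dto.model))) {
    await startModel(dto.model);
  }
  return engine.inference(dto, headers);
}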

cortex-js/src/infrastructure/controllers/embeddings.controller.spec.ts

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@ import { DownloadManagerModule } from '@/infrastructure/services/download-manage
 import { EventEmitterModule } from '@nestjs/event-emitter';
 import { TelemetryModule } from '@/usecases/telemetry/telemetry.module';
 import { FileManagerModule } from '../services/file-manager/file-manager.module';
+import { ModelsModule } from '@/usecases/models/models.module';
 
 describe('EmbeddingsController', () => {
   let controller: EmbeddingsController;
@@ -25,6 +26,7 @@ describe('EmbeddingsController', () => {
         EventEmitterModule.forRoot(),
         TelemetryModule,
         FileManagerModule,
+        ModelsModule,
       ],
       controllers: [EmbeddingsController],
       providers: [ChatUsecases],

cortex-js/src/infrastructure/controllers/embeddings.controller.ts

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-import { Body, Controller, Post, HttpCode } from '@nestjs/common';
+import { Body, Controller, Post, HttpCode, Res } from '@nestjs/common';
 import { ChatUsecases } from '@/usecases/chat/chat.usecases';
 import { ApiOperation, ApiTags, ApiResponse } from '@nestjs/swagger';
 import { CreateEmbeddingsDto } from '../dtos/embeddings/embeddings-request.dto';

cortex-js/src/infrastructure/dtos/messages/create-message.dto.ts

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ export class CreateMessageDto implements Partial<Message> {
     example: 'user',
     description: 'The sources of the messages.',
   })
+  @IsString()
   role: 'user' | 'assistant';
 
   @ApiProperty({
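
The added @IsString() decorator lets class-validator reject non-string role values at the boundary; in the app this runs through Nest's ValidationPipe. A standalone check showing the effect, assuming class-transformer is available as in typical Nest projects:

import { validate } from 'class-validator';
import { plainToInstance } from 'class-transformer';
import { CreateMessageDto } from './create-message.dto';

// Hypothetical direct use of class-validator; the app normally relies on
// Nest's ValidationPipe to run these checks on incoming requests.
async function demo(): Promise<void> {
  const dto = plainToInstance(CreateMessageDto, { role: 42 });
  const errors = await validate(dto);
  // errors now contains an `isString` constraint violation for `role`.
  console.log(errors.length > 0); // true
}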

cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts

Lines changed: 33 additions & 2 deletions
@@ -1,10 +1,11 @@
-import { Injectable } from '@nestjs/common';
+import { HttpStatus, Injectable } from '@nestjs/common';
 import { OAIEngineExtension } from '@/domain/abstracts/oai.abstract';
 import { PromptTemplate } from '@/domain/models/prompt-template.interface';
 import { join } from 'path';
 import { Model, ModelSettingParams } from '@/domain/models/model.interface';
 import { HttpService } from '@nestjs/axios';
 import {
+  CORTEX_CPP_MODELS_URL,
   defaultCortexCppHost,
   defaultCortexCppPort,
 } from '@/infrastructure/constants/cortex';
@@ -13,6 +14,11 @@ import { normalizeModelId } from '@/utils/normalize-model-id';
 import { firstValueFrom } from 'rxjs';
 import { FileManagerService } from '@/infrastructure/services/file-manager/file-manager.service';
 
+export interface ModelStatResponse {
+  object: string;
+  data: any;
+}
+
 @Injectable()
 export default class CortexProvider extends OAIEngineExtension {
   apiUrl = `http://${defaultCortexCppHost}:${defaultCortexCppPort}/inferences/server/chat_completion`;
@@ -28,11 +34,12 @@ export default class CortexProvider extends OAIEngineExtension {
 
   constructor(
     protected readonly httpService: HttpService,
-    private readonly fileManagerService: FileManagerService,
+    protected readonly fileManagerService: FileManagerService,
   ) {
     super(httpService);
  }
 
+  // Override the loadModel method to load a model into the engine
   override async loadModel(
     model: Model,
     settings?: ModelSettingParams,
@@ -92,6 +99,30 @@ export default class CortexProvider extends OAIEngineExtension {
     ).then(); // pipe error or void instead of throwing
   }
 
+  // Override the isModelRunning method to check the model's run-time status
+  override async isModelRunning(modelId: string): Promise<boolean> {
+    const configs = await this.fileManagerService.getConfig();
+
+    return firstValueFrom(
+      this.httpService.get(
+        CORTEX_CPP_MODELS_URL(configs.cortexCppHost, configs.cortexCppPort),
+      ),
+    )
+      .then((res) => {
+        const data = res.data as ModelStatResponse;
+        if (
+          res.status === HttpStatus.OK &&
+          data &&
+          Array.isArray(data.data) &&
+          data.data.length > 0
+        ) {
+          return data.data.find((e) => e.id === modelId);
+        }
+        return false;
+      })
+      .catch(() => false);
+  }
+
   private readonly promptTemplateConverter = (
     promptTemplate: string,
   ): PromptTemplate => {
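
For reference, the response shape isModelRunning consumes is only loosely typed (data: any). An illustrative instance; field values beyond id are examples, not confirmed by the diff:

// Illustrative response from cortex.cpp's model-status endpoint; values are examples.
const exampleResponse: ModelStatResponse = {
  object: 'list',
  data: [{ id: 'nomic-embed-text-v1' }],
};

Note that on a hit the method resolves to the entry returned by data.data.find(...) rather than a strict boolean; since data is typed any, this still satisfies Promise<boolean>, and callers that treat the result as truthy behave correctly. Any HTTP failure is swallowed and reported as false.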

cortex-js/src/usecases/chat/chat.module.ts

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,7 @@ import { ModelRepositoryModule } from '@/infrastructure/repositories/models/mode
 import { HttpModule } from '@nestjs/axios';
 import { TelemetryModule } from '../telemetry/telemetry.module';
 import { FileManagerModule } from '@/infrastructure/services/file-manager/file-manager.module';
+import { ModelsModule } from '../models/models.module';
 
 @Module({
   imports: [
@@ -15,6 +16,7 @@ import { FileManagerModule } from '@/infrastructure/services/file-manager/file-m
     HttpModule,
     TelemetryModule,
     FileManagerModule,
+    ModelsModule,
   ],
   controllers: [],
   providers: [ChatUsecases],
