-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathconfig.example.json
More file actions
52 lines (48 loc) · 1.32 KB
/
config.example.json
File metadata and controls
52 lines (48 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
{
"description": "Extract2MD example configuration - complete sample showing all available options",
"ocr": {
"language": "eng",
"oem": 1,
"psm": 6,
"workerPath": "./tesseract-worker.min.js",
"corePath": "./tesseract-core.wasm.js",
"langPath": "./lang-data/",
"options": {
"logger": null,
"errorHandler": null
}
},
"webllm": {
"modelId": "Llama-3.2-1B-Instruct-q4f16_1-MLC",
"temperature": 0.7,
"maxTokens": 4000,
"streamingEnabled": false,
"customModel": {
"model": "https://huggingface.co/mlc-ai/custom-model/resolve/main/",
"model_id": "Custom-Model-ID",
"model_lib": "https://example.com/path/to/custom-model.wasm",
"required_features": ["shader-f16"],
"overrides": {
"conv_template": "llama"
}
}
},
"systemPrompts": {
"singleExtraction": "Focus on technical accuracy and preserve all code examples exactly as they appear.",
"combinedExtraction": "Pay special attention to diagrams and tables that might be better captured in the OCR version."
},
"processing": {
"splitPascalCase": false,
"pdfRenderScale": 2.5,
"postProcessRules": [
{
"find": "\\bAPI\\b",
"replace": "API"
},
{
"find": "\\bJSON\\b",
"replace": "JSON"
}
]
}
}