diff --git a/.gitignore b/.gitignore
index 9722be4..95a32ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,4 +158,5 @@ Thumbs.db
 *.swp
 *.swo
 *~
-.vscode/
\ No newline at end of file
+.vscode/
+.onnx-tests/
diff --git a/docs.json b/docs.json
index 1e1bcbd..0c78aea 100644
--- a/docs.json
+++ b/docs.json
@@ -92,6 +92,7 @@
             "docs/inference/vllm",
             "docs/inference/mlx",
             "docs/inference/ollama",
+            "docs/inference/onnx",
             {
               "group": "Other Frameworks",
               "icon": "server",
diff --git a/docs/help/faqs.mdx b/docs/help/faqs.mdx
index 4610e6f..fa81927 100644
--- a/docs/help/faqs.mdx
+++ b/docs/help/faqs.mdx
@@ -20,7 +20,7 @@ LFM models are compatible with:
 - [vLLM](/docs/inference/vllm) - For high-throughput production serving
 - [MLX](/docs/inference/mlx) - For Apple Silicon optimization
 - [Ollama](/docs/inference/ollama) - For easy local deployment
-- [LEAP](/leap/index) - For edge and mobile deployment
+- [LEAP](/leap/edge-sdk/overview) - For edge and mobile deployment
 
 ## Model Selection
 
@@ -49,7 +49,7 @@ LFM2.5 models are updated versions with improved training that deliver higher pe
 
 ## Deployment
 
-Yes! Use the [LEAP SDK](/leap/index) to deploy models on iOS and Android devices. LEAP provides optimized inference for edge deployment with support for quantized models.
+Yes! Use the [LEAP SDK](/leap/edge-sdk/overview) to deploy models on iOS and Android devices. LEAP provides optimized inference for edge deployment with support for quantized models.
diff --git a/docs/index.mdx b/docs/index.mdx
deleted file mode 100644
index 456d6a7..0000000
--- a/docs/index.mdx
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: "LFM Documentation"
-description: "Redirect to LFM Getting Started"
----
-
-
-
-Redirecting to [Getting Started](/docs/getting-started/welcome)...
diff --git a/docs/inference/onnx.mdx b/docs/inference/onnx.mdx
new file mode 100644
index 0000000..8474ddf
--- /dev/null
+++ b/docs/inference/onnx.mdx
@@ -0,0 +1,251 @@
+---
+title: "ONNX"
+description: "ONNX provides cross-platform inference for LFM models across CPUs, GPUs, NPUs, and browsers via WebGPU."
+---
+
+  Use ONNX for cross-platform deployment, edge devices, and browser-based inference with WebGPU and Transformers.js.
+
+ONNX (Open Neural Network Exchange) is a portable format that enables LFM inference across diverse hardware and runtimes. ONNX models run on CPUs, GPUs, NPUs, and in browsers via WebGPU—making them ideal for edge deployment and web applications.
+
+Many LFM models are available as pre-exported ONNX packages on Hugging Face. For models not yet available, use the [LiquidONNX](#liquidonnx-export-tool) tool to export any LFM to ONNX.
+
+## Pre-exported Models
+
+Pre-exported ONNX models are available from LiquidAI and the [onnx-community](https://huggingface.co/onnx-community). Check the [Model Library](/docs/models/complete-library) for a complete list of available formats.
+
+### Quantization Options
+
+Each ONNX export includes multiple precision levels. **Q4** is recommended for most deployments and supports WebGPU, CPU, and GPU. **FP16** offers higher quality and works on WebGPU and GPU. **Q8** provides a quality/size balance but is server-only (CPU/GPU). **FP32** is the full precision baseline.
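+
+If you're unsure which precisions a particular export ships, you can list the repository's ONNX files before downloading. The short snippet below assumes the `onnx/model_<precision>.onnx` file layout used in the examples on this page:
+
+```python
+from huggingface_hub import list_repo_files
+
+# Inspect which ONNX variants (q4, fp16, q8, ...) a repository publishes
+model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
+onnx_files = [
+    f for f in list_repo_files(model_id)
+    if f.startswith("onnx/") and f.endswith(".onnx")
+]
+print("\n".join(sorted(onnx_files)))
+```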
+ +## Python Inference + +### Installation + +```bash +pip install onnxruntime transformers numpy huggingface_hub jinja2 + +# For GPU support +pip install onnxruntime-gpu transformers numpy huggingface_hub jinja2 +``` + +### Basic Usage + +```python +import numpy as np +import onnxruntime as ort +from huggingface_hub import hf_hub_download, list_repo_files +from transformers import AutoTokenizer + +# Download Q4 model (recommended) +model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX" +model_path = hf_hub_download(model_id, "onnx/model_q4.onnx") + +# Download external data files +for f in list_repo_files(model_id): + if f.startswith("onnx/model_q4.onnx_data"): + hf_hub_download(model_id, f) + +# Load model and tokenizer +session = ort.InferenceSession(model_path) +tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + +# Prepare input +messages = [{"role": "user", "content": "What is the capital of France?"}] +prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True +) +inputs = tokenizer.encode(prompt, add_special_tokens=False) +input_ids = np.array([inputs], dtype=np.int64) + +# Initialize KV cache +DTYPE_MAP = { + "tensor(float)": np.float32, + "tensor(float16)": np.float16, + "tensor(int64)": np.int64 +} +cache = {} +for inp in session.get_inputs(): + if inp.name in {"input_ids", "attention_mask", "position_ids"}: + continue + shape = [d if isinstance(d, int) else 1 for d in inp.shape] + for i, d in enumerate(inp.shape): + if isinstance(d, str) and "sequence" in d.lower(): + shape[i] = 0 + dtype = DTYPE_MAP.get(inp.type, np.float32) + cache[inp.name] = np.zeros(shape, dtype=dtype) + +# Generate tokens +seq_len = input_ids.shape[1] +generated = [] +input_names = {inp.name for inp in session.get_inputs()} + +for step in range(100): + if step == 0: + ids = input_ids + pos = np.arange(seq_len, dtype=np.int64).reshape(1, -1) + else: + ids = np.array([[generated[-1]]], dtype=np.int64) + pos = np.array([[seq_len + len(generated) - 1]], dtype=np.int64) + + attn_mask = np.ones((1, seq_len + len(generated)), dtype=np.int64) + feed = {"input_ids": ids, "attention_mask": attn_mask, **cache} + if "position_ids" in input_names: + feed["position_ids"] = pos + + outputs = session.run(None, feed) + next_token = int(np.argmax(outputs[0][0, -1])) + generated.append(next_token) + + # Update cache + for i, out in enumerate(session.get_outputs()[1:], 1): + name = out.name.replace("present_conv", "past_conv") + name = name.replace("present.", "past_key_values.") + if name in cache: + cache[name] = outputs[i] + + if next_token == tokenizer.eos_token_id: + break + +print(tokenizer.decode(generated, skip_special_tokens=True)) +``` + +## WebGPU Inference + +ONNX models run in browsers via [Transformers.js](https://huggingface.co/docs/transformers.js) with WebGPU acceleration. This enables fully client-side inference without server infrastructure. + +### Setup + +1. Install Transformers.js: +```bash +npm install @huggingface/transformers +``` + +2. 
Enable WebGPU in your browser: + - **Chrome/Edge**: Navigate to `chrome://flags/#enable-unsafe-webgpu`, enable, and restart + - **Verify**: Check `chrome://gpu` for WebGPU status + +### Usage + +```javascript +import { AutoModelForCausalLM, AutoTokenizer, TextStreamer } from "@huggingface/transformers"; + +const modelId = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"; + +// Load model with WebGPU +const tokenizer = await AutoTokenizer.from_pretrained(modelId); +const model = await AutoModelForCausalLM.from_pretrained(modelId, { + device: "webgpu", + dtype: "q4", // or "fp16" +}); + +// Generate with streaming +const messages = [{ role: "user", content: "What is the capital of France?" }]; +const input = tokenizer.apply_chat_template(messages, { + add_generation_prompt: true, + return_dict: true, +}); + +const streamer = new TextStreamer(tokenizer, { skip_prompt: true }); +const output = await model.generate({ + ...input, + max_new_tokens: 256, + do_sample: false, + streamer, +}); + +console.log(tokenizer.decode(output[0], { skip_special_tokens: true })); +``` + + +WebGPU supports Q4 and FP16 precision. Q8 quantization is not available in browser environments. + + +## LiquidONNX Export Tool + +[LiquidONNX](https://github.com/Liquid4All/onnx-export) is the official tool for exporting LFM models to ONNX. Use it to export models not yet available as pre-built packages, or to customize export settings. + +### Installation + +```bash +git clone https://github.com/Liquid4All/onnx-export.git +cd onnx-export +uv sync + +# For GPU inference +uv sync --extra gpu +``` + +### Supported Models + +| Family | Quantization Formats | +|--------|---------------------| +| LFM2.5, LFM2 (text) | fp32, fp16, q4, q8 | +| LFM2.5-VL, LFM2-VL (vision) | fp32, fp16, q4, q8 | +| LFM2-MoE | fp32, fp16, q4, q4f16 | +| LFM2.5-Audio | fp32, fp16, q4, q8 | + +### Export Commands + +**Text models:** +```bash +# Export with all precisions (fp16, q4, q8) +uv run lfm2-export LiquidAI/LFM2.5-1.2B-Instruct --precision + +# Export specific precisions +uv run lfm2-export LiquidAI/LFM2.5-1.2B-Instruct --precision fp16 q4 +``` + +**Vision-language models:** +```bash +uv run lfm2-vl-export LiquidAI/LFM2.5-VL-1.6B --precision + +# Alternative vision format for specific runtimes +uv run lfm2-vl-export LiquidAI/LFM2.5-VL-1.6B --vision-format conv2d +``` + +**MoE models:** +```bash +uv run lfm2-moe-export LiquidAI/LFM2-8B-A1B --precision +``` + +**Audio models:** +```bash +uv run lfm2-audio-export LiquidAI/LFM2.5-Audio-1.5B --precision +``` + +### Export Options + +| Flag | Description | +|------|-------------| +| `--precision` | Output formats: `fp16`, `q4`, `q8`, or omit args for all | +| `--output-dir` | Output base directory (default: current directory) | +| `--skip-export` | Skip FP32 export, only run quantization on existing export | +| `--block-size` | Block size for quantization (default: 32) | +| `--q4-asymmetric` | Use asymmetric Q4 (default is symmetric for WebGPU) | +| `--split-data` | Split external data into chunks in GB (default: 2.0) | + +### Inference with LiquidONNX + +LiquidONNX includes inference commands for testing exported models: + +```bash +# Text model chat +uv run lfm2-infer --model ./exports/LFM2.5-1.2B-Instruct-ONNX/onnx/model_q4.onnx + +# Vision-language with images +uv run lfm2-vl-infer --model ./exports/LFM2.5-VL-1.6B-ONNX \ + --images photo.jpg --prompt "Describe this image" + +# Audio transcription (ASR) +uv run lfm2-audio-infer LFM2.5-Audio-1.5B-ONNX --mode asr \ + --audio input.wav --precision q4 + +# 
Text-to-speech (TTS) +uv run lfm2-audio-infer LFM2.5-Audio-1.5B-ONNX --mode tts \ + --prompt "Hello, how are you?" --output speech.wav --precision q4 +``` + +For complete documentation and advanced options, see the [LiquidONNX GitHub repository](https://github.com/Liquid4All/onnx-export). diff --git a/docs/key-concepts/models.mdx b/docs/key-concepts/models.mdx deleted file mode 100644 index e8ca0e2..0000000 --- a/docs/key-concepts/models.mdx +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: "Models" -description: "The LFM model collection includes general-purpose language models, vision-language models, task-specific models, and audio models across various parameter sizes." ---- - -* These models are built on the backbone of a new hybrid architecture that's designed for incredibly fast training and inference. Learn more in our [blog post](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models). -* All models support a **32k token text context length** for extended conversations and document processing. -* Our models are compatible with various open-source deployment libraries including [Transformers](/docs/inference/transformers), [llama.cpp](/docs/inference/llama-cpp), [vLLM](/docs/inference/vllm), [MLX](/docs/inference/mlx), [Ollama](/docs/inference/ollama), and our own edge deployment platform [LEAP](/leap/index). - - - | Model | HF | GGUF | MLX | ONNX | Trainable? | - | ------------------------------ | ------------------------------------------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------ | -------------------------------------------------------------------- | ------------ | - | **Text-to-text Models** | | | | | | - | LFM2.5 Models (Latest Release) | | | | | | - | LFM2.5-1.2B-Instruct | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-ONNX) | Yes (TRL) | - | LFM2.5-1.2B-Base | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-GGUF) | ✗ | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-ONNX) | Yes (TRL) | - | LFM2.5-1.2B-JP | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-ONNX) | Yes (TRL) | - | LFM2 Models | | | | | | - | LFM2-8B-A1B | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B) | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-8B-A1B-8bit) | ✗ | Yes (TRL) | - | LFM2-2.6B | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-2.6B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-2.6B-ONNX) | Yes (TRL) | - | LFM2-1.2B Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-1.2B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-ONNX) | Yes (TRL) | - | LFM2-700M | [✓](https://huggingface.co/LiquidAI/LFM2-700M) | [✓](https://huggingface.co/LiquidAI/LFM2-700M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-700M-8bit) | 
[✓](https://huggingface.co/onnx-community/LFM2-700M-ONNX) | Yes (TRL) | - | LFM2-350M | [✓](https://huggingface.co/LiquidAI/LFM2-350M) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ONNX) | Yes (TRL) | - | **Vision Language Models** | | | | | | - | LFM2.5 Models (Latest Release) | | | | | | - | LFM2.5-VL-1.6B | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF) | ✗ | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-ONNX) | Yes (TRL) | - | LFM2 Models | | | | | | - | LFM2-VL-3B | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-3B-8bit) | ✗ | Yes (TRL) | - | LFM2-VL-1.6B | [✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-1.6B-8bit) | ✗ | Yes (TRL) | - | LFM2-VL-450M | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-450M-8bit) | ✗ | Yes (TRL) | - | **Audio Models** | | | | | | - | LFM2.5 Models (Latest Release) | | | | | | - | LFM2.5-Audio-1.5B | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-GGUF) | ✗ | ✗ | Yes (TRL) | - | LFM2 Models | | | | | | - | LFM2-Audio-1.5B | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B-GGUF) | ✗ | ✗ | No | - | **Liquid Nanos** | | | | | | - | LFM2-1.2B-Extract | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Extract-ONNX) | Yes (TRL) | - | LFM2-350M-Extract | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Extract-ONNX) | Yes (TRL) | - | LFM2-350M-ENJP-MT | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-ENJP-MT-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ENJP-MT-ONNX) | Yes (TRL) | - | LFM2-1.2B-RAG | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-RAG-ONNX) | Yes (TRL) | - | LFM2-1.2B-Tool Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Tool-ONNX) | Yes (TRL) | - | LFM2-350M-Math | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Math-ONNX) | Yes (TRL) | - | LFM2-350M-PII-Extract-JP | [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP-GGUF) | ✗ | ✗ | Yes (TRL) | - | LFM2-ColBERT-350M | [✓](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M) | ✗ | ✗ | ✗ | Yes (PyLate) | - | LFM2-2.6B-Transcript | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript-GGUF) | ✗ 
| [✓](https://huggingface.co/onnx-community/LFM2-2.6B-Transcript-ONNX) | Yes (TRL) | - - -## 💬 Text Models[​](#lfm2 "Direct link to 💬 Text Models") - -[LFM2](https://huggingface.co/LiquidAI/collections) is a family of general-purpose text-only language models optimized for edge AI and on-device deployment. - -[]() - -### LFM2.5 Models Text Latest Release[​](#lfm25-models-text-latest-release "Direct link to lfm25-models-text-latest-release") - -| Model | Description | -| --------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| [`LiquidAI/LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | Updated version of LFM2-1.2B with improved training that delivers higher performance. Instruction-tuned model optimized for chat and following instructions. **Recommended for most use cases.** | -| [`LiquidAI/LFM2.5-1.2B-Base`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base) | Base pre-trained model used to create all 1.2B variants. Ideal starting point for finetuning or building custom checkpoints. | -| [`LiquidAI/LFM2.5-1.2B-JP`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP) | Japanese language model optimized for Japanese text generation and understanding. | - -[]() - -### LFM2 Models Text[​](#lfm2-models-text "Direct link to lfm2-models-text") - -| Model | Description | -| --------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`LiquidAI/LFM2-8B-A1B`](https://huggingface.co/LiquidAI/LFM2-8B-A1B) | MoE model with 8B total parameters, 1.5B active per token for efficient inference. Best performance. | -| [`LiquidAI/LFM2-2.6B`](https://huggingface.co/LiquidAI/LFM2-2.6B) | High-performance model balancing capability and efficiency. | -| [`LiquidAI/LFM2-1.2B`](https://huggingface.co/LiquidAI/LFM2-1.2B) | Deprecated Compact model for resource-constrained environments. See updated [`LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) instead. | -| [`LiquidAI/LFM2-700M`](https://huggingface.co/LiquidAI/LFM2-700M) | Lightweight model for edge deployment. | -| [`LiquidAI/LFM2-350M`](https://huggingface.co/LiquidAI/LFM2-350M) | Tiny model for big data operations and edge deployment. Fastest inference. | - -## 👁️ Vision Models[​](#lfm2-vl "Direct link to 👁️ Vision Models") - -[LFM2-VL](https://huggingface.co/LiquidAI/collections) is a family of Vision Language Models (VLMs) that support text and image as inputs and text as outputs. These models are built on the LFM2 text model backbone with dynamic, user-tunable SigLIP2 NaFlex image encoders (Base 86M and shape-optimized 400M variants). 
- -[]() - -### LFM2.5 Models Vision Latest Release[​](#lfm25-models-vision-latest-release "Direct link to lfm25-models-vision-latest-release") - -| Model | Description | -| --------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`LiquidAI/LFM2.5-VL-1.6B`](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B) | Updated version of LFM2-VL-1.6B with improved training that delivers higher performance while maintaining the same architecture. **Recommended for most vision use cases.** | - -[]() - -### LFM2 Models Vision[​](#lfm2-models-vision "Direct link to lfm2-models-vision") - -| Model | Description | -| ----------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | -| [`LiquidAI/LFM2-VL-3B`](https://huggingface.co/LiquidAI/LFM2-VL-3B) | Highest-capacity multimodal model with enhanced visual understanding and reasoning. | -| [`LiquidAI/LFM2-VL-1.6B`](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) | Fast and capable model for scene understanding and other vision language tasks. | -| [`LiquidAI/LFM2-VL-450M`](https://huggingface.co/LiquidAI/LFM2-VL-450M) | Compact multimodal model for edge deployment and fast inference. | - -## 🎵 Audio Models[​](#lfm2-audio "Direct link to 🎵 Audio Models") - -[LFM2-Audio](https://huggingface.co/LiquidAI/collections) is a family of audio foundation models that support text and audio both as inputs and outputs. - -[]() - -### LFM2.5 Models Audio Latest Release[​](#lfm25-models-audio-latest-release "Direct link to lfm25-models-audio-latest-release") - -| Model | Description | -| --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`LiquidAI/LFM2.5-Audio-1.5B`](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B) | Updated version of LFM2-Audio with a custom LFM-based audio detokenizer for better ASR and TTS performance. **Recommended for most audio use cases.** | - -[]() - -### LFM2 Models Audio[​](#lfm2-models-audio "Direct link to lfm2-models-audio") - -| Model | Description | -| ----------------------------------------------------------------------------- | -------------------------------------------------------------------------- | -| [`LiquidAI/LFM2-Audio-1.5B`](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B) | Audio-to-audio processing model for speech tasks, like chat, ASR, and TTS. | - -## 🎯 Liquid Nanos[​](#liquid-nanos "Direct link to 🎯 Liquid Nanos") - -[Liquid Nanos](https://huggingface.co/collections/LiquidAI/liquid-nanos-68b98d898414dd94d4d5f99a) are task-specific models fine-tuned for specialized use cases. - -| Model | Description | -| ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`LiquidAI/LFM2-1.2B-Extract`](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract) | Extract important information from a wide variety of unstructured documents into structured outputs like JSON. 
[See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-extract) | -| [`LiquidAI/LFM2-350M-Extract`](https://huggingface.co/LiquidAI/LFM2-350M-Extract) | Smaller version of the extraction model. [See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-extract) | -| [`LiquidAI/LFM2-350M-ENJP-MT`](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT) | Near real-time bi-directional Japanese/English translation of short-to-medium inputs. > [See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-350m-enjp-mt) | -| [`LiquidAI/LFM2-1.2B-RAG`](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG) | Answer questions based on provided contextual documents, for use in RAG systems. > [See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-rag) | -| [`LiquidAI/LFM2-350M-Math`](https://huggingface.co/LiquidAI/LFM2-350M-Math) | Tiny reasoning model designed for tackling tricky math problems. | -| [`LiquidAI/LFM2-350M-PII-Extract-JP`](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP) | Extract personally identifiable information (PII) from Japanese text and output it in JSON format. [See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-350m-pii-extract-jp) | -| [`LiquidAI/LFM2-ColBERT-350M`](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M) | Embed documents and queries for fast retrieval and reranking across many languages. | -| [`LiquidAI/LFM2-2.6B-Transcript`](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript) | Designed for private, on-device meeting summarization. [See prompting guidelines](/docs/key-concepts/text-generation-and-prompting#lfm2-2.6b-transcript) | -| [`LiquidAI/LFM2-1.2B-Tool`](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool) | Deprecated Model optimized for concise and precise tool calling. See updated [`LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) instead. | - -## GGUF Models[​](#gguf-models "Direct link to GGUF Models") - -GGUF quantized versions are available for all LFM2 models for efficient inference with [llama.cpp](/docs/inference/llama-cpp), [LM Studio](/docs/inference/lm-studio), and [Ollama](/docs/inference/ollama). These models offer reduced memory usage and faster CPU inference. - -To access our official GGUF models, append `-GGUF` to any model repository name (e.g., `LiquidAI/LFM2-1.2B-GGUF`). All models are available in multiple quantization levels (`Q4_0`, `Q4_K_M`, `Q5_K_M`, `Q6_K`, `Q8_0`, `F16`). - -## MLX Models[​](#mlx-models "Direct link to MLX Models") - -MLX quantized versions are available for many of the LFM2 model library for efficient inference on Apple Silicon with [MLX](/docs/inference/mlx). These models leverage unified memory architecture for optimal performance on M-series chips. - -Browse all MLX-compatible models at [mlx-community LFM2 models](https://huggingface.co/mlx-community/collections?search=LFM). All models are available in multiple quantization levels (`4-bit`, `5-bit`, `6-bit`, `8-bit`, `bf16`). - -## ONNX Models[​](#onnx-models "Direct link to ONNX Models") - -ONNX versions are available for many LFM2 models for cross-platform deployment and inference with ONNX Runtime. These models enable deployment across diverse hardware including CPUs, GPUs, and specialized accelerators. - -To access our official ONNX models, append `-ONNX` to any model repository name (e.g., `LiquidAI/LFM2.5-1.2B-Instruct-ONNX`). 
diff --git a/docs/models/complete-library.mdx b/docs/models/complete-library.mdx index 64a2abd..c68325f 100644 --- a/docs/models/complete-library.mdx +++ b/docs/models/complete-library.mdx @@ -9,7 +9,7 @@ All of our models share the following capabilities: - 32k token context length for extended conversations and document processing - Designed for fast inference with [Transformers](/docs/inference/transformers), [llama.cpp](/docs/inference/llama-cpp), [vLLM](/docs/inference/vllm), [MLX](/docs/inference/mlx), [Ollama](/docs/inference/ollama), and [LEAP](/docs/frameworks/leap) -- Trainable via SFT, DPO, and GRPO with [LEAP Finetune](/docs/fine-tuning/leap-finetune), [TRL](/docs/fine-tuning/trl), and [Unsloth](/docs/fine-tuning/unsloth) +- Trainable via SFT, DPO, and GRPO with [TRL](/docs/fine-tuning/trl) and [Unsloth](/docs/fine-tuning/unsloth) @@ -59,35 +59,36 @@ Quantization reduces model size and speeds up inference with minimal quality los | ----- | -- | ---- | --- | ---- | ---------- | | **Text-to-text Models** | | | | | | | LFM2.5 Models (Latest Release) | | | | | | -| LFM2.5-1.2B-Instruct | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-ONNX) | Yes (TRL) | -| LFM2.5-1.2B-Thinking | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-ONNX) | Yes (TRL) | -| LFM2.5-1.2B-Base | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-GGUF) | ✗ | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-ONNX) | Yes (TRL) | -| LFM2.5-1.2B-JP | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-ONNX) | Yes (TRL) | +| [LFM2.5-1.2B-Instruct](/docs/models/lfm25-1.2b-instruct) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-ONNX) | Yes (TRL) | +| [LFM2.5-1.2B-Thinking](/docs/models/lfm25-1.2b-thinking) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-ONNX) | Yes (TRL) | +| [LFM2.5-1.2B-Base](/docs/models/lfm25-1.2b-base) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-GGUF) | ✗ | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Base-ONNX) | Yes (TRL) | +| [LFM2.5-1.2B-JP](/docs/models/lfm25-1.2b-jp) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-GGUF) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-MLX-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-1.2B-JP-ONNX) | Yes (TRL) | | LFM2 Models | | | | | | -| LFM2-8B-A1B | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B) | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B-GGUF) | 
[✓](https://huggingface.co/mlx-community/LFM2-8B-A1B-8bit) | ✗ | Yes (TRL) | -| LFM2-2.6B | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-2.6B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-2.6B-ONNX) | Yes (TRL) | -| LFM2-1.2B Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-1.2B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-ONNX) | Yes (TRL) | -| LFM2-700M | [✓](https://huggingface.co/LiquidAI/LFM2-700M) | [✓](https://huggingface.co/LiquidAI/LFM2-700M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-700M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-700M-ONNX) | Yes (TRL) | -| LFM2-350M | [✓](https://huggingface.co/LiquidAI/LFM2-350M) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ONNX) | Yes (TRL) | +| [LFM2-8B-A1B](/docs/models/lfm2-8b-a1b) | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B) | [✓](https://huggingface.co/LiquidAI/LFM2-8B-A1B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-8B-A1B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-8B-A1B-ONNX) | Yes (TRL) | +| [LFM2-2.6B](/docs/models/lfm2-2.6b) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-2.6B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-2.6B-ONNX) | Yes (TRL) | +| [LFM2-2.6B-Exp](/docs/models/lfm2-2.6b-exp) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Exp) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Exp-GGUF) | ✗ | ✗ | Yes (TRL) | +| [LFM2-1.2B](/docs/models/lfm2-1.2b) Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-1.2B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-ONNX) | Yes (TRL) | +| [LFM2-700M](/docs/models/lfm2-700m) | [✓](https://huggingface.co/LiquidAI/LFM2-700M) | [✓](https://huggingface.co/LiquidAI/LFM2-700M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-700M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-700M-ONNX) | Yes (TRL) | +| [LFM2-350M](/docs/models/lfm2-350m) | [✓](https://huggingface.co/LiquidAI/LFM2-350M) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ONNX) | Yes (TRL) | | **Vision Language Models** | | | | | | | LFM2.5 Models (Latest Release) | | | | | | -| LFM2.5-VL-1.6B | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2.5-VL-1.6B-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-ONNX) | Yes (TRL) | +| [LFM2.5-VL-1.6B](/docs/models/lfm25-vl-1.6b) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2.5-VL-1.6B-8bit) | [✓](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-ONNX) | Yes (TRL) | | LFM2 Models | | | | | | -| LFM2-VL-3B | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-3B-8bit) | ✗ | Yes (TRL) | -| LFM2-VL-1.6B | 
[✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-1.6B-8bit) | ✗ | Yes (TRL) | -| LFM2-VL-450M | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-450M-8bit) | ✗ | Yes (TRL) | +| [LFM2-VL-3B](/docs/models/lfm2-vl-3b) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-3B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-3B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-VL-3B-ONNX) | Yes (TRL) | +| [LFM2-VL-1.6B](/docs/models/lfm2-vl-1.6b) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-1.6B-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-1.6B-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-VL-1.6B-ONNX) | Yes (TRL) | +| [LFM2-VL-450M](/docs/models/lfm2-vl-450m) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M) | [✓](https://huggingface.co/LiquidAI/LFM2-VL-450M-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-VL-450M-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-VL-450M-ONNX) | Yes (TRL) | | **Audio Models** | | | | | | | LFM2.5 Models (Latest Release) | | | | | | -| LFM2.5-Audio-1.5B | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-GGUF) | ✗ | ✗ | Yes (TRL) | +| [LFM2.5-Audio-1.5B](/docs/models/lfm25-audio-1.5b) | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-GGUF) | ✗ | [✓](https://huggingface.co/LiquidAI/LFM2.5-Audio-1.5B-ONNX) | Yes (TRL) | | LFM2 Models | | | | | | -| LFM2-Audio-1.5B | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B-GGUF) | ✗ | ✗ | No | +| [LFM2-Audio-1.5B](/docs/models/lfm2-audio-1.5b) | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B) | [✓](https://huggingface.co/LiquidAI/LFM2-Audio-1.5B-GGUF) | ✗ | ✗ | No | | **Liquid Nanos** | | | | | | -| LFM2-1.2B-Extract | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Extract-ONNX) | Yes (TRL) | -| LFM2-350M-Extract | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Extract-ONNX) | Yes (TRL) | -| LFM2-350M-ENJP-MT | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-ENJP-MT-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ENJP-MT-ONNX) | Yes (TRL) | -| LFM2-1.2B-RAG | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-RAG-ONNX) | Yes (TRL) | -| LFM2-1.2B-Tool Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Tool-ONNX) | Yes (TRL) | -| LFM2-350M-Math | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Math-ONNX) | Yes (TRL) | -| LFM2-350M-PII-Extract-JP 
| [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP-GGUF) | ✗ | ✗ | Yes (TRL) | -| LFM2-ColBERT-350M | [✓](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M) | ✗ | ✗ | ✗ | Yes (PyLate) | -| LFM2-2.6B-Transcript | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-2.6B-Transcript-ONNX) | Yes (TRL) | +| [LFM2-1.2B-Extract](/docs/models/lfm2-1.2b-extract) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Extract-ONNX) | Yes (TRL) | +| [LFM2-350M-Extract](/docs/models/lfm2-350m-extract) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Extract-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Extract-ONNX) | Yes (TRL) | +| [LFM2-350M-ENJP-MT](/docs/models/lfm2-350m-enjp-mt) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-ENJP-MT-GGUF) | [✓](https://huggingface.co/mlx-community/LFM2-350M-ENJP-MT-8bit) | [✓](https://huggingface.co/onnx-community/LFM2-350M-ENJP-MT-ONNX) | Yes (TRL) | +| [LFM2-1.2B-RAG](/docs/models/lfm2-1.2b-rag) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-RAG-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-RAG-ONNX) | Yes (TRL) | +| [LFM2-1.2B-Tool](/docs/models/lfm2-1.2b-tool) Deprecated | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool) | [✓](https://huggingface.co/LiquidAI/LFM2-1.2B-Tool-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-1.2B-Tool-ONNX) | Yes (TRL) | +| [LFM2-350M-Math](/docs/models/lfm2-350m-math) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-Math-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-350M-Math-ONNX) | Yes (TRL) | +| [LFM2-350M-PII-Extract-JP](/docs/models/lfm2-350m-pii-extract-jp) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP) | [✓](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP-GGUF) | ✗ | ✗ | Yes (TRL) | +| [LFM2-ColBERT-350M](/docs/models/lfm2-colbert-350m) | [✓](https://huggingface.co/LiquidAI/LFM2-ColBERT-350M) | ✗ | ✗ | ✗ | Yes (PyLate) | +| [LFM2-2.6B-Transcript](/docs/models/lfm2-2.6b-transcript) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript) | [✓](https://huggingface.co/LiquidAI/LFM2-2.6B-Transcript-GGUF) | ✗ | [✓](https://huggingface.co/onnx-community/LFM2-2.6B-Transcript-ONNX) | Yes (TRL) | diff --git a/docs/models/lfm2-8b-a1b.mdx b/docs/models/lfm2-8b-a1b.mdx index 1a99bc8..516efd9 100644 --- a/docs/models/lfm2-8b-a1b.mdx +++ b/docs/models/lfm2-8b-a1b.mdx @@ -11,6 +11,7 @@ LFM2-8B-A1B is Liquid AI's Mixture-of-Experts model, combining 8B total paramete HF GGUF MLX + ONNX ## Specifications diff --git a/docs/models/lfm2-vl-1.6b.mdx b/docs/models/lfm2-vl-1.6b.mdx index b331a7d..1bb45a6 100644 --- a/docs/models/lfm2-vl-1.6b.mdx +++ b/docs/models/lfm2-vl-1.6b.mdx @@ -15,6 +15,7 @@ LFM2-VL-1.6B was the original 1.6B vision-language model. It has been superseded HF GGUF MLX + ONNX ## Specifications @@ -132,4 +133,28 @@ LFM2-VL-1.6B was the original 1.6B vision-language model. It has been superseded print(outputs[0].outputs[0].text) ``` + + + llama.cpp enables efficient CPU inference for vision models. 
+ + **Install:** + ```bash + brew install llama.cpp + ``` + + Or download pre-built binaries from [llama.cpp releases](https://github.com/ggml-org/llama.cpp/releases). + + **Run:** + ```bash + llama-cli \ + -hf LiquidAI/LFM2-VL-1.6B-GGUF:Q4_0 \ + --image test_image.jpg \ + -p "What's in this image?" \ + -n 128 + ``` + + The `-hf` flag downloads the model directly from Hugging Face. Use `--image-max-tokens` to control image token budget. + + For server deployment and advanced usage, see the [llama.cpp guide](/docs/inference/llama-cpp#vision-models). + diff --git a/docs/models/lfm2-vl-3b.mdx b/docs/models/lfm2-vl-3b.mdx index 009981f..eacbb18 100644 --- a/docs/models/lfm2-vl-3b.mdx +++ b/docs/models/lfm2-vl-3b.mdx @@ -11,6 +11,7 @@ LFM2-VL-3B is Liquid AI's highest-capacity multimodal model, delivering enhanced HF GGUF MLX + ONNX ## Specifications @@ -146,4 +147,28 @@ LFM2-VL-3B is Liquid AI's highest-capacity multimodal model, delivering enhanced print(outputs[0].outputs[0].text) ``` + + + llama.cpp enables efficient CPU inference for vision models. + + **Install:** + ```bash + brew install llama.cpp + ``` + + Or download pre-built binaries from [llama.cpp releases](https://github.com/ggml-org/llama.cpp/releases). + + **Run:** + ```bash + llama-cli \ + -hf LiquidAI/LFM2-VL-3B-GGUF:Q4_0 \ + --image test_image.jpg \ + -p "What's in this image?" \ + -n 128 + ``` + + The `-hf` flag downloads the model directly from Hugging Face. Use `--image-max-tokens` to control image token budget. + + For server deployment and advanced usage, see the [llama.cpp guide](/docs/inference/llama-cpp#vision-models). + diff --git a/docs/models/lfm2-vl-450m.mdx b/docs/models/lfm2-vl-450m.mdx index ddaa038..bbbc722 100644 --- a/docs/models/lfm2-vl-450m.mdx +++ b/docs/models/lfm2-vl-450m.mdx @@ -11,6 +11,7 @@ LFM2-VL-450M is Liquid AI's smallest vision-language model, designed for edge de HF GGUF MLX + ONNX ## Specifications @@ -146,4 +147,28 @@ LFM2-VL-450M is Liquid AI's smallest vision-language model, designed for edge de print(outputs[0].outputs[0].text) ``` + + + llama.cpp enables efficient CPU inference for vision models. + + **Install:** + ```bash + brew install llama.cpp + ``` + + Or download pre-built binaries from [llama.cpp releases](https://github.com/ggml-org/llama.cpp/releases). + + **Run:** + ```bash + llama-cli \ + -hf LiquidAI/LFM2-VL-450M-GGUF:Q4_0 \ + --image test_image.jpg \ + -p "What's in this image?" \ + -n 128 + ``` + + The `-hf` flag downloads the model directly from Hugging Face. Use `--image-max-tokens` to control image token budget. + + For server deployment and advanced usage, see the [llama.cpp guide](/docs/inference/llama-cpp#vision-models). + diff --git a/docs/models/lfm25-audio-1.5b.mdx b/docs/models/lfm25-audio-1.5b.mdx index a04acdd..25ad96d 100644 --- a/docs/models/lfm25-audio-1.5b.mdx +++ b/docs/models/lfm25-audio-1.5b.mdx @@ -10,6 +10,7 @@ LFM2.5-Audio-1.5B is Liquid AI's flagship audio model, featuring a custom LFM-ba
HF GGUF + ONNX
## Specifications diff --git a/docs/models/lfm25-vl-1.6b.mdx b/docs/models/lfm25-vl-1.6b.mdx index 61cbccf..2407bac 100644 --- a/docs/models/lfm25-vl-1.6b.mdx +++ b/docs/models/lfm25-vl-1.6b.mdx @@ -147,4 +147,28 @@ LFM2.5-VL-1.6B is Liquid AI's flagship vision-language model, delivering excepti print(outputs[0].outputs[0].text) ``` + + + llama.cpp enables efficient CPU inference for vision models. + + **Install:** + ```bash + brew install llama.cpp + ``` + + Or download pre-built binaries from [llama.cpp releases](https://github.com/ggml-org/llama.cpp/releases). + + **Run:** + ```bash + llama-cli \ + -hf LiquidAI/LFM2.5-VL-1.6B-GGUF:Q4_0 \ + --image test_image.jpg \ + -p "What's in this image?" \ + -n 128 + ``` + + The `-hf` flag downloads the model directly from Hugging Face. Use `--image-max-tokens` to control image token budget. + + For server deployment and advanced usage, see the [llama.cpp guide](/docs/inference/llama-cpp#vision-models). + diff --git a/leap/find-model.mdx b/leap/find-model.mdx deleted file mode 100644 index d345074..0000000 --- a/leap/find-model.mdx +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: "Find the best model for your use case" -description: "Choosing the right model is crucial for achieving optimal performance in your edge AI application. The model you select impacts everything from inference speed and memory usage to the quality of responses and compatibility with your target hardware." ---- - -LFM2 offers a diverse range of models optimized for different use cases, from lightweight models perfect for resource-constrained environments to more capable models that deliver state-of-the-art performance. - - - - - - - -[Edit this page](https://github.com/Liquid4All/docs/tree/main/leap/find-model.mdx) diff --git a/leap/finetuning.mdx b/leap/finetuning.mdx deleted file mode 100644 index d4be7d1..0000000 --- a/leap/finetuning.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: "Overview" -description: "Part of the power of SLMs, and LFM2 in particular, is their adaptability to specific use cases or tasks. One way to adapt models to your specific use case is via finetuning – or training the model on an additional set of data specific to your task." ---- - -This guide is a starting point for running Supervised Fine-Tuning (SFT) or Direct Preference Optimization (DPO) on LFM2 models. The same techniques can be applied to any model, but **the tools provided in LEAP are so far only tested thoroughly for compatability with LFM2.** - - - It's important to note that a critical aspect of finetuning is the dataset that is used; finetuning on poor quality data can even hinder model performance. For more information on what makes an effective dataset, check the documentation [here](https://github.com/mlabonne/llm-datasets). - - -## ≤ 1 GPUs[​](#-1-gpus "Direct link to ≤ 1 GPUs") - -If you don't have your own GPUs to run finetuning, don't worry – Liquid has developed a set of easy-to-use Jupyter notebooks in conjunction with our friends at Unsloth and Axolotl to enable easily finetuning LFM2 models in Google Colab on a single GPU. You can find the notebooks here: - -| Notebook | Description | Link | -| ------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DPO (TRL) | Preference alignment with Direct Preference Optimization (DPO) using TRL. 
| [![colab-icon](/images/leap/-lobehub/icons-static-png-1.55.0/light/colab-color.png)](https://colab.research.google.com/drive/1MQdsPxFHeZweGsNx4RH7Ia8lG8PiGE1t) | -| SFT (TRL) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using TRL. | [![colab-icon](/images/leap/-lobehub/icons-static-png-1.55.0/light/colab-color.png)](https://colab.research.google.com/drive/1j5Hk_SyBb2soUsuhU0eIEA9GwLNRnElF) | -| SFT (Unsloth) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using Unsloth. | [![colab-icon](/images/leap/-lobehub/icons-static-png-1.55.0/light/colab-color.png)](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Liquid_LFM2_\(1.2B\)-Conversational.ipynb) | -| SFT (Axolotl) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using Axolotl. | [![colab-icon](/images/leap/-lobehub/icons-static-png-1.55.0/light/colab-color.png)](https://colab.research.google.com/drive/155lr5-uYsOJmZfO6_QZPjbs8hA_v8S7t) | - -## > 1 GPUs[​](#-1-gpus-1 "Direct link to > 1 GPUs") - -If you have your own GPUs, you can use Liquid's `leap-finetune` package [here](https://github.com/Liquid4All/leap-finetune). `leap-finetune` simplifies the process of finetuning LFM2 models by allowing you to (1) provide your own data loader, (2) specify your training configuration, and (3) hit run. The tool is fully built with open source tools and handles distributed training up to a single node (e.g. 8 GPUs). - -## Support[​](#support "Direct link to Support") - -If you encounter an issue with the finetuning materials above, or if you have a feature request, please reach out to us! You can join our [Discord](https://discord.gg/liquid-ai), submit a GitHub issue, or send us a note at [support@liquid.ai](mailto:support@liquid.ai). - -[Edit this page](https://github.com/Liquid4All/docs/tree/main/leap/finetuning.mdx) diff --git a/leap/index.mdx b/leap/index.mdx deleted file mode 100644 index 45a13f1..0000000 --- a/leap/index.mdx +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: "Welcome to LEAP!" -description: "Liquid Edge AI Platform (LEAP) is an integrated toolchain that enables developers to effectively specialize and deploy small language models to their use case on their hardware." ---- - -LEAP is designed to help any app developer – regardless of their familiarity with AI – to unlock the best quality and fastest performance for their edge AI task. Our end-to-end ecosystem of tools simplifies the entire edge AI app development process: from model discovery, through customization, to deployment. - -## 1. Find the model you need[​](#1-find-the-model-you-need "Direct link to 1. Find the model you need") - - - - - - - -## 2. Vibe-check the model on device with 0 setup[​](#2-vibe-check-the-model-on-device-with-0-setup "Direct link to 2. Vibe-check the model on device with 0 setup") - - - - - - - -## 3. Deploy the model to your target device[​](#3-deploy-the-model-to-your-target-device "Direct link to 3. Deploy the model to your target device") - - - - - - - -## 4. Customize the model to boost performance[​](#4-customize-the-model-to-boost-performance "Direct link to 4. 
Customize the model to boost performance") - - - - - - - -[Edit this page](https://github.com/Liquid4All/docs/tree/main/leap/index.mdx) diff --git a/leap/laptop-support.mdx b/leap/laptop-support.mdx deleted file mode 100644 index 1638375..0000000 --- a/leap/laptop-support.mdx +++ /dev/null @@ -1,104 +0,0 @@ ---- -title: "Laptop Support" ---- - -## Our approach[​](#our-approach "Direct link to Our approach") - -There are a variety of ways to run SLMs on laptop and desktop computers already, with a variety of support for different operating systems, chip architectures, hardware backends, and more. - -To support this large (and growing) ecosystem, we are actively collaborating with, supporting, and contributing to the open source community to provide efficient and fast inference performance on as many systems as possible – especially for Liquid Foundation Models (LFMs). - -Specifically, for most users and environments, we recommend and support [llama.cpp](https://github.com/ggml-org/llama.cpp) as the primary engine for running LFMs and other LEAP models on Windows, Mac, and Linux machines. - -We will continue to evaluate different inference engines and update our recommendation, potentially on a per-platform basis, on a regular basis. - -## Running on laptops/desktops[​](#running-on-laptopsdesktops "Direct link to Running on laptops/desktops") - -There are two main ways to run models via llama.cpp: - -* Run as an OpenAI-compatible HTTP server -* Via language-specific library bindings - -### OpenAI-compatible server[​](#openai-compatible-server "Direct link to OpenAI-compatible server") - -If running an HTTP server locally on your hardware is sufficient for your use case, using llama.cpp directly via command line is the fastest/easiest way to get started. You can use the tables below to determine which `llama.cpp` binding is best for your environment and download the relevant binary (version `b7075`), or browse all releases and find the latest version [here](https://github.com/ggml-org/llama.cpp/releases). 
- -**Windows** - -| Hardware | Binary Name | Download Link | -| ----------------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------- | -| Nvidia GPU | llama-b7075-bin-win-cuda-12.4-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-cuda-12.4-x64.zip) | -| Intel GPU | llama-b7075-bin-win-sycl-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-sycl-x64.zip) | -| AMD GPU | llama-b7075-bin-win-vulkan-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-vulkan-x64.zip) | -| Other GPU | llama-b7075-bin-win-vulkan-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-vulkan-x64.zip) | -| Qualcomm Snapdragon CPU | llama-b7075-bin-win-cpu-arm64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-cpu-arm64.zip) | -| Other (CPU-only) | llama-b7075-bin-win-cpu-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-win-cpu-x64.zip) | - -**macOS** - -| Hardware | Binary Name | Download Link | -| ------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------- | -| Intel | llama-b7075-bin-macos-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-macos-x64.zip) | -| Apple Silicon | llama-b7075-bin-macos-arm64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-macos-arm64.zip) | - -**Ubuntu** - -| Hardware | Binary Name | Download Link | -| -------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------- | -| GPU | llama-b7075-bin-ubuntu-vulkan-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-ubuntu-vulkan-x64.zip) | -| CPU-only | llama-b7075-bin-ubuntu-x64.zip | [Download](https://github.com/ggml-org/llama.cpp/releases/download/b7075/llama-b7075-bin-ubuntu-x64.zip) | - -Once you have your appropriate binary, you can use it to run any GGUF file downloaded from the LEAP model library (or elsewhere). For more detailed instructions see the llama.cpp [quickstart guide](https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#quick-start). - -If you are considering investing in hardware, here are some profiling results from a variety of machines and inference backends. As it currently stands, AMD Ryzen™ machines generally have the best-in-class performance with relatively standard llama.cpp configuration settings – and with custom configurations, this advantage tends to increase. - -| Device | Prefill speed (tok/s) | Decode speed (tok/s) | -| ------------------------------- | --------------------- | -------------------- | -| AMD Ryzen™ AI Max+ 395 | 5476 | 143 | -| AMD Ryzen™ AI 9 HX 370 | 2680 | 113 | -| Apple Mac Mini (M4) | 1427 | 122 | -| Qualcomm Snapdragon™ X1E-78-100 | 978 | 125 | -| Intel Core™ Ultra 9 185H | 1310 | 58 | -| Intel Core™ Ultra 7 258V | 1104 | 78 | - -Note: for fair comparison, we conducted these benchmarks on the same model (`LFM2-1.2B-Q4_0.gguf`). 
For each hardware device, we also tested across all publicly available llama.cpp binaries, with different thread counts (4, 8, 12) for CPU runners, and took the best performing numbers for prefill and decode independently. - -### Language bindings[​](#language-bindings "Direct link to Language bindings") - -If you are interested in a more embedded inference experience (e.g. for an application), there are a variety of open-source language bindings that you can use within your particular project. These bindings generally act as any other importable library within your application source code. - - - While language bindings can often provide better performance and more control, support and maturity can be inconsistent across different languages/libraries, especially for newer model architectures. Take note of the most recent update to the publicly released version of each binding when considering what to use. - - -We have provided some very simple examples that demonstrate how to use three of the more popular language bindings. Getting started is as easy as copy-pasting the below code into the appropriate runtime environment, downloading a GGUF file, and pointing `PATH_TO_MODEL` to that file: - - - - ``` - import { getLlama, LlamaChatSession } from "node-llama-cpp";const llama = await getLlama();const model = await llama.loadModel({ modelPath: "PATH_TO_MODEL", gpuLayers: 0 });const context = await model.createContext({ contextSize: 2048 });const session = new LlamaChatSession({ contextSequence: context.getSequence(), systemPrompt: "You are a helpful AI assistant. Provide clear, concise, accurate responses."});const response = await session.prompt("Why is the sky blue?");console.log(response); - ``` - - - - ``` - from llama_cpp import Llamamodel = Llama(model_path="PATH_TO_MODEL", verbose=False)history = [{ "role": "system", "content": "You are a helpful AI assistant. Provide clear, concise, accurate responses."}]history.append({ "role": "user", "content": "Why is the sky blue?"})output = model.create_chat_completion(messages=history)response = output["choices"][0]["message"]["content"]print(response) - ``` - - - - ``` - using LLama;using LLama.Common;var parameters = new ModelParams("PATH_TO_MODEL"){ ContextSize = 2048, GpuLayerCount = 0,};using var model = LLamaWeights.LoadFromFile(parameters);using var context = model.CreateContext(parameters);var executor = new InteractiveExecutor(context);var chatHistory = new ChatHistory();ChatSession session = new(executor, chatHistory);InferenceParams inferenceParams = new InferenceParams(){ MaxTokens = 2048, AntiPrompts = new List { "User:" }};session.AddSystemMessage("You are a helpful AI assistant. Provide clear, concise, accurate responses.");string userInput = "Why is the sky blue?";await foreach ( var text in session.ChatAsync( new ChatHistory.Message(AuthorRole.User, userInput), inferenceParams )){ Console.Write(text);} - ``` - - - -For more comprehensive example apps, check out the follow repositories: - -* [JavaScript (NodeJS) example](https://github.com/Liquid4All/leap-llamacpp-electron-example) -* [Python example](https://github.com/Liquid4All/leap-llamacpp-python-example) -* [C# example](https://github.com/Liquid4All/leap-llamacpp-csharp-example) - -The full list of llama.cpp language bindings can be found [here](https://github.com/ggml-org/llama.cpp?tab=readme-ov-file#description). 
- -[Edit this page](https://github.com/Liquid4All/docs/tree/main/leap/laptop-support.mdx) diff --git a/leap/vibe-check-models.mdx b/leap/vibe-check-models.mdx deleted file mode 100644 index 5e84442..0000000 --- a/leap/vibe-check-models.mdx +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: "Vibe-check models" -description: "Before integrating a model into your application, it's essential to test its performance and behavior with your specific use cases. \"Vibe-checking\" allows you to evaluate how well a model responds to your prompts, assess response quality, and understand its capabilities and limitations." ---- - -LEAP provides multiple ways to interact with and test LFM2 models without any setup or configuration. Whether you prefer testing on your device or in the cloud, you can quickly validate that a model meets your requirements before moving to full deployment. - - - - - - - -[Edit this page](https://github.com/Liquid4All/docs/tree/main/leap/vibe-check-models.mdx) diff --git a/style.css b/style.css index 4814ff5..5beb72c 100644 --- a/style.css +++ b/style.css @@ -416,6 +416,33 @@ table a:hover { background-color: rgba(134, 75, 196, 0.15) !important; } +/* Model name links (first column) - subtle text, no underline, just hover effect */ +table:has(thead th:nth-child(6):not(:nth-child(7))) td:first-child a { + color: inherit !important; + font-weight: 500 !important; + padding: 0.25rem 0.4rem !important; + margin: -0.25rem -0.4rem !important; + border-radius: 4px !important; + background-image: none !important; + text-decoration: none !important; + transition: background-color 0.15s ease, color 0.15s ease !important; +} + +table:has(thead th:nth-child(6):not(:nth-child(7))) td:first-child a:hover { + color: #864bc4 !important; + background-color: rgba(134, 75, 196, 0.1) !important; + background-image: none !important; +} + +.dark table:has(thead th:nth-child(6):not(:nth-child(7))) td:first-child a { + color: inherit !important; +} + +.dark table:has(thead th:nth-child(6):not(:nth-child(7))) td:first-child a:hover { + color: #a78bfa !important; + background-color: rgba(167, 139, 250, 0.15) !important; +} + /* Dark mode table styling */ .dark table thead tr { background: linear-gradient(135deg, #4c1d95 0%, #5b21b6 100%) !important;