Update support matrices (#1232)

boe20211 · web-flow · commit 05e4b16e64df · 2025-12-05T22:54:46.000-08:00
Signed-off-by: Teresa Chen <boe20211@gmail.com>
diff --git a/docs/recommended_models_features.md b/docs/recommended_models_features.md
@@ -24,3 +24,21 @@ These tables show the models currently tested for accuracy and performance.
 This table shows the features currently tested for accuracy and performance.
 
 {{ read_csv('../support_matrices/feature_support_matrix.csv', keep_default_na=False) }}
+
+## Kernel Support
+
+This table shows the current kernel support status.
+
+{{ read_csv('../support_matrices/kernel_support_matrix.csv', keep_default_na=False) }}
+
+## Parallelism Support
+
+This table shows the current parallelism support status.
+
+{{ read_csv('../support_matrices/parallelism_support_matrix.csv', keep_default_na=False) }}
+
+## Quantization Support
+
+This table shows the current quantization support status.
+
+{{ read_csv('../support_matrices/quantization_support_matrix.csv', keep_default_na=False) }}
diff --git a/support_matrices/feature_support_matrix.csv b/support_matrices/feature_support_matrix.csv
@@ -1,11 +1,16 @@
 Feature,CorrectnessTest,PerformanceTest
-"Collective Communication Matmul",✅,N/A
-"Prefix Caching",✅,✅
-"Multimodal Inputs",✅,✅
-"Quantized Matmul Attention and KV Cache",✅,✅
 "Chunked Prefill",✅,✅
-"JAX-Path Qxix Quantization",✅,✅
+"DCN-based P/D disaggregation",to be added,to be added
+"KV cache host offloading",to be added,to be added
+"Llama 4 Maverick",to be added,to be added
+"LoRA_Torch",✅,to be added
+"Multimodal Inputs",✅,✅
+"Out-of-tree model support",✅,✅
+"Prefix Caching",✅,✅
 "Single Program Multi Data",✅,✅
+"Speculative Decoding: Eagle3",✅,✅
 "Speculative Decoding: Ngram",✅,✅
-"Structured Decoding",✅,N/A
-"Ragged Paged Attention V3",✅,✅
+"async scheduler",✅,✅
+"runai_model_streamer_loader",✅,N/A
+"sampling_params",✅,N/A
+"structured_decoding",✅,N/A
diff --git a/support_matrices/kernel_support_matrix.csv b/support_matrices/kernel_support_matrix.csv
@@ -0,0 +1,8 @@
+Feature,CorrectnessTest,PerformanceTest
+"Collective Communication Matmul",✅,to be added
+"MLA",to be added,to be added
+"MoE",to be added,to be added
+"Quantized Attention",to be added,to be added
+"Quantized KV Cache",to be added,to be added
+"Quantized Matmul",to be added,to be added
+"Ragged Paged Attention V3",✅,✅
diff --git a/support_matrices/parallelism_support_matrix.csv b/support_matrices/parallelism_support_matrix.csv
@@ -0,0 +1,7 @@
+Feature,CorrectnessTest,PerformanceTest
+"CP",to be added,to be added
+"DP",❌,N/A
+"EP",to be added,to be added
+"PP",✅,✅
+"SP",to be added,to be added
+"TP",to be added,to be added
diff --git a/support_matrices/quantization_support_matrix.csv b/support_matrices/quantization_support_matrix.csv
@@ -0,0 +1,7 @@
+Feature,Recommended TPU Generations,CorrectnessTest,PerformanceTest
+"AWQ INT4","v5, v6",to be added,to be added
+"FP4 W4A16",v7,to be added,to be added
+"FP8 W8A8",v7,to be added,to be added
+"FP8 W8A16",v7,to be added,to be added
+"INT4 W4A16","v5, v6",to be added,to be added
+"INT8 W8A8","v5, v6",to be added,to be added
diff --git a/support_matrices/text_only_model_support_matrix.csv b/support_matrices/text_only_model_support_matrix.csv
@@ -1,7 +1,8 @@
 Model,UnitTest,IntegrationTest,Benchmark
 "meta-llama/Llama-3.3-70B-Instruct",✅,✅,✅
-"Qwen/Qwen3-32B",✅,✅,✅
+"Qwen/Qwen3-4B",✅,✅,✅
 "google/gemma-3-27b-it",✅,✅,✅
+"Qwen/Qwen3-32B",✅,✅,✅
+"meta-llama/Llama-Guard-4-12B",✅,✅,✅
 "meta-llama/Llama-3.1-8B-Instruct",✅,✅,✅
 "Qwen/Qwen3-30B-A3B",✅,✅,✅
-"Qwen/Qwen3-4B",✅,✅,✅