diff --git a/integrations/llama_cpp/examples/llama_cpp_generator_example.py b/integrations/llama_cpp/examples/llama_cpp_generator_example.py
index 96f8aec1d7..6886819368 100644
--- a/integrations/llama_cpp/examples/llama_cpp_generator_example.py
+++ b/integrations/llama_cpp/examples/llama_cpp_generator_example.py
@@ -1,7 +1,7 @@
 from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
 
 generator = LlamaCppGenerator(model="openchat-3.5-1210.Q3_K_S.gguf", n_ctx=512, n_batch=128)
-generator.warm_up()
+# Components warm up automatically on first run.
 
 question = "Who is the best American actor?"
 prompt = f"GPT4 Correct User: {question} <|end_of_turn|> GPT4 Correct Assistant:"
diff --git a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
index 074f4f7cf7..703c71ace7 100644
--- a/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
+++ b/integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/chat/chat_generator.py
@@ -191,7 +191,6 @@ class LlamaCppChatGenerator:
         model_clip_path="mmproj-model-f16.gguf",  # CLIP model
         n_ctx=4096  # Larger context for image processing
     )
-    generator.warm_up()
 
     result = generator.run(messages)
     print(result)
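For context, here is what the updated example file looks like end to end after this change. This is a minimal sketch assuming the lazy warm-up behavior described in the new comment; the `generation_kwargs` argument and the `"replies"` output key follow the standard Haystack generator interface and are not part of this diff.

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

# No explicit warm_up() call is needed anymore: the component loads
# the GGUF model lazily on the first run() invocation.
generator = LlamaCppGenerator(model="openchat-3.5-1210.Q3_K_S.gguf", n_ctx=512, n_batch=128)

question = "Who is the best American actor?"
prompt = f"GPT4 Correct User: {question} <|end_of_turn|> GPT4 Correct Assistant:"

# The first call triggers model loading, then generates the completion.
# max_tokens here is an illustrative setting, not part of this diff.
result = generator.run(prompt, generation_kwargs={"max_tokens": 128})
print(result["replies"][0])
```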