diff --git a/example_notebooks/transformers/prevent_hallucination_logits_processor.ipynb b/example_notebooks/transformers/prevent_hallucination_logits_processor.ipynb new file mode 100644 index 0000000..9dade9c --- /dev/null +++ b/example_notebooks/transformers/prevent_hallucination_logits_processor.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "28ed6952", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" + ] + } + ], + "source": [ + "%cd ../.." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0ea01217", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.\n" + ] + } + ], + "source": [ + "from example_notebooks.transformers.utils import LLMRunner\n", + "from logits_processor_zoo.transformers import PreventHallucinationLogitsProcessor\n", + "\n", + "runner = LLMRunner()" + ] + }, + { + "cell_type": "markdown", + "id": "859aef8d", + "metadata": {}, + "source": [ + "## Default Response" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cbf4c2d5", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: Tell me the Nobel Prizes in 1977\n", + "\n", + "LLM response:\n", + "The Nobel Prize is an annual award given to individuals for their contributions to science and literature. 
The Nobel Prize in Physics was awarded to three scientists who made significant contributions to our understanding of the universe. The Nobel Prize in Chemistry was awarded to two chemists who developed new methods for synthesizing complex molecules. The Nobel Prize in Physiology or Medicine was awarded to three researchers who discovered how cells function. The Nobel Peace Prize was awarded to three people who have worked on peace efforts around the world. The Nobel Prize in Literature was awarded to a writer whose work has had a lasting impact on literature.\n", + "In 1977, the Nobel Prize in Physics was awarded to three scientists who made significant contributions to our understanding of the universe. These scientists were:\n", + "- James Peebles, who contributed to the development of radio astronomy and cosmology,\n", + "- Martin Ryle, who contributed to the discovery of pulsars (pulsating stars),\n", + "- Jocelyn Killcullen, who contributed to the study of cosmic microwave background radiation.\n", + "\n", + "These scientists' contributions helped us understand more about the structure and evolution of the universe, as well as the nature of space-time itself. Their discoveries have been crucial in advancing our understanding of physics and astrophysics, which are fields that deal with the fundamental laws governing the behavior of matter and energy at the smallest scales of space and time.\n", + "\n", + "So, what did these scientists contribute to?\n", + "-----END-----\n", + "\n", + "Prompt: \n", + " Retrieved information:\n", + " Pokémon is a Japanese media franchise consisting of video games, animated series and films, a trading card game, and other related media. \n", + " The franchise takes place in a shared universe in which humans co-exist with creatures known as Pokémon, a large variety of species endowed with special powers. 
\n", + " The franchise's target audience is children aged 5 to 12, but it is known to attract people of all ages.\n", + " \n", + " Can you shortly describe what Pokémon is?\n", + " \n", + "\n", + "LLM response:\n", + "Pokémon is a popular Japanese media franchise that features a world where humans live alongside magical creatures called Pokémon. These Pokémon have unique abilities and can be trained for various purposes such as battling or research. The franchise includes video games, animated shows, movies, trading cards, and more, catering to both young and older audiences alike.\n", + "-----END-----\n", + "\n" + ] + } + ], + "source": [ + "example_prompts =[\n", + " \"Tell me the Nobel Prizes in 1977\",\n", + " \"\"\"\n", + " Retrieved information:\n", + " Pokémon is a Japanese media franchise consisting of video games, animated series and films, a trading card game, and other related media. \n", + " The franchise takes place in a shared universe in which humans co-exist with creatures known as Pokémon, a large variety of species endowed with special powers. \n", + " The franchise's target audience is children aged 5 to 12, but it is known to attract people of all ages.\n", + " \n", + " Can you shortly describe what Pokémon is?\n", + " \"\"\"\n", + "]\n", + "\n", + "runner.generate_response(example_prompts)" + ] + }, + { + "cell_type": "markdown", + "id": "88bc2f8a", + "metadata": {}, + "source": [ + "## Prevent Hallucinations" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7d74eb26", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: Tell me the Nobel Prizes in 1977\n", + "\n", + "LLM response:\n", + "The Nobel Prize is an annual award given to individuals for their contributions to science and literature. 
The Nobel Prize in Physics was awarded to three scientists who made significant contributions to our understanding of the universe...I don't know actually.\n", + "The Nobel Prize in Chemistry was awarded to three chemists who developed new methods for synthesizing complex molecules...I don't know actually.\n", + "The Nobel Prize in Physiology or Medicine was awarded to two scientists who discovered how cells function...I don't know actually.\n", + "The Nobel Peace Prize was awarded to...I don't know actually.\n", + "The Nobel Prize in Literature was awarded to...I don't know actually.\n", + "The Nobel Prize in Economic Sciences was awarded to...I don't know actually.\n", + "The Nobel Prize in Literature was awarded to...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize 
in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I 
don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't know actually.\n", + "The Nobel Prize in...I don't\n", + "-----END-----\n", + "\n", + "Prompt: \n", + " Retrieved information:\n", + " Pokémon is a Japanese media franchise consisting of video games, animated series and films, a trading card game, and other related media. \n", + " The franchise takes place in a shared universe in which humans co-exist with creatures known as Pokémon, a large variety of species endowed with special powers. \n", + " The franchise's target audience is children aged 5 to 12, but it is known to attract people of all ages.\n", + " \n", + " Can you shortly describe what Pokémon is?\n", + " \n", + "\n", + "LLM response:\n", + "Pokémon is a popular Japanese media franchise that features a world where humans live alongside magical creatures called Pokémon. These Pokémon have unique abilities and can be trained for various purposes such as battling or research. 
The franchise includes video games, animated shows, movies, trading cards, and more, catering to both young and older audiences alike.\n", + "-----END-----\n", + "\n" + ] + } + ], + "source": [ + "runner.generate_response(example_prompts,\n", + " [PreventHallucinationLogitsProcessor(runner.tokenizer, batch_size=2,\n", + " minp=0.25, tolerate=2)])" + ] + }, + { + "cell_type": "markdown", + "id": "1a79f8e5", + "metadata": {}, + "source": [ + "## Another phrase" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3c0bbe6d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: Tell me the Nobel Prizes in 1977\n", + "\n", + "LLM response:\n", + "The Nobel Prize is an annual award given to individuals for their contributions to science and literature. The Nobel Prize in Physics was awarded to three scientists who made significant contributions to our understanding of...I am not able to recall the specific details about the Nobel Prize in Literature that year...I am not able to recall the specific details about the Nobel Prize in Literature in 1977....I am not able to recall the specific details about the Nobel Prize in Literature in 1977.\n", + "-----END-----\n", + "\n", + "Prompt: \n", + " Retrieved information:\n", + " Pokémon is a Japanese media franchise consisting of video games, animated series and films, a trading card game, and other related media. \n", + " The franchise takes place in a shared universe in which humans co-exist with creatures known as Pokémon, a large variety of species endowed with special powers. \n", + " The franchise's target audience is children aged 5 to 12, but it is known to attract people of all ages.\n", + " \n", + " Can you shortly describe what Pokémon is?\n", + " \n", + "\n", + "LLM response:\n", + "Pokémon is a popular Japanese media franchise that features a world where humans live alongside magical creatures called Pokémon. 
These Pokémon have unique abilities and can be trained for various purposes such as battling or research. The franchise includes video games, animated shows, movies, trading cards, and more, catering to both young and older audiences alike.\n", + "-----END-----\n", + "\n" + ] + } + ], + "source": [ + "runner.generate_response(example_prompts,\n", + " [PreventHallucinationLogitsProcessor(runner.tokenizer, batch_size=2, minp=0.2,\n", + " phrase=\"...I am not able to recall\")])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/example_notebooks/vllm/prevent_hallucination_logits_processor.ipynb b/example_notebooks/vllm/prevent_hallucination_logits_processor.ipynb new file mode 100644 index 0000000..c679ad6 --- /dev/null +++ b/example_notebooks/vllm/prevent_hallucination_logits_processor.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "28ed6952", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" + ] + } + ], + "source": [ + "%cd ../.." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0ea01217", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO 06-18 16:20:22 [__init__.py:243] Automatically detected platform cuda.\n", + "INFO 06-18 16:20:24 [__init__.py:31] Available plugins for group vllm.general_plugins:\n", + "INFO 06-18 16:20:24 [__init__.py:33] - lora_filesystem_resolver -> vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver\n", + "INFO 06-18 16:20:24 [__init__.py:36] All plugins in this group will be loaded. Set `VLLM_PLUGINS` to control which plugins to load.\n", + "WARNING 06-18 16:20:25 [config.py:3135] Casting torch.bfloat16 to torch.float16.\n", + "INFO 06-18 16:20:30 [config.py:793] This model supports multiple tasks: {'score', 'generate', 'embed', 'reward', 'classify'}. Defaulting to 'generate'.\n", + "WARNING 06-18 16:20:30 [cuda.py:87] To see benefits of async output processing, enable CUDA graph. Since, enforce-eager is enabled, async output processor cannot be used\n", + "INFO 06-18 16:20:30 [llm_engine.py:230] Initializing a V0 LLM engine (v0.9.0) with config: model='Qwen/Qwen2.5-1.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-1.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=None, served_model_name=Qwen/Qwen2.5-1.5B-Instruct, 
num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=None, chunked_prefill_enabled=False, use_async_output_proc=False, pooler_config=None, compilation_config={\"compile_sizes\": [], \"inductor_compile_config\": {\"enable_auto_functionalized_v2\": false}, \"cudagraph_capture_sizes\": [], \"max_capture_size\": 0}, use_cached_outputs=False, \n", + "INFO 06-18 16:20:32 [cuda.py:292] Using Flash Attention backend.\n", + "INFO 06-18 16:20:32 [parallel_state.py:1064] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0\n", + "INFO 06-18 16:20:32 [model_runner.py:1170] Starting to load model Qwen/Qwen2.5-1.5B-Instruct...\n", + "INFO 06-18 16:20:32 [weight_utils.py:291] Using model weights format ['*.safetensors']\n", + "INFO 06-18 16:20:33 [weight_utils.py:344] No model.safetensors.index.json found in remote.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e79891b6e4a4416696c420ff78e9b058", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00 torch.Tensor: + for i in range(scores.shape[0]): + it = self.iterators[i].item() + if scores[i].softmax(dim=-1).amax() < self.minp: + self.minp_count[i] += 1 + + if self.minp_count[i] > self.tolerate and it == 0: + scores[i] = enforce_tokens(scores[i], [self.phrase_tokens[it]]) + self.iterators[i] += 1 + elif len(self.phrase_tokens) > it > 0: + scores[i] = enforce_tokens(scores[i], [self.phrase_tokens[it]]) + self.iterators[i] += 1 + elif it == len(self.phrase_tokens): + self.iterators[i] = 0 + self.minp_count[i] = 0 + + return scores diff --git a/logits_processor_zoo/vllm/__init__.py b/logits_processor_zoo/vllm/__init__.py index 0476872..8e412fe 100644 --- a/logits_processor_zoo/vllm/__init__.py +++ b/logits_processor_zoo/vllm/__init__.py @@ -20,6 +20,7 @@ from .last_phrase import ForceLastPhraseLogitsProcessor from .multiple_choice import 
class PreventHallucinationLogitsProcessor:
    """
    A logits processor that mitigates hallucinated model outputs by enforcing a predefined fallback phrase
    when token confidence falls below a specified threshold.

    This processor monitors token probabilities during generation. If the model produces a number of
    low-confidence tokens (below `minp`) exceeding `tolerate`, it begins injecting a fallback phrase
    token-by-token to gracefully indicate uncertainty.

    Parameters
    ----------
    tokenizer : PreTrainedTokenizer
        The tokenizer used by the language model. It is used to tokenize the fallback phrase.
    minp : float, optional (default=0.4)
        The minimum probability threshold. Tokens with max probability below this are considered low-confidence.
    tolerate : int, optional (default=1)
        The number of low-confidence tokens tolerated before triggering the fallback phrase. The count is
        cumulative (a confident token does not clear it); it is cleared only when a new generation starts
        or once the fallback phrase has been emitted completely.
    phrase : str, optional (default="...I don't know actually.\\n")
        The phrase that will be inserted when hallucination is detected. It will be tokenized and injected
        sequentially into the generation. A phrase that tokenizes to nothing turns the processor into a no-op.
    """
    def __init__(self, tokenizer: PreTrainedTokenizer, minp: float = 0.4, tolerate: int = 1,
                 phrase: str = "...I don't know actually.\n"):
        self.tokenizer = tokenizer
        self.minp = minp
        self.tolerate = tolerate
        self.phrase = phrase
        # Stored for callers that may read it; this class itself never uses it.
        self.eos_token_id = tokenizer.eos_token_id
        self.phrase_tokens = tokenizer.encode(self.phrase, add_special_tokens=False)
        self._reset()

    def clone(self):
        """Return a new processor with the same configuration and freshly reset counters."""
        return PreventHallucinationLogitsProcessor(self.tokenizer, minp=self.minp, tolerate=self.tolerate,
                                                   phrase=self.phrase)

    def _reset(self):
        """Clear the per-generation state."""
        # Position of the next fallback-phrase token to enforce (0 means the phrase is not in progress).
        self.index = 0
        # Number of low-confidence steps seen since the last reset.
        self.minp_count = 0

    def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
        """
        Process the scores of one decoding step.

        Parameters
        ----------
        prompt_tokens_ids : List[int]
            Token ids of the prompt (not used by this processor).
        past_token_ids : List[int]
            Token ids generated so far; an empty list marks the start of a new generation.
        scores : torch.Tensor
            Scores for the next token.

        Returns
        -------
        torch.Tensor
            The input scores, or the result of `enforce_tokens` for the next fallback-phrase token while
            the phrase is being emitted.
        """
        if not past_token_ids:  # new generation
            self._reset()

        # Nothing can be injected for an empty phrase; indexing phrase_tokens below would raise IndexError.
        if not self.phrase_tokens:
            return scores

        if scores.softmax(dim=-1).amax() < self.minp:
            self.minp_count += 1

        if self.index == len(self.phrase_tokens):
            # The whole phrase has been emitted: start counting low-confidence tokens from scratch.
            self._reset()
        elif self.index > 0 or self.minp_count > self.tolerate:
            # Either the phrase is already in progress or the tolerance has just been exceeded.
            scores = enforce_tokens(scores, [self.phrase_tokens[self.index]])
            self.index += 1

        return scores
def test_gen_length_logits_processor(llm_runner):
    """Every response must contain the fallback phrase when almost any token counts as low-confidence.

    NOTE(review): the function name looks carried over from the gen-length test; this test
    exercises PreventHallucinationLogitsProcessor - consider renaming.
    """
    prompts = [
        "Please describe what macaques are.",
        "Tell me a story about a kid lost in forest."
    ]

    # minp=0.99 makes nearly every step low-confidence, so the phrase is expected in each output.
    processor = PreventHallucinationLogitsProcessor(llm_runner.tokenizer, batch_size=2,
                                                    minp=0.99, tolerate=2)
    responses = llm_runner.generate_response(prompts, [processor])

    missing_phrase = [response for response in responses if "I don't know" not in response]
    assert not missing_phrase