 # TODO: Refactor this into multiple files...
 HfModelPath = str

+
 class QaConfig(BaseModel):
-    llm_tests: Optional[List[str]] = Field([], description="list of tests that needs to be connected")
+    llm_tests: Optional[List[str]] = Field([], description="list of tests that needs to be connected")


 class DataConfig(BaseModel):
-    file_type: Literal["json", "csv", "huggingface"] = Field(
-        None, description="File type"
-    )
-    path: Union[FilePath, HfModelPath] = Field(
-        None, description="Path to the file or HuggingFace model"
-    )
-    prompt: str = Field(
-        None, description="Prompt for the model. Use {} brackets for column name"
-    )
+    file_type: Literal["json", "csv", "huggingface"] = Field(None, description="File type")
+    path: Union[FilePath, HfModelPath] = Field(None, description="Path to the file or HuggingFace model")
+    prompt: str = Field(None, description="Prompt for the model. Use {} brackets for column name")
     prompt_stub: str = Field(
         None,
         description="Stub for the prompt; this is injected during training. Use {} brackets for column name",
@@ -47,9 +42,7 @@ class DataConfig(BaseModel):


 class BitsAndBytesConfig(BaseModel):
-    load_in_8bit: Optional[bool] = Field(
-        False, description="Enable 8-bit quantization with LLM.int8()"
-    )
+    load_in_8bit: Optional[bool] = Field(False, description="Enable 8-bit quantization with LLM.int8()")
     llm_int8_threshold: Optional[float] = Field(
         6.0, description="Outlier threshold for outlier detection in 8-bit quantization"
     )
@@ -60,9 +53,7 @@ class BitsAndBytesConfig(BaseModel):
         False,
         description="Enable splitting model parts between int8 on GPU and fp32 on CPU",
     )
-    llm_int8_has_fp16_weight: Optional[bool] = Field(
-        False, description="Run LLM.int8() with 16-bit main weights"
-    )
+    llm_int8_has_fp16_weight: Optional[bool] = Field(False, description="Run LLM.int8() with 16-bit main weights")

     load_in_4bit: Optional[bool] = Field(
         True,
@@ -85,14 +76,10 @@ class ModelConfig(BaseModel):
8576 "NousResearch/Llama-2-7b-hf" ,
8677 description = "Path to the model (huggingface repo or local path)" ,
8778 )
88- device_map : Optional [str ] = Field (
89- "auto" , description = "device onto which to load the model"
90- )
79+ device_map : Optional [str ] = Field ("auto" , description = "device onto which to load the model" )
9180
9281 quantize : Optional [bool ] = Field (False , description = "Flag to enable quantization" )
93- bitsandbytes : BitsAndBytesConfig = Field (
94- None , description = "Bits and Bytes configuration"
95- )
82+ bitsandbytes : BitsAndBytesConfig = Field (None , description = "Bits and Bytes configuration" )
9683
9784 # @validator("hf_model_ckpt")
9885 # def validate_model(cls, v, **kwargs):
@@ -115,22 +102,12 @@ def set_device_map_to_none(cls, v, values, **kwargs):

 class LoraConfig(BaseModel):
     r: Optional[int] = Field(8, description="Lora rank")
-    task_type: Optional[str] = Field(
-        "CAUSAL_LM", description="Base Model task type during training"
-    )
+    task_type: Optional[str] = Field("CAUSAL_LM", description="Base Model task type during training")

-    lora_alpha: Optional[int] = Field(
-        16, description="The alpha parameter for Lora scaling"
-    )
-    bias: Optional[str] = Field(
-        "none", description="Bias type for Lora. Can be 'none', 'all' or 'lora_only'"
-    )
-    lora_dropout: Optional[float] = Field(
-        0.1, description="The dropout probability for Lora layers"
-    )
-    target_modules: Optional[List[str]] = Field(
-        None, description="The names of the modules to apply Lora to"
-    )
+    lora_alpha: Optional[int] = Field(16, description="The alpha parameter for Lora scaling")
+    bias: Optional[str] = Field("none", description="Bias type for Lora. Can be 'none', 'all' or 'lora_only'")
+    lora_dropout: Optional[float] = Field(0.1, description="The dropout probability for Lora layers")
+    target_modules: Optional[List[str]] = Field(None, description="The names of the modules to apply Lora to")
     fan_in_fan_out: Optional[bool] = Field(
         False,
         description="Flag to indicate if the layer to replace stores weight like (fan_in, fan_out)",
@@ -139,9 +116,7 @@ class LoraConfig(BaseModel):
         None,
         description="List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint",
     )
-    layers_to_transform: Optional[Union[List[int], int]] = Field(
-        None, description="The layer indexes to transform"
-    )
+    layers_to_transform: Optional[Union[List[int], int]] = Field(None, description="The layer indexes to transform")
     layers_pattern: Optional[str] = Field(None, description="The layer pattern name")
     # rank_pattern: Optional[Dict[str, int]] = Field(
     #     {}, description="The mapping from layer names or regexp expression to ranks"
@@ -154,15 +129,9 @@ class LoraConfig(BaseModel):
 # TODO: Get comprehensive Args!
 class TrainingArgs(BaseModel):
     num_train_epochs: Optional[int] = Field(1, description="Number of training epochs")
-    per_device_train_batch_size: Optional[int] = Field(
-        1, description="Batch size per training device"
-    )
-    gradient_accumulation_steps: Optional[int] = Field(
-        1, description="Number of steps for gradient accumulation"
-    )
-    gradient_checkpointing: Optional[bool] = Field(
-        True, description="Flag to enable gradient checkpointing"
-    )
+    per_device_train_batch_size: Optional[int] = Field(1, description="Batch size per training device")
+    gradient_accumulation_steps: Optional[int] = Field(1, description="Number of steps for gradient accumulation")
+    gradient_checkpointing: Optional[bool] = Field(True, description="Flag to enable gradient checkpointing")
     optim: Optional[str] = Field("paged_adamw_32bit", description="Optimizer")
     logging_steps: Optional[int] = Field(100, description="Number of logging steps")
     learning_rate: Optional[float] = Field(2.0e-4, description="Learning rate")
@@ -171,9 +140,7 @@ class TrainingArgs(BaseModel):
     fp16: Optional[bool] = Field(False, description="Flag to enable fp16")
     max_grad_norm: Optional[float] = Field(0.3, description="Maximum gradient norm")
     warmup_ratio: Optional[float] = Field(0.03, description="Warmup ratio")
-    lr_scheduler_type: Optional[str] = Field(
-        "constant", description="Learning rate scheduler type"
-    )
+    lr_scheduler_type: Optional[str] = Field("constant", description="Learning rate scheduler type")


 # TODO: Get comprehensive Args!
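
For context on how config models like these are usually consumed, below is a minimal sketch (not part of this commit) that composes them into one top-level object and parses it from a YAML file. The Config wrapper, the load_config helper, and the YAML layout are assumptions for illustration only; the sketch presumes it sits in the same module as the classes in the diff, so QaConfig, DataConfig, ModelConfig, LoraConfig, and TrainingArgs are already in scope.

# Hypothetical glue code, not from this commit.
import yaml  # PyYAML


class Config(BaseModel):
    # Assumed top-level layout; section names mirror the model classes above.
    data: DataConfig
    model: ModelConfig
    lora: LoraConfig
    training: TrainingArgs
    qa: Optional[QaConfig] = None


def load_config(path: str) -> Config:
    """Parse a YAML file into the typed config tree; pydantic validates each
    section (types, Literal choices, defaults) at construction time."""
    with open(path) as f:
        raw = yaml.safe_load(f)
    return Config(**raw)


# Example usage:
# cfg = load_config("config.yml")
# cfg.training.learning_rate  -> 2e-4 unless overridden in the YAML

Since the field names mirror those of transformers.BitsAndBytesConfig and peft.LoraConfig, the nested models can usually be unpacked straight into those constructors (e.g. peft.LoraConfig(**cfg.lora.dict())), though that wiring is outside this diff.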