Skip to content
This repository was archived by the owner on Jun 3, 2025. It is now read-only.

Commit 38fd61d

Browse files
bfineran, KSGulin, dependabot[bot], markurtz, and spacemanidol
authored
Sync release/0.10 with patches from main (#513)
* Update ViT Example + Fix Readme (#507) * Update: More straightforward ViT example and recipe * Bump numpy in /research/information_retrieval/doc2query (#503) Bumps [numpy](https://github.com/numpy/numpy) from 1.20.3 to 1.21.0. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/HOWTO_RELEASE.rst.txt) - [Commits](numpy/numpy@v1.20.3...v1.21.0) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Mark Kurtz <mark@neuralmagic.com> * Bump pillow in /research/information_retrieval/doc2query (#502) Bumps [pillow](https://github.com/python-pillow/Pillow) from 8.3.2 to 9.0.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](python-pillow/Pillow@8.3.2...9.0.0) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Mark Kurtz <mark@neuralmagic.com> * Bump nltk in /research/information_retrieval/doc2query (#501) Bumps [nltk](https://github.com/nltk/nltk) from 3.6.5 to 3.6.6. - [Release notes](https://github.com/nltk/nltk/releases) - [Changelog](https://github.com/nltk/nltk/blob/develop/ChangeLog) - [Commits](nltk/nltk@3.6.5...3.6.6) --- updated-dependencies: - dependency-name: nltk dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Mark Kurtz <mark@neuralmagic.com> * Update trainer.py (#506) * Update trainer.py recipe saving fixes for ddp * fix style * simplifying Co-authored-by: Daniel Campos <dfcf93@hotmail.com> Co-authored-by: Benjamin Fineran <bfineran@users.noreply.github.com> * bugfix: pytorch train save by top-1 acc (#509) * [transformers] trainers to prioritize recipe num_epochs + 0.10 fixes (#505) * allow for underscores in transformers onnx export task names * split out checkpoint and current recipes, QA CSV data load fix * propagate changes to task-level trainers * remove broken epoch override * add check before apply * json to jsonl conversion tool for HF custom data ease of use * Install Sparseml-Transformers deps dynamically on import (#511) * Update: install sparseml on setup (#512) * Update: install sparseml on setup * Fix: typo Co-authored-by: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Mark Kurtz <mark@neuralmagic.com> Co-authored-by: spacemanidol <dcampos3@illinois.edu> Co-authored-by: Daniel Campos <dfcf93@hotmail.com>
1 parent 3444136 commit 38fd61d

File tree

16 files changed

+199
-75
lines changed

16 files changed

+199
-75
lines changed

integrations/pytorch/train.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,12 @@ def train(
606606
val_metric = val_res.result_mean(target_metric).item()
607607

608608
if epoch >= train_args.save_best_after and (
609-
best_metric is None or val_metric <= best_metric
609+
best_metric is None
610+
or (
611+
val_metric <= best_metric
612+
if target_metric != "top1acc"
613+
else val_metric >= best_metric
614+
)
610615
):
611616
utils.save_model_training(
612617
model,

integrations/rwightman-timm/setup_integration.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ git clone https://github.com/neuralmagic/pytorch-image-models.git
88
cd pytorch-image-models
99
git checkout release/0.10
1010
pip install -r requirements.txt
11+
pip install sparseml[torch]

research/information_retrieval/doc2query/requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ nbconvert==6.0.7
5555
nbformat==5.1.3
5656
nest-asyncio==1.5.1
5757
networkx==2.5.1
58-
nltk==3.6.5
58+
nltk==3.6.6
5959
notebook==6.4.1
60-
numpy==1.20.3
60+
numpy==1.21.0
6161
onnx==1.7.0
6262
onnxruntime==1.8.0
6363
packaging==20.9
@@ -68,7 +68,7 @@ pathtools==0.1.2
6868
pathy==0.5.2
6969
pexpect==4.8.0
7070
pickleshare==0.7.5
71-
Pillow==8.3.2
71+
Pillow==9.0.0
7272
preshed==3.0.5
7373
progressbar2==3.53.1
7474
prometheus-client==0.11.0

setup.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@
3030

3131
_PACKAGE_NAME = "sparseml" if is_release else "sparseml-nightly"
3232

33-
transformers_branch = "master" if not is_release else f"release/{version_major_minor}"
34-
transformers_requirement = (
35-
"transformers @ git+https://github.com/neuralmagic/transformers.git"
36-
f"@{transformers_branch}"
37-
)
38-
3933
_deps = [
4034
"jupyter>=1.0.0",
4135
"ipywidgets>=7.0.0",
@@ -69,13 +63,6 @@
6963
"tf2onnx>=1.0.0,<1.6",
7064
]
7165
_keras_deps = ["tensorflow~=2.2.0", "keras2onnx>=1.0.0"]
72-
_transformers_deps = [
73-
"torch>=1.1.0,<1.9.0",
74-
transformers_requirement,
75-
"datasets",
76-
"sklearn",
77-
"seqeval",
78-
] + _pytorch_deps[1:]
7966

8067
_dev_deps = [
8168
"beautifulsoup4==4.9.3",
@@ -122,7 +109,6 @@ def _setup_extras() -> Dict:
122109
"tf_v1": _tensorflow_v1_deps,
123110
"tf_v1_gpu": _tensorflow_v1_gpu_deps,
124111
"tf_keras": _keras_deps,
125-
"transformers": _transformers_deps,
126112
}
127113

128114

src/sparseml/transformers/__init__.py

Lines changed: 75 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,87 @@
1818

1919
# flake8: noqa
2020

21+
import logging as _logging
22+
23+
2124
try:
2225
import transformers as _transformers
2326

24-
transformers_import_error = None
25-
except Exception as transformers_import_err:
26-
transformers_import_error = transformers_import_err
27+
_transformers_import_error = None
28+
except Exception as _transformers_import_err:
29+
_transformers_import_error = _transformers_import_err
2730

2831

29-
def _check_transformers_install():
30-
if transformers_import_error is None:
31-
return
32-
raise ImportError(
33-
"No installation of transformers found. It is recommended to use the "
34-
"sparseml fork of transformers which can be installed under "
35-
"sparseml[transformers] or git+https://github.com/neuralmagic/transformers.git"
32+
_LOGGER = _logging.getLogger(__name__)
33+
34+
35+
def _install_transformers_and_deps():
36+
37+
import pip as _pip
38+
import sparseml as _sparseml
39+
40+
transformers_branch = (
41+
"master"
42+
if not _sparseml.is_release
43+
else f"release/{_sparseml.version_major_minor}"
3644
)
45+
transformers_requirement = (
46+
"transformers @ git+https://github.com/neuralmagic/transformers.git"
47+
f"@{transformers_branch}"
48+
)
49+
50+
try:
51+
_pip.main(
52+
[
53+
"install",
54+
transformers_requirement,
55+
"datasets",
56+
"sklearn",
57+
"seqeval",
58+
]
59+
)
60+
61+
import transformers as _transformers
62+
63+
_LOGGER.info("sparseml-transformers and dependencies successfully installed")
64+
except Exception:
65+
raise ValueError(
66+
"Unable to install and import sparseml-transformers dependencies check "
67+
"that transformers is installed, if not, install via "
68+
"`pip install git+https://github.com/neuralmagic/transformers.git`"
69+
)
70+
71+
72+
def _check_transformers_install():
73+
if _transformers_import_error is not None:
74+
import os
75+
76+
if os.getenv("NM_NO_AUTOINSTALL_TRANSFORMERS", False):
77+
_LOGGER.warning(
78+
"Unable to import transformers, skipping auto installation "
79+
"due to NM_NO_AUTOINSTALL_TRANSFORMERS"
80+
)
81+
# skip any further checks
82+
return
83+
else:
84+
_LOGGER.info(
85+
"No installation of transformers found. Installing sparseml-transformers "
86+
"dependencies"
87+
)
88+
_install_transformers_and_deps()
89+
90+
# check NM fork installed with QATMatMul available
91+
try:
92+
import transformers as _transformers
93+
94+
_transformers.models.bert.modeling_bert.QATMatMul
95+
except Exception:
96+
_LOGGER.warning(
97+
"transformers.models.bert.modeling_bert.QATMatMul not availalbe. the"
98+
"neuralmagic fork of transformers may not be installed. it can be "
99+
"installed via "
100+
"`pip install git+https://github.com/neuralmagic/transformers.git`"
101+
)
37102

38103

39104
_check_transformers_install()

src/sparseml/transformers/train/language_modeling.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,8 @@ def group_texts(examples):
615615
# Initialize our Trainer
616616
trainer = SparseMLMLMTrainer(
617617
model_args.model_name_or_path,
618-
[existing_recipe, new_recipe],
618+
new_recipe,
619+
checkpoint_recipes=[existing_recipe],
619620
teacher=teacher_model,
620621
model=model,
621622
args=training_args,

src/sparseml/transformers/train/question_answering.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,11 +396,14 @@ def main():
396396
if data_args.test_file is not None:
397397
data_files["test"] = data_args.test_file
398398
extension = data_args.test_file.split(".")[-1]
399+
400+
# unwrap data from json
401+
load_kwargs = {"field": "data"} if extension == "json" else {}
399402
datasets = load_dataset(
400403
extension,
401404
data_files=data_files,
402-
field="data",
403405
cache_dir=model_args.cache_dir,
406+
**load_kwargs,
404407
)
405408
# See more about loading any type of standard or custom dataset
406409
# (from files, python dict, pandas DataFrame, etc) at
@@ -742,7 +745,8 @@ def compute_metrics(p: EvalPrediction):
742745
# Initialize our Trainer
743746
trainer = SparseMLQATrainer(
744747
model_args.model_name_or_path,
745-
[existing_recipe, new_recipe],
748+
recipe=new_recipe,
749+
checkpoint_recipes=[existing_recipe],
746750
teacher=teacher_model,
747751
model=model,
748752
args=training_args,

src/sparseml/transformers/train/text_classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,8 @@ def compute_metrics(p: EvalPrediction):
624624
# Initialize our Trainer
625625
trainer = SparseMLGLUETrainer(
626626
model_args.model_name_or_path,
627-
[existing_recipe, new_recipe],
627+
new_recipe,
628+
checkpoint_recipes=[existing_recipe],
628629
teacher=teacher_model,
629630
model=model,
630631
args=training_args,

src/sparseml/transformers/train/token_classification.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,8 @@ def compute_metrics(p):
556556
# Initialize our Trainer
557557
trainer = SparseMLNERTrainer(
558558
model_args.model_name_or_path,
559-
[existing_recipe, new_recipe],
559+
new_recipe,
560+
checkpoint_recipes=[existing_recipe],
560561
teacher=teacher_model,
561562
model=model,
562563
args=training_args,

src/sparseml/transformers/utils/export.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,13 @@ def export_transformer_to_onnx(
123123
pipeline, it will look only for 'model.onnx'
124124
:return: path to the exported ONNX file
125125
"""
126-
if task.lower() not in _TASK_TO_CONSTRUCTOR:
126+
task = "-".join(task.lower().split("_"))
127+
if task not in _TASK_TO_CONSTRUCTOR:
127128
raise ValueError(
128129
f"task {task} unsupported for export_transformer_to_onnx. Supported "
129130
f"tasks include {list(_TASK_TO_CONSTRUCTOR.keys())}"
130131
)
131-
auto_model_constructor = _TASK_TO_CONSTRUCTOR[task.lower()]
132+
auto_model_constructor = _TASK_TO_CONSTRUCTOR[task]
132133

133134
if not os.path.isdir(model_path):
134135
raise ValueError(

0 commit comments

Comments
 (0)