diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 1b32e1a1a..489b43310 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -219,12 +219,13 @@ jobs: notebook-checks: name: Notebook Checks runs-on: ubuntu-latest + timeout-minutes: 30 strategy: fail-fast: false matrix: notebook: - # - "Activation_Patching_in_TL_Demo" - # - "Attribution_Patching_Demo" + - "Attribution_Patching_Demo" + - "Activation_Patching_in_TL_Demo" - "ARENA_Content" - "BERT" - "Exploratory_Analysis_Demo" @@ -237,9 +238,17 @@ jobs: # - "No_Position_Experiment" - "Othello_GPT" - "Patchscopes_Generation_Demo" - # - "T5" + - "T5" steps: - uses: actions/checkout@v3 + - name: Add swap space + run: | + sudo swapoff /swapfile 2>/dev/null || true + sudo rm -f /swapfile + sudo fallocate -l 8G /swapfile + sudo chmod 600 /swapfile + sudo mkswap /swapfile + sudo swapon /swapfile - name: Install uv uses: astral-sh/setup-uv@v6 with: diff --git a/demos/Activation_Patching_in_TL_Demo.ipynb b/demos/Activation_Patching_in_TL_Demo.ipynb index abc033ad7..d71858f6d 100644 --- a/demos/Activation_Patching_in_TL_Demo.ipynb +++ b/demos/Activation_Patching_in_TL_Demo.ipynb @@ -58,7 +58,7 @@ " import google.colab\n", " IN_COLAB = True\n", " print(\"Running as a Colab notebook\")\n", - " %pip install git+https://github.com/TransformerLensOrg/TransformerLens.git\n", + " %pip install transformer_lens\n", " # Install my janky personal plotting utils\n", " %pip install git+https://github.com/neelnanda-io/neel-plotly.git\n", "except:\n", @@ -67,7 +67,7 @@ " from IPython import get_ipython\n", "\n", " ipython = get_ipython()\n", - " # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n", + " # Code to automatically update the TransformerBridge code as its edited without restarting the kernel\n", " ipython.run_line_magic(\"load_ext\", \"autoreload\")\n", " ipython.run_line_magic(\"autoreload\", \"2\")" ] @@ -127,11 +127,7 @@ "source": [ "import transformer_lens\n", "import transformer_lens.utils as utils\n", - "from transformer_lens.hook_points import (\n", - " HookedRootModule,\n", - " HookPoint,\n", - ") # Hooking utilities\n", - "from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache" + "from transformer_lens.model_bridge import TransformerBridge" ] }, { @@ -145,18 +141,7 @@ "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "_ = torch.set_grad_enabled(False)" @@ -175,7 +160,14 @@ "metadata": {}, "outputs": [], "source": [ - "from neel_plotly import line, imshow, scatter" + "try:\n", + " from neel_plotly import line, imshow, scatter\n", + "except ImportError:\n", + " # neel_plotly is an optional visualization dependency.\n", + " # Define no-op stubs so patching computations still run without it.\n", + " def line(*args, **kwargs): pass\n", + " def imshow(*args, **kwargs): pass\n", + " def scatter(*args, **kwargs): pass" ] }, { @@ -201,22 +193,31 @@ "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using pad_token, but it is not set yet.\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fe942617d9f4430586031676e4597f95", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading weights: 0%| | 0/148 [00:00\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -433,7 +434,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "49d0b56fa468408ca15afa42d0d1c91b", + "model_id": "0191b6a12fd6425ba35a8c9fdbe0b1bb", "version_major": 2, "version_minor": 0 }, @@ -451,9 +452,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -509,7 +510,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "04132888196746f3b16918c63dd6c023", + "model_id": "237157b3e7ca4a69b8ab320584fad22b", "version_major": 2, "version_minor": 0 }, @@ -527,9 +528,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -561,6 +562,8 @@ } ], "source": [ + "# 2,160 forward passes of GPT-2; too slow for CI without GPU\n", + "# NBVAL_SKIP\n", "ALL_HEAD_LABELS = [f\"L{i}H{j}\" for i in range(model.cfg.n_layers) for j in range(model.cfg.n_heads)]\n", "if DO_SLOW_RUNS:\n", " attn_head_out_act_patch_results = patching.get_act_patch_attn_head_out_by_pos(model, corrupted_tokens, clean_cache, ioi_metric)\n", @@ -590,7 +593,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9dec177dd2f446248b5850e149fab8fc", + "model_id": "440041cdc83b4bc3b1f991cefa1df6f4", "version_major": 2, "version_minor": 0 }, @@ -604,7 +607,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ec9aae8965c84a819fcf6158dbc45fa6", + "model_id": "05b1394ee4a54d8e94050197279d2a11", "version_major": 2, "version_minor": 0 }, @@ -618,7 +621,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "caaf63fc68224f4babc8f492d55785e2", + "model_id": "4f5b66701221464580ccc06f25d257e0", "version_major": 2, "version_minor": 0 }, @@ -636,9 +639,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -689,7 +692,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "997da4ffc80c4a87a827ab16aa1b76d7", + "model_id": "fd46410602d648ecb4d8e5c603f599c7", "version_major": 2, "version_minor": 0 }, @@ -703,7 +706,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c71892621747426b8f69dc36dcee104e", + "model_id": "898ce7b338ec42b692974fb4af3d3d3a", "version_major": 2, "version_minor": 0 }, @@ -717,7 +720,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1df77d7824f44ba58a08f37129cc7722", + "model_id": "a202d7fa0e894969b6093823c3a07900", "version_major": 2, "version_minor": 0 }, @@ -731,7 +734,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fd3be25d4cd04a3bbbbec2c24a7b9d6b", + "model_id": "07f6b5419b414d739882be1135a0ac26", "version_major": 2, "version_minor": 0 }, @@ -745,7 +748,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fc48b4abdc40412387bbea701ce028ed", + "model_id": "487f335aa1fb4f0a93c04944fcae2918", "version_major": 2, "version_minor": 0 }, @@ -763,9 +766,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -805,120 +808,12 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "92709c396fdd48a7b169b87cc40d4ac2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/2160 [00:00\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ + "# 10,800 forward passes of GPT-2; too slow for CI without GPU\n", + "# NBVAL_SKIP\n", "if DO_SLOW_RUNS:\n", " every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(model, corrupted_tokens, clean_cache, ioi_metric)\n", " every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n", @@ -944,18 +839,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded pretrained model attn-only-2l into HookedTransformer\n" - ] - } - ], + "outputs": [], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "# attn-only-2l is a custom TransformerLens model (not a standard HF architecture),\n", + "# so we load it via HookedTransformer rather than TransformerBridge.\n", + "from transformer_lens import HookedTransformer\n", + "\n", "attn_only = HookedTransformer.from_pretrained(\"attn-only-2l\")\n", "batch = 4\n", "seq_len = 20\n", @@ -987,32 +879,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Clean baseline: -2.423922538757324\n", - "Corrupted baseline: -13.486991882324219\n" - ] - } - ], - "source": [ - "def induction_loss(logits, answer_token_indices=rand_tokens_A):\n", - " seq_len = answer_token_indices.shape[1]\n", - "\n", - " # logits: batch x seq_len x vocab_size\n", - " # Take the logits for the answers, cut off the final element to get the predictions for all but the first element of the answers (which can't be predicted)\n", - " final_logits = logits[:, -seq_len:-1]\n", - " final_log_probs = final_logits.log_softmax(-1)\n", - " return final_log_probs.gather(-1, answer_token_indices[:, 1:].unsqueeze(-1)).mean()\n", - "CLEAN_BASELINE_INDUCTION = induction_loss(clean_logits_induction).item()\n", - "print(\"Clean baseline:\", CLEAN_BASELINE_INDUCTION)\n", - "CORRUPTED_BASELINE_INDUCTION = induction_loss(corrupted_logits_induction).item()\n", - "print(\"Corrupted baseline:\", CORRUPTED_BASELINE_INDUCTION)" - ] + "outputs": [], + "source": "# NBVAL_IGNORE_OUTPUT\ndef induction_loss(logits, answer_token_indices=rand_tokens_A):\n seq_len = answer_token_indices.shape[1]\n\n # logits: batch x seq_len x vocab_size\n # Take the logits for the answers, cut off the final element to get the predictions for all but the first element of the answers (which can't be predicted)\n final_logits = logits[:, -seq_len:-1]\n final_log_probs = final_logits.log_softmax(-1)\n return final_log_probs.gather(-1, answer_token_indices[:, 1:].unsqueeze(-1)).mean()\nCLEAN_BASELINE_INDUCTION = induction_loss(clean_logits_induction).item()\nprint(\"Clean baseline:\", CLEAN_BASELINE_INDUCTION)\nCORRUPTED_BASELINE_INDUCTION = induction_loss(corrupted_logits_induction).item()\nprint(\"Corrupted baseline:\", CORRUPTED_BASELINE_INDUCTION)" }, { "cell_type": "markdown", @@ -1023,167 +893,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b772b9d6bf024a6c9aaa1a584e34fbcf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/16 [00:00\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n", - "imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n", - "\n", - "if DO_SLOW_RUNS:\n", - " every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n", - " every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n", - " imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=CLEAN_BASELINE_INDUCTION, x= [f\"{tok}_{i}\" for i, tok in enumerate(attn_only.to_str_tokens(clean_tokens_induction[0]))], y=[f\"L{l}H{h}\" for l in range(attn_only.cfg.n_layers) for h in range(attn_only.cfg.n_heads)])" - ] + "outputs": [], + "source": "# NBVAL_SKIP\n# Heavy patching computation — too slow for CI without GPU\nevery_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\nimshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n\nif DO_SLOW_RUNS:\n every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=CLEAN_BASELINE_INDUCTION, x= [f\"{tok}_{i}\" for i, tok in enumerate(attn_only.to_str_tokens(clean_tokens_induction[0]))], y=[f\"L{l}H{h}\" for l in range(attn_only.cfg.n_layers) for h in range(attn_only.cfg.n_heads)])" }, { "cell_type": "markdown", @@ -1552,7 +1265,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "transformer-lens", "language": "python", "name": "python3" }, @@ -1566,14 +1279,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.12.12" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" - } - } + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 diff --git a/demos/Attribution_Patching_Demo.ipynb b/demos/Attribution_Patching_Demo.ipynb index bd9b6c707..3638ce2be 100644 --- a/demos/Attribution_Patching_Demo.ipynb +++ b/demos/Attribution_Patching_Demo.ipynb @@ -45,28 +45,28 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:47.592530Z", + "iopub.status.busy": "2026-03-06T20:17:47.592314Z", + "iopub.status.idle": "2026-03-06T20:17:47.623795Z", + "shell.execute_reply": "2026-03-06T20:17:47.623529Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Running as a Jupyter notebook - intended for development only!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_25358/2480103146.py:24: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"load_ext autoreload\")\n", - "/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_25358/2480103146.py:25: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"autoreload 2\")\n" + "Running as a Jupyter notebook - intended for development only!\n", + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" ] } ], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", "import os\n", "\n", @@ -100,8 +100,15 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:47.641002Z", + "iopub.status.busy": "2026-03-06T20:17:47.640902Z", + "iopub.status.idle": "2026-03-06T20:17:48.042243Z", + "shell.execute_reply": "2026-03-06T20:17:48.041991Z" + } + }, "outputs": [], "source": [ "# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n", @@ -116,21 +123,16 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'torchtyping'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mplotly\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexpress\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpx\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataLoader\n\u001b[0;32m---> 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorchtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TensorType \u001b[38;5;28;01mas\u001b[39;00m TT\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List, Union, Optional, Callable\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m partial\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'torchtyping'" - ] + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:48.043320Z", + "iopub.status.busy": "2026-03-06T20:17:48.043256Z", + "iopub.status.idle": "2026-03-06T20:17:49.916214Z", + "shell.execute_reply": "2026-03-06T20:17:49.915938Z" } - ], + }, + "outputs": [], "source": [ "# Import stuff\n", "import torch\n", @@ -146,7 +148,9 @@ "import plotly.express as px\n", "from torch.utils.data import DataLoader\n", "\n", - "from torchtyping import TensorType as TT\n", + "from typing import Any\n", + "class TT: # torchtyping stub\n", + " def __class_getitem__(cls, _): return Any\n", "from typing import List, Union, Optional, Callable\n", "from functools import partial\n", "import copy\n", @@ -156,27 +160,30 @@ "from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer\n", "import dataclasses\n", "import datasets\n", - "from IPython.display import HTML, Markdown" + "from IPython.display import HTML, Markdown\n", + "\n", + "import circuitsvis as cv\n" ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:49.917420Z", + "iopub.status.busy": "2026-03-06T20:17:49.917312Z", + "iopub.status.idle": "2026-03-06T20:17:50.070729Z", + "shell.execute_reply": "2026-03-06T20:17:50.070436Z" + } + }, "outputs": [], "source": [ "import transformer_lens\n", "import transformer_lens.utils as utils\n", - "from transformer_lens.hook_points import (\n", - " HookedRootModule,\n", - " HookPoint,\n", - ") # Hooking utilities\n", "from transformer_lens import (\n", - " HookedTransformer,\n", - " HookedTransformerConfig,\n", - " FactoredMatrix,\n", " ActivationCache,\n", - ")" + ")\n", + "from transformer_lens.model_bridge import TransformerBridge" ] }, { @@ -188,17 +195,38 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:50.071943Z", + "iopub.status.busy": "2026-03-06T20:17:50.071881Z", + "iopub.status.idle": "2026-03-06T20:17:50.092786Z", + "shell.execute_reply": "2026-03-06T20:17:50.092534Z" + } + }, "outputs": [], "source": [ - "from neel_plotly import line, imshow, scatter" + "try:\n", + " from neel_plotly import line, imshow, scatter\n", + "except ImportError:\n", + " # neel_plotly is an optional visualization dependency.\n", + " # Define no-op stubs so patching computations still run without it.\n", + " def line(*args, **kwargs): pass\n", + " def imshow(*args, **kwargs): pass\n", + " def scatter(*args, **kwargs): pass\n" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:50.093774Z", + "iopub.status.busy": "2026-03-06T20:17:50.093722Z", + "iopub.status.idle": "2026-03-06T20:17:50.107523Z", + "shell.execute_reply": "2026-03-06T20:17:50.107307Z" + } + }, "outputs": [], "source": [ "import transformer_lens.patching as patching" @@ -214,33 +242,153 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:17:50.108499Z", + "iopub.status.busy": "2026-03-06T20:17:50.108449Z", + "iopub.status.idle": "2026-03-06T20:18:11.048982Z", + "shell.execute_reply": "2026-03-06T20:18:11.048621Z" + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Using pad_token, but it is not set yet.\n" + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded pretrained model gpt2-small into HookedTransformer\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "022a5a438ab24589bda1266d4dba933d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/665 [00:00= 2 and v.shape[0] > 1:\n", + " cache.cache_dict[\"hook_pos_embed\"] = pe.expand(v.shape[0], -1, -1)\n", + " break\n", + "\n", + "\n", + "def _fix_head_result_cache(cache, model, is_grad=False):\n", + " \"\"\"Fix hook_result shape for TransformerBridge compatibility.\n", "\n", - " def forward_cache_hook(act, hook):\n", - " cache[hook.name] = act.detach()\n", + " TransformerBridge's hook_result captures [batch, pos, d_model] (post-head-sum)\n", + " but stack_head_results expects [batch, pos, n_heads, d_model] (pre-sum).\n", "\n", - " model.add_hook(filter_not_qkv_input, forward_cache_hook, \"fwd\")\n", + " For forward caches: remove entries so compute_head_results recomputes from z + W_O.\n", + " For grad caches: expand gradient from [batch, pos, d_model] to\n", + " [batch, pos, n_heads, d_model] since d(loss)/d(result_h) = d(loss)/d(out) for all h.\n", + " \"\"\"\n", + " n_heads = model.cfg.n_heads\n", + " for layer in range(model.cfg.n_layers):\n", + " key = f\"blocks.{layer}.attn.hook_result\"\n", + " if key not in cache.cache_dict:\n", + " continue\n", + " val = cache.cache_dict[key]\n", + " if not isinstance(val, torch.Tensor) or val.ndim != 3:\n", + " continue # Already correct shape or not a tensor\n", + " if is_grad:\n", + " # Gradient of sum is identity: d(loss)/d(result_h) = d(loss)/d(out)\n", + " cache.cache_dict[key] = val.unsqueeze(-2).expand(\n", + " *val.shape[:-1], n_heads, val.shape[-1]\n", + " )\n", + " else:\n", + " # Remove so compute_head_results can recompute from z + W_O\n", + " del cache.cache_dict[key]\n", "\n", + "\n", + "def get_cache_fwd_and_bwd(model, tokens, metric):\n", + " model.reset_hooks()\n", + " device = model.cfg.device\n", " grad_cache = {}\n", "\n", " def backward_cache_hook(act, hook):\n", @@ -417,13 +634,38 @@ "\n", " model.add_hook(filter_not_qkv_input, backward_cache_hook, \"bwd\")\n", "\n", - " value = metric(model(tokens))\n", + " # run_with_cache handles forward caching with proper alias resolution\n", + " output, fwd_cache = model.run_with_cache(\n", + " tokens, names_filter=filter_not_qkv_input\n", + " )\n", + "\n", + " # Fix forward cache: move from CPU back to model device, expand pos_embed\n", + " _fix_cache_device_and_pos_embed(fwd_cache, device)\n", + "\n", + " value = metric(output)\n", " value.backward()\n", " model.reset_hooks()\n", + "\n", + " # Add alias entries for grad_cache (backward hooks use hook.name, not dict key).\n", + " # Only add aliases that pass our filter to avoid KeyErrors downstream.\n", + " for key, hp in model.hook_dict.items():\n", + " if hp.name != key and filter_not_qkv_input(key):\n", + " if hp.name in grad_cache and key not in grad_cache:\n", + " grad_cache[key] = grad_cache[hp.name]\n", + "\n", + " # Fix grad cache pos_embed batch dimension too\n", + " grad_act_cache = ActivationCache(grad_cache, model)\n", + " _fix_cache_device_and_pos_embed(grad_act_cache, device)\n", + "\n", + " # Fix hook_result shape: TransformerBridge captures [batch, pos, d_model]\n", + " # but stack_head_results expects [batch, pos, n_heads, d_model]\n", + " _fix_head_result_cache(fwd_cache, model, is_grad=False)\n", + " _fix_head_result_cache(grad_act_cache, model, is_grad=True)\n", + "\n", " return (\n", " value.item(),\n", - " ActivationCache(cache, model),\n", - " ActivationCache(grad_cache, model),\n", + " fwd_cache,\n", + " grad_act_cache,\n", " )\n", "\n", "\n", @@ -438,7 +680,7 @@ ")\n", "print(\"Corrupted Value:\", corrupted_value)\n", "print(\"Corrupted Activations Cached:\", len(corrupted_cache))\n", - "print(\"Corrupted Gradients Cached:\", len(corrupted_grad_cache))" + "print(\"Corrupted Gradients Cached:\", len(corrupted_grad_cache))\n" ] }, { @@ -454,8 +696,15 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:11.625801Z", + "iopub.status.busy": "2026-03-06T20:18:11.625732Z", + "iopub.status.idle": "2026-03-06T20:18:11.643613Z", + "shell.execute_reply": "2026-03-06T20:18:11.643388Z" + } + }, "outputs": [], "source": [ "def create_attention_attr(\n", @@ -480,8 +729,15 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:11.644629Z", + "iopub.status.busy": "2026-03-06T20:18:11.644574Z", + "iopub.status.idle": "2026-03-06T20:18:11.658123Z", + "shell.execute_reply": "2026-03-06T20:18:11.657927Z" + } + }, "outputs": [ { "name": "stdout", @@ -515,8 +771,15 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:11.659129Z", + "iopub.status.busy": "2026-03-06T20:18:11.659081Z", + "iopub.status.idle": "2026-03-06T20:18:12.156846Z", + "shell.execute_reply": "2026-03-06T20:18:12.156025Z" + } + }, "outputs": [ { "data": { @@ -533,96 +796,18 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - " \n", - "
\n", - " \n", - " \n", - " " + "
\n", + " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -643,96 +828,18 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - " \n", - "
\n", - " \n", - " \n", - " " + "
\n", + " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -771,10 +878,10 @@ " if title:\n", " display(Markdown(\"### \" + title))\n", " display(\n", - " pysvelte.AttentionMulti(\n", + " cv.attention.attention_heads(\n", " tokens=model.to_str_tokens(tokens),\n", - " attention=attention_attr_signed.permute(1, 2, 0)[:, :, :top_k],\n", - " head_labels=head_labels[:top_k],\n", + " attention=attention_attr_signed[:top_k],\n", + " attention_head_names=head_labels[:top_k],\n", " )\n", " )\n", "\n", @@ -810,8 +917,15 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.159003Z", + "iopub.status.busy": "2026-03-06T20:18:12.158829Z", + "iopub.status.idle": "2026-03-06T20:18:12.248543Z", + "shell.execute_reply": "2026-03-06T20:18:12.248254Z" + } + }, "outputs": [ { "data": { @@ -820,9 +934,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -892,8 +1006,15 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.249641Z", + "iopub.status.busy": "2026-03-06T20:18:12.249552Z", + "iopub.status.idle": "2026-03-06T20:18:12.285135Z", + "shell.execute_reply": "2026-03-06T20:18:12.284915Z" + } + }, "outputs": [ { "data": { @@ -902,9 +1023,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -968,9 +1089,23 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.286080Z", + "iopub.status.busy": "2026-03-06T20:18:12.286028Z", + "iopub.status.idle": "2026-03-06T20:18:12.448891Z", + "shell.execute_reply": "2026-03-06T20:18:12.448663Z" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Tried to compute head results when they were already cached\n" + ] + }, { "data": { "text/html": [ @@ -978,9 +1113,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1017,9 +1152,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1108,8 +1243,15 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 19, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.449952Z", + "iopub.status.busy": "2026-03-06T20:18:12.449887Z", + "iopub.status.idle": "2026-03-06T20:18:12.566117Z", + "shell.execute_reply": "2026-03-06T20:18:12.565889Z" + } + }, "outputs": [ { "data": { @@ -1130,9 +1272,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1169,9 +1311,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1220,9 +1362,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1259,9 +1401,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1310,9 +1452,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1349,9 +1491,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1400,9 +1542,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1439,9 +1581,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1548,8 +1690,15 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, + "execution_count": 20, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.567150Z", + "iopub.status.busy": "2026-03-06T20:18:12.567086Z", + "iopub.status.idle": "2026-03-06T20:18:12.778073Z", + "shell.execute_reply": "2026-03-06T20:18:12.777627Z" + } + }, "outputs": [ { "data": { @@ -1566,96 +1715,18 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - " \n", - "
\n", - " \n", - " \n", - " " + "
\n", + " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1717,9 +1788,30 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.780118Z", + "iopub.status.busy": "2026-03-06T20:18:12.779943Z", + "iopub.status.idle": "2026-03-06T20:18:12.859568Z", + "shell.execute_reply": "2026-03-06T20:18:12.859350Z" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Tried to compute head results when they were already cached\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Tried to compute head results when they were already cached\n" + ] + }, { "data": { "text/html": [ @@ -1727,9 +1819,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -1871,8 +1963,15 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, + "execution_count": 22, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.861161Z", + "iopub.status.busy": "2026-03-06T20:18:12.861085Z", + "iopub.status.idle": "2026-03-06T20:18:12.909565Z", + "shell.execute_reply": "2026-03-06T20:18:12.909275Z" + } + }, "outputs": [ { "data": { @@ -1881,9 +1980,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1920,9 +2019,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -1981,8 +2080,15 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, + "execution_count": 23, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.910522Z", + "iopub.status.busy": "2026-03-06T20:18:12.910471Z", + "iopub.status.idle": "2026-03-06T20:18:12.960421Z", + "shell.execute_reply": "2026-03-06T20:18:12.960201Z" + } + }, "outputs": [ { "data": { @@ -1991,9 +2097,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2030,9 +2136,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2069,9 +2175,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2116,8 +2222,15 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": {}, + "execution_count": 24, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.961385Z", + "iopub.status.busy": "2026-03-06T20:18:12.961330Z", + "iopub.status.idle": "2026-03-06T20:18:12.993024Z", + "shell.execute_reply": "2026-03-06T20:18:12.992831Z" + } + }, "outputs": [ { "data": { @@ -2126,9 +2239,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2186,8 +2299,15 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, + "execution_count": 25, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:12.994018Z", + "iopub.status.busy": "2026-03-06T20:18:12.993966Z", + "iopub.status.idle": "2026-03-06T20:18:13.059361Z", + "shell.execute_reply": "2026-03-06T20:18:13.059153Z" + } + }, "outputs": [ { "data": { @@ -2196,9 +2316,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2235,9 +2355,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2274,9 +2394,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2361,12 +2481,27 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, + "execution_count": 26, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:13.060431Z", + "iopub.status.busy": "2026-03-06T20:18:13.060371Z", + "iopub.status.idle": "2026-03-06T20:18:13.135697Z", + "shell.execute_reply": "2026-03-06T20:18:13.135411Z" + } + }, "outputs": [], "source": [ "attribution_cache_dict = {}\n", - "for key in corrupted_grad_cache.cache_dict.keys():\n", + "# Only iterate keys present in all three caches to avoid KeyErrors\n", + "# from alias mismatches between forward cache (run_with_cache) and\n", + "# backward cache (manual hooks with hook.name keys).\n", + "shared_keys = (\n", + " set(corrupted_grad_cache.cache_dict.keys())\n", + " & set(clean_cache.cache_dict.keys())\n", + " & set(corrupted_cache.cache_dict.keys())\n", + ")\n", + "for key in shared_keys:\n", " attribution_cache_dict[key] = corrupted_grad_cache.cache_dict[key] * (\n", " clean_cache.cache_dict[key] - corrupted_cache.cache_dict[key]\n", " )\n", @@ -2382,8 +2517,15 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, + "execution_count": 27, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:13.136799Z", + "iopub.status.busy": "2026-03-06T20:18:13.136743Z", + "iopub.status.idle": "2026-03-06T20:18:13.154456Z", + "shell.execute_reply": "2026-03-06T20:18:13.154220Z" + } + }, "outputs": [], "source": [ "str_tokens = model.to_str_tokens(clean_tokens[0])\n", @@ -2392,13 +2534,20 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, + "execution_count": 28, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:13.155418Z", + "iopub.status.busy": "2026-03-06T20:18:13.155368Z", + "iopub.status.idle": "2026-03-06T20:18:43.163264Z", + "shell.execute_reply": "2026-03-06T20:18:43.163031Z" + } + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "95a5290e11b64b6a95ef5dd37d027c7a", + "model_id": "9185dfb6e0724cee985f427454d4252e", "version_major": 2, "version_minor": 0 }, @@ -2412,7 +2561,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "be204ae96db74023b957e592a9a0fde9", + "model_id": "775073e43a7a4b2f970c810d2a05c73e", "version_major": 2, "version_minor": 0 }, @@ -2426,7 +2575,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a2409bc6d2524634a48f4556a6773415", + "model_id": "374356a54f2644aea577183fd9478e73", "version_major": 2, "version_minor": 0 }, @@ -2444,9 +2593,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2478,6 +2627,8 @@ } ], "source": [ + "# Activation patching: many forward passes, too slow for CI\n", + "# NBVAL_SKIP\n", "every_block_act_patch_result = patching.get_act_patch_block_every(\n", " model, corrupted_tokens, clean_cache, ioi_metric\n", ")\n", @@ -2496,8 +2647,15 @@ }, { "cell_type": "code", - "execution_count": 30, - "metadata": {}, + "execution_count": 29, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:43.164280Z", + "iopub.status.busy": "2026-03-06T20:18:43.164224Z", + "iopub.status.idle": "2026-03-06T20:18:43.198091Z", + "shell.execute_reply": "2026-03-06T20:18:43.197865Z" + } + }, "outputs": [ { "data": { @@ -2506,9 +2664,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2540,6 +2698,8 @@ } ], "source": [ + "# Depends on patching results above\n", + "# NBVAL_SKIP\n", "def get_attr_patch_block_every(attr_cache):\n", " resid_pre_attr = einops.reduce(\n", " attr_cache.stack_activation(\"resid_pre\"),\n", @@ -2579,8 +2739,15 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": {}, + "execution_count": 30, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:43.199077Z", + "iopub.status.busy": "2026-03-06T20:18:43.199017Z", + "iopub.status.idle": "2026-03-06T20:18:43.237874Z", + "shell.execute_reply": "2026-03-06T20:18:43.237641Z" + } + }, "outputs": [ { "data": { @@ -2589,9 +2756,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2623,6 +2790,8 @@ } ], "source": [ + "# Depends on patching results above\n", + "# NBVAL_SKIP\n", "scatter(\n", " y=every_block_attr_patch_result.reshape(3, -1),\n", " x=every_block_act_patch_result.reshape(3, -1),\n", @@ -2652,13 +2821,20 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": {}, + "execution_count": 31, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:18:43.238937Z", + "iopub.status.busy": "2026-03-06T20:18:43.238883Z", + "iopub.status.idle": "2026-03-06T20:19:23.231591Z", + "shell.execute_reply": "2026-03-06T20:19:23.231341Z" + } + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "18b2e6b0985b40cd8c0cd1a16ba62975", + "model_id": "455c026fd415476ead7ba2cd47b21657", "version_major": 2, "version_minor": 0 }, @@ -2672,7 +2848,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2d034be6501e4c9db1c290b1705e60f8", + "model_id": "e3c83cf7d60b411582dda6643cf80868", "version_major": 2, "version_minor": 0 }, @@ -2686,7 +2862,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e2f3a429be1745e9a874d2fd4881841d", + "model_id": "812432e3a89441a5ae43623328171c14", "version_major": 2, "version_minor": 0 }, @@ -2700,7 +2876,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f8e5bf04563c4b0da801f3f5e1b08e7e", + "model_id": "b5cf4445bf6f49e1a27474934ef12afa", "version_major": 2, "version_minor": 0 }, @@ -2714,7 +2890,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ae4c563073843a68df3b590cb8b4dc3", + "model_id": "f8fb6cce7c4940cb947e900d00bb0653", "version_major": 2, "version_minor": 0 }, @@ -2732,9 +2908,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2766,6 +2942,8 @@ } ], "source": [ + "# Activation patching: many forward passes, too slow for CI\n", + "# NBVAL_SKIP\n", "every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(\n", " model, corrupted_tokens, clean_cache, ioi_metric\n", ")\n", @@ -2783,8 +2961,15 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, + "execution_count": 32, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:19:23.232636Z", + "iopub.status.busy": "2026-03-06T20:19:23.232579Z", + "iopub.status.idle": "2026-03-06T20:19:23.271106Z", + "shell.execute_reply": "2026-03-06T20:19:23.270884Z" + } + }, "outputs": [ { "data": { @@ -2793,9 +2978,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2827,6 +3012,8 @@ } ], "source": [ + "# Depends on patching results above\n", + "# NBVAL_SKIP\n", "def get_attr_patch_attn_head_all_pos_every(attr_cache):\n", " head_out_all_pos_attr = einops.reduce(\n", " attr_cache.stack_activation(\"z\"),\n", @@ -2882,8 +3069,15 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": {}, + "execution_count": 33, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:19:23.272101Z", + "iopub.status.busy": "2026-03-06T20:19:23.272043Z", + "iopub.status.idle": "2026-03-06T20:19:23.315957Z", + "shell.execute_reply": "2026-03-06T20:19:23.315733Z" + } + }, "outputs": [ { "data": { @@ -2892,9 +3086,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -2926,6 +3120,8 @@ } ], "source": [ + "# Depends on patching results above\n", + "# NBVAL_SKIP\n", "scatter(\n", " y=every_head_all_pos_attr_patch_result.reshape(5, -1),\n", " x=every_head_all_pos_act_patch_result.reshape(5, -1),\n", @@ -2955,8 +3151,15 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": {}, + "execution_count": 34, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:19:23.316964Z", + "iopub.status.busy": "2026-03-06T20:19:23.316909Z", + "iopub.status.idle": "2026-03-06T20:19:23.391975Z", + "shell.execute_reply": "2026-03-06T20:19:23.391755Z" + } + }, "outputs": [ { "data": { @@ -2965,9 +3168,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3004,9 +3207,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3043,9 +3246,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3077,6 +3280,8 @@ } ], "source": [ + "# Depends on patching results above\n", + "# NBVAL_SKIP\n", "graph_tok_labels = [\n", " f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))\n", "]\n", @@ -3111,13 +3316,20 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": {}, + "execution_count": 35, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:19:23.392941Z", + "iopub.status.busy": "2026-03-06T20:19:23.392881Z", + "iopub.status.idle": "2026-03-06T20:29:25.887073Z", + "shell.execute_reply": "2026-03-06T20:29:25.886853Z" + } + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "06f39489001845849fbc7446a07066f4", + "model_id": "d5b59accb4b04231a5a8793f934f64a6", "version_major": 2, "version_minor": 0 }, @@ -3131,7 +3343,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1c2eba74a11f47d0a78dd78bd0e60b84", + "model_id": "3c377339930f49a5891caeb0639a8360", "version_major": 2, "version_minor": 0 }, @@ -3145,7 +3357,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f92f8c8c2ffa4d889def1b4214b6ec04", + "model_id": "4b219fbd1a71443aad741dbef66d8453", "version_major": 2, "version_minor": 0 }, @@ -3159,7 +3371,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "65d0fd01f6dc40409c61f5fde0e30470", + "model_id": "3482eb0700f34eb09d28abf5808d48e5", "version_major": 2, "version_minor": 0 }, @@ -3173,7 +3385,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "52452e90576545f8b12a1bbad5fc7c08", + "model_id": "930ee5028a304b2d87d87b7f3685693e", "version_major": 2, "version_minor": 0 }, @@ -3191,9 +3403,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3225,6 +3437,8 @@ } ], "source": [ + "# 10,800 forward passes of GPT-2; too slow for CI without GPU\n", + "# NBVAL_SKIP\n", "every_head_by_pos_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(\n", " model, corrupted_tokens, clean_cache, ioi_metric\n", ")\n", @@ -3248,8 +3462,15 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, + "execution_count": 36, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:29:25.888066Z", + "iopub.status.busy": "2026-03-06T20:29:25.888007Z", + "iopub.status.idle": "2026-03-06T20:29:25.928401Z", + "shell.execute_reply": "2026-03-06T20:29:25.928178Z" + } + }, "outputs": [ { "data": { @@ -3258,9 +3479,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3351,8 +3572,15 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": {}, + "execution_count": 37, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:29:25.929359Z", + "iopub.status.busy": "2026-03-06T20:29:25.929301Z", + "iopub.status.idle": "2026-03-06T20:29:25.983484Z", + "shell.execute_reply": "2026-03-06T20:29:25.983286Z" + } + }, "outputs": [ { "data": { @@ -3361,9 +3589,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3395,6 +3623,7 @@ } ], "source": [ + "# NBVAL_SKIP\n", "scatter(\n", " y=every_head_by_pos_attr_patch_result.reshape(5, -1),\n", " x=every_head_by_pos_act_patch_result.reshape(5, -1),\n", @@ -3426,21 +3655,132 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": {}, + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:29:25.985185Z", + "iopub.status.busy": "2026-03-06T20:29:25.985112Z", + "iopub.status.idle": "2026-03-06T20:34:23.690898Z", + "shell.execute_reply": "2026-03-06T20:34:23.690659Z" + } + }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using pad_token, but it is not set yet.\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "54afcd25ec00493497f1b8a75b367a0d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/689 [00:00', 'The', ' E', 'iff', 'el', ' Tower', ' is', ' located', ' in', ' the', ' city', ' of']\n", "Tokenized answer: [' Paris']\n" ] @@ -3501,12 +3841,12 @@ "data": { "text/html": [ "
Performance on answer token:\n",
-       "Rank: 0        Logit: 20.02 Prob: 83.70% Token: | Rome|\n",
+       "Rank: 0        Logit: 20.02 Prob: 83.71% Token: | Rome|\n",
        "
\n" ], "text/plain": [ "Performance on answer token:\n", - "\u001b[1mRank: \u001b[0m\u001b[1;36m0\u001b[0m\u001b[1m Logit: \u001b[0m\u001b[1;36m20.02\u001b[0m\u001b[1m Prob: \u001b[0m\u001b[1;36m83.70\u001b[0m\u001b[1m% Token: | Rome|\u001b[0m\n" + "\u001b[1mRank: \u001b[0m\u001b[1;36m0\u001b[0m\u001b[1m Logit: \u001b[0m\u001b[1;36m20.02\u001b[0m\u001b[1m Prob: \u001b[0m\u001b[1;36m83.71\u001b[0m\u001b[1m% Token: | Rome|\u001b[0m\n" ] }, "metadata": {}, @@ -3516,7 +3856,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Top 0th token. Logit: 20.02 Prob: 83.70% Token: | Rome|\n", + "Top 0th token. Logit: 20.02 Prob: 83.71% Token: | Rome|\n", "Top 1th token. Logit: 17.03 Prob: 4.23% Token: | Naples|\n", "Top 2th token. Logit: 16.85 Prob: 3.51% Token: | Pompe|\n", "Top 3th token. Logit: 16.14 Prob: 1.73% Token: | Ver|\n", @@ -3543,7 +3883,9 @@ } ], "source": [ - "gpt2_xl = HookedTransformer.from_pretrained(\"gpt2-xl\")\n", + "# NBVAL_IGNORE_OUTPUT\n", + "gpt2_xl = TransformerBridge.boot_transformers(\"gpt2-xl\", device=\"cpu\")\n", + "gpt2_xl.enable_compatibility_mode()\n", "clean_prompt = \"The Eiffel Tower is located in the city of\"\n", "clean_answer = \" Paris\"\n", "# corrupted_prompt = \"The red brown fox jumps is located in the city of\"\n", @@ -3555,8 +3897,15 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": {}, + "execution_count": 39, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:34:23.691945Z", + "iopub.status.busy": "2026-03-06T20:34:23.691873Z", + "iopub.status.idle": "2026-03-06T20:34:23.709690Z", + "shell.execute_reply": "2026-03-06T20:34:23.709418Z" + } + }, "outputs": [], "source": [ "clean_answer_index = gpt2_xl.to_single_token(clean_answer)\n", @@ -3569,17 +3918,24 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": {}, + "execution_count": 40, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:34:23.710724Z", + "iopub.status.busy": "2026-03-06T20:34:23.710667Z", + "iopub.status.idle": "2026-03-06T20:34:24.044162Z", + "shell.execute_reply": "2026-03-06T20:34:24.043917Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Clean logit diff: 10.634519577026367\n", - "Corrupted logit diff: -8.988396644592285\n", - "Clean Metric: tensor(1., device='cuda:0', grad_fn=)\n", - "Corrupted Metric: tensor(0., device='cuda:0', grad_fn=)\n" + "Clean logit diff: 10.6345\n", + "Corrupted logit diff: -8.9884\n", + "Clean Metric: tensor(1., grad_fn=)\n", + "Corrupted Metric: tensor(0., grad_fn=)\n" ] } ], @@ -3596,16 +3952,23 @@ " )\n", "\n", "\n", - "print(\"Clean logit diff:\", CLEAN_LOGIT_DIFF_FACTUAL)\n", - "print(\"Corrupted logit diff:\", CORRUPTED_LOGIT_DIFF_FACTUAL)\n", + "print(f\"Clean logit diff: {CLEAN_LOGIT_DIFF_FACTUAL:.4f}\")\n", + "print(f\"Corrupted logit diff: {CORRUPTED_LOGIT_DIFF_FACTUAL:.4f}\")\n", "print(\"Clean Metric:\", factual_metric(clean_logits))\n", "print(\"Corrupted Metric:\", factual_metric(corrupted_logits))" ] }, { "cell_type": "code", - "execution_count": 42, - "metadata": {}, + "execution_count": 41, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:34:24.045168Z", + "iopub.status.busy": "2026-03-06T20:34:24.045109Z", + "iopub.status.idle": "2026-03-06T20:34:24.062460Z", + "shell.execute_reply": "2026-03-06T20:34:24.062179Z" + } + }, "outputs": [], "source": [ "# corrupted_value, corrupted_cache, corrupted_grad_cache = get_cache_fwd_and_bwd(gpt2_xl, corrupted_prompt, factual_metric)" @@ -3613,8 +3976,15 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, + "execution_count": 42, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:34:24.063409Z", + "iopub.status.busy": "2026-03-06T20:34:24.063359Z", + "iopub.status.idle": "2026-03-06T20:34:24.077006Z", + "shell.execute_reply": "2026-03-06T20:34:24.076758Z" + } + }, "outputs": [ { "name": "stdout", @@ -3636,13 +4006,20 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": {}, + "execution_count": 43, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-06T20:34:24.077849Z", + "iopub.status.busy": "2026-03-06T20:34:24.077803Z", + "iopub.status.idle": "2026-03-06T20:35:34.184090Z", + "shell.execute_reply": "2026-03-06T20:35:34.183780Z" + } + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b767eef7a3cd49b9b3cb6e5301463f08", + "model_id": "3e45ca48906245c59f0683c812d81d53", "version_major": 2, "version_minor": 0 }, @@ -3660,9 +4037,9 @@ "\n", "\n", "
\n", - "
\n", + " }) }; \n", "\n", "" ] @@ -3694,19 +4071,23 @@ } ], "source": [ - "def act_patch_residual(clean_cache, corrupted_tokens, model: HookedTransformer, metric):\n", - " if len(corrupted_tokens.shape) == 2:\n", - " corrupted_tokens = corrupted_tokens[0]\n", + "def act_patch_residual(clean_cache, corrupted_tokens, model: TransformerBridge, metric):\n", + " # Ensure tokens are 2D [batch, seq] for TransformerBridge\n", + " if corrupted_tokens.ndim == 1:\n", + " corrupted_tokens = corrupted_tokens.unsqueeze(0)\n", + " seq_len = corrupted_tokens.shape[-1]\n", " residual_patches = torch.zeros(\n", - " (model.cfg.n_layers, len(corrupted_tokens)), device=model.cfg.device\n", + " (model.cfg.n_layers, seq_len), device=model.cfg.device\n", " )\n", "\n", " def residual_hook(resid_pre, hook, layer, pos):\n", + " # Clone to break autograd view chain before inplace modification\n", + " resid_pre = resid_pre.clone()\n", " resid_pre[:, pos, :] = clean_cache[\"resid_pre\", layer][:, pos, :]\n", " return resid_pre\n", "\n", " for layer in tqdm.tqdm(range(model.cfg.n_layers)):\n", - " for pos in range(len(corrupted_tokens)):\n", + " for pos in range(seq_len):\n", " patched_logits = model.run_with_hooks(\n", " corrupted_tokens,\n", " fwd_hooks=[\n", @@ -3730,13 +4111,13 @@ " xaxis=\"Position\",\n", " yaxis=\"Layer\",\n", " x=clean_str_tokens,\n", - ")" + ")\n" ] } ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "transformer-lens", "language": "python", "name": "python3" }, @@ -3750,12 +4131,10814 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.12" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "0009ee6f2b5b40fa9e17f4ed631845c3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_8207a641c8104b12a17c6fad47ac627a", + "max": 144, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_121da76c1e834360964e007a8050f8df", + "tabbable": null, + "tooltip": null, + "value": 144 + } + }, + "00404ffb858c414cacd717a11b7a31bd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "00509c13c3fa44cf95149948bd3672a2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d230304b88114f2a9b85f5a48f441ce6", + "placeholder": "​", + "style": "IPY_MODEL_ee535e2cfa694be1a7857b1867b8b608", + "tabbable": null, + "tooltip": null, + "value": " 456k/? [00:00<00:00, 12.7MB/s]" + } + }, + "020cf001eb7d496295a325cbc0ee8718": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "029191a4aa5344d18bf7f10861739eeb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_af67816d50074ae498ef9b600b4175ed", + "placeholder": "​", + "style": "IPY_MODEL_7ac7b11ef3e34bf1a12926c745e08707", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "02bd229aecb244b095eda2b6dc8b3904": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_7a32c6104cdb4eee8dadc248c129040c", + "placeholder": "​", + "style": "IPY_MODEL_8c0e7d4b46c14e2bb2167752820a9274", + "tabbable": null, + "tooltip": null, + "value": " 1.36M/? [00:00<00:00, 17.1MB/s]" + } + }, + "043e0e7fe43744589b7bad2527c2eac0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d3dab66a1c254f07afa02e73e6fd121d", + "placeholder": "​", + "style": "IPY_MODEL_a386fa811d524ae08ac67cce5ebf3a15", + "tabbable": null, + "tooltip": null, + "value": " 1.36M/? [00:00<00:00, 20.0MB/s]" + } + }, + "04d1b3296c75497bb314206d6c7d5341": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_8e6cf78296b14bc381f13658ebf99912", + "placeholder": "​", + "style": "IPY_MODEL_0ebc4d3f1e94415086e749f4cd41b783", + "tabbable": null, + "tooltip": null, + "value": " 1.04M/? [00:00<00:00, 10.6MB/s]" + } + }, + "0582e71e725a4851a1905aceaa3c36ae": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "07149da010c5489696b03653df25cdd2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0cab0dc7fdbd4778a924efc3cc0da67a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_841cecf9a9634f1f9b3f5b91e392568a", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3ea4f600b4414b4bbd3710b2644921ed", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "0e453235a18e4f5c9008040b1420f718": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "0e772dc4f28744a6b528cc5a6b5181d8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "0ebc4d3f1e94415086e749f4cd41b783": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "0fb9d14a0515405da69cc4bb8786a684": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_2a7d5e3aff9e444bb0ce5f2ac0dafe2e", + "max": 180, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2f472952f9bb4332862ed049da93afef", + "tabbable": null, + "tooltip": null, + "value": 180 + } + }, + "0fcbf5121fd54d6b97c761f6a0d358cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0fcf46d0897744d29de840dc273512ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_2569c461e9144c4c82e856e4533449ba", + "placeholder": "​", + "style": "IPY_MODEL_2f332b079a3044fa8ab87f028a7b80b0", + "tabbable": null, + "tooltip": null, + "value": " 48/48 [01:10<00:00,  1.46s/it]" + } + }, + "0fd7652c5e624ef7b2a36a0b0397f51d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "10fc3cb383744edbb5aa1cc1df559358": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "121da76c1e834360964e007a8050f8df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "123b21b4e5fb441baf7d435ba5dd8a24": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_76eb125545bb41dfaec64b1172ba0d4c", + "max": 26, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_686e031d75704124887b21e94e64eb40", + "tabbable": null, + "tooltip": null, + "value": 26 + } + }, + "12c29a51576a400c900ffaf1703f62bb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_5cc6434927224f72aadd34dc0e0c2894", + "placeholder": "​", + "style": "IPY_MODEL_432ff9b9f3574d69b47e8272dd762923", + "tabbable": null, + "tooltip": null, + "value": "merges.txt: " + } + }, + "12f960167a1c417aacdd77ce3a997e35": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_8dd28e04200641a9a2a4e5ed241db518", + "placeholder": "​", + "style": "IPY_MODEL_912fc8506a6f45e38f1573d27eff6457", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "16129fa8a327496f9fe82683bedf75c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_61ceed60568e48cd87035200580f4360", + "IPY_MODEL_20475ce268a9433eb8bf05127ccd983a", + "IPY_MODEL_c43891e5ac164a748faff2d9e71b4a97" + ], + "layout": "IPY_MODEL_10fc3cb383744edbb5aa1cc1df559358", + "tabbable": null, + "tooltip": null + } + }, + "168971d7125c489796073d94fcd4b7aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_ae5d013bec884f4b97d1852b1fb52432", + "placeholder": "​", + "style": "IPY_MODEL_87c101f3cc0f4553870ca9de688b9e83", + "tabbable": null, + "tooltip": null, + "value": "vocab.json: " + } + }, + "1775bd14b2104a078aa63991cc11ba85": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "1797f0e3b47b4b6d8dd79145e066eff3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "17f4c003e2354f5b8f5a967c753f52eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_6dd11b3c888a461aaa85372b044ccd53", + "placeholder": "​", + "style": "IPY_MODEL_dc90adc8272e4e3e929844e2ceef149b", + "tabbable": null, + "tooltip": null, + "value": " 124/124 [00:00<00:00, 54.7kB/s]" + } + }, + "180d2ba6e10e4e808eba69a8517d5080": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_de109b45a18f42b5aa83f63ef683379f", + "placeholder": "​", + "style": "IPY_MODEL_4e44c9999b664ea9bad1b4e360ee76c7", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "1823609b42594e1b9179198575250c33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "18bad67b12f6437b9eb889c256d70970": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1ce4af7abafe4a2cb6e2602e1abc8254": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20475ce268a9433eb8bf05127ccd983a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f2d44c7aac1a4987a77102feb849c657", + "max": 580, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1797f0e3b47b4b6d8dd79145e066eff3", + "tabbable": null, + "tooltip": null, + "value": 580 + } + }, + "20cadcfcbe5e403c911f9cea70bfda26": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "212c5660764844db8cdc6e3a16099521": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2132efe02e2c4c3fa93a2ea04db76352": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "22e77d7546334e038b64cc2c856a6a13": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "241c137837704376b0410c13921ae36c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2452dd39a79742b29964500360f4a478": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2569c461e9144c4c82e856e4533449ba": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "26b37ecd6ab14144b58cd76629664dcf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2848d61b3098431baa1d82fb85f469fe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "293230277eac481e84ab200e7cd5bdc1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "29376b858cd4489a8fcefc2b096df1e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2970477ecd6545b2bb698748d4019dac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "29bf4e8f0e6042b498c6ac50d8fedf68": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "2a6fd373a6524eb3ae033ea78d3cb61e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "2a7d5e3aff9e444bb0ce5f2ac0dafe2e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2aafc5e9f0f24f8fa8cf01ac2d6d7e4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2ab79d79338946df90a2cba8e497097e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_59190b0bd8e74ee1bab6aea2f931856d", + "placeholder": "​", + "style": "IPY_MODEL_c0aa3c04c0a74717b8fc6700213bf579", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "2f332b079a3044fa8ab87f028a7b80b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "2f472952f9bb4332862ed049da93afef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "30c3ecad3815435a8fc952558e48b212": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "30d6d534fa42478bbde4e2c1ddb60d12": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_700b88d4443848bab341b9d7b00cab54", + "placeholder": "​", + "style": "IPY_MODEL_b2e784a339524df682698e606959668e", + "tabbable": null, + "tooltip": null, + "value": "tokenizer.json: " + } + }, + "31a28b69348b40bfbd14a54380bfb766": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_eddca0196bdf4e1eb5605e557bfe597b", + "placeholder": "​", + "style": "IPY_MODEL_c762da254b434ffea5b7c35e73009302", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "31d69a25403c4e2d8ccf924346bad3fd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_c455478a557645b29777950e364a5006", + "max": 144, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9157c241f7064a8596e1ffaeb850e59c", + "tabbable": null, + "tooltip": null, + "value": 144 + } + }, + "3482eb0700f34eb09d28abf5808d48e5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_56950a06851443158da15d02a0ab0a17", + "IPY_MODEL_50e6f58782f24b0088502b23c13c2f09", + "IPY_MODEL_e39161cc6aa446b48dd0e1600fae5dc5" + ], + "layout": "IPY_MODEL_30c3ecad3815435a8fc952558e48b212", + "tabbable": null, + "tooltip": null + } + }, + "351c909cd34b4df2ace07a5b8364e0e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "357dcb56edba4564b6ec3051f3e977a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "363c2c2e96624875af87c420c7e2cf95": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3673315e0fe741048d2ba304360be671": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "3704e0be72444fd9a07038fbe1b19156": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "374356a54f2644aea577183fd9478e73": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_029191a4aa5344d18bf7f10861739eeb", + "IPY_MODEL_3f52918a2f424cb29b151aa3e8e8fcb7", + "IPY_MODEL_ea7e663957454aca96f66d90c203f330" + ], + "layout": "IPY_MODEL_84867a129d0043b4910ac244e8a984df", + "tabbable": null, + "tooltip": null + } + }, + "387f00b7ea5041e08595fcd2c4f4933a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_1775bd14b2104a078aa63991cc11ba85", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5add17a9b82b4a29a56ade0ef11c6256", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "38e4ab89d9ed4f16a2a481054a18977f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "39409ff188e6463bab5bf783828cdbd6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "39e6dff69f394f7dbab2786a531f6888": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "39f8ccf31f6c49c7a95c59989236a3cf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3c1a63df1c3c40f697a34f60254cb6f3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_5ed82326612a4505a34bc16d6b0b5fa8", + "placeholder": "​", + "style": "IPY_MODEL_69d19b1cf82443ff8994ffd7b156921c", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "3c324e62681a4d60a44231347d0a3402": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_c9571f91e4894ac0ab6f9433d6dd7258", + "placeholder": "​", + "style": "IPY_MODEL_c08b955fa9494958bee9f565c568fc31", + "tabbable": null, + "tooltip": null, + "value": "tokenizer.json: " + } + }, + "3c377339930f49a5891caeb0639a8360": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_cf1b5c1ab05c413d825a067d1bbd193c", + "IPY_MODEL_4ae1c9419a0842e181b2cd69ca867ecd", + "IPY_MODEL_c567d94812a9482d83ae1d67116b41e2" + ], + "layout": "IPY_MODEL_e8122dbf356c439f973ebe1fd249eb42", + "tabbable": null, + "tooltip": null + } + }, + "3d29acbe122346388e66e0741971a810": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "3d83725b10254139a51cb688a495459d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3dc2fa3da5184b2da22329fb9c5a639d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_168971d7125c489796073d94fcd4b7aa", + "IPY_MODEL_8d1828845704489e8c84d24e85e473fb", + "IPY_MODEL_9b8faa8242e94e068bb82a324719b303" + ], + "layout": "IPY_MODEL_1ce4af7abafe4a2cb6e2602e1abc8254", + "tabbable": null, + "tooltip": null + } + }, + "3e0f9511fc62415ebca5e9fd4789d12a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3e45ca48906245c59f0683c812d81d53": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_180d2ba6e10e4e808eba69a8517d5080", + "IPY_MODEL_c4f6afd0756e4acf8252ecebe866de2c", + "IPY_MODEL_0fcf46d0897744d29de840dc273512ee" + ], + "layout": "IPY_MODEL_c654cc379219453a807fbab5cde04900", + "tabbable": null, + "tooltip": null + } + }, + "3ea4f600b4414b4bbd3710b2644921ed": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3f52918a2f424cb29b151aa3e8e8fcb7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f9e762f547ee4031b2e23cb8aee5deb4", + "max": 180, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7d93595debd040d1afe7635d647ccc81", + "tabbable": null, + "tooltip": null, + "value": 180 + } + }, + "3f6502ba8b5d4848b82f367fe4c55de7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_68f6906692b447f1acec3cef5772fe5a", + "placeholder": "​", + "style": "IPY_MODEL_67cda37b9ae74d3484442b7d3bb19a26", + "tabbable": null, + "tooltip": null, + "value": "merges.txt: " + } + }, + "4000e5115c6d48d687ab9b9695a0d826": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4132df648b67489484a3a657e0d8ed4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d94d7a4f5ab34fc9a4a4ee0a07764461", + "placeholder": "​", + "style": "IPY_MODEL_0fd7652c5e624ef7b2a36a0b0397f51d", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "41e0db38a28c4e10aa81cfdae4bea26c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3f6502ba8b5d4848b82f367fe4c55de7", + "IPY_MODEL_87e1119bd2c54103b083f204120140b9", + "IPY_MODEL_00509c13c3fa44cf95149948bd3672a2" + ], + "layout": "IPY_MODEL_8c9ff139497c4d8caf026eefbcb0b628", + "tabbable": null, + "tooltip": null + } + }, + "42f74b777599485fa13c4d5c249e436d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "432ff9b9f3574d69b47e8272dd762923": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "455c026fd415476ead7ba2cd47b21657": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2ab79d79338946df90a2cba8e497097e", + "IPY_MODEL_d30ceddacb8247689bffcc83981571cf", + "IPY_MODEL_772612a2cf674b9f9028cb302cb9912a" + ], + "layout": "IPY_MODEL_7c633649c6d04456b4201cf608694442", + "tabbable": null, + "tooltip": null + } + }, + "45e840f42f8c46d491678fad7820e835": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4605c4cb08be427590930d8ffb9289d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_b59d1c8089e04592a1b87a7c198d1f6c", + "max": 144, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ba22c66487384f35b78c82777fa87091", + "tabbable": null, + "tooltip": null, + "value": 144 + } + }, + "467969a5e8fa4d95aa459a9f5d31b554": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_212c5660764844db8cdc6e3a16099521", + "placeholder": "​", + "style": "IPY_MODEL_7de33c6a558d40a69a85b3db9e203ae8", + "tabbable": null, + "tooltip": null, + "value": "model.safetensors: 100%" + } + }, + "46c907a0ac31481f9147bf22e2ac5864": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48bb0e01750049b8a4883624a32221a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_91db9856db97451196e16d433896af48", + "placeholder": "​", + "style": "IPY_MODEL_4e68d17b4b7a45369d6917887ddd7a28", + "tabbable": null, + "tooltip": null, + "value": "vocab.json: " + } + }, + "49be2b480d5847a3af7835c317236280": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4aac3150a78e41dfb81c08a9c0861de1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_42f74b777599485fa13c4d5c249e436d", + "max": 148, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3e0f9511fc62415ebca5e9fd4789d12a", + "tabbable": null, + "tooltip": null, + "value": 148 + } + }, + "4ae1c9419a0842e181b2cd69ca867ecd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_aebcd1509a684784a2f99e07e153a5f6", + "max": 2160, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c836782daaa847248009a626db347182", + "tabbable": null, + "tooltip": null, + "value": 2160 + } + }, + "4b219fbd1a71443aad741dbef66d8453": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3c1a63df1c3c40f697a34f60254cb6f3", + "IPY_MODEL_5f730b9ef10e4b8bb97c4fef1bd7cbb2", + "IPY_MODEL_a0df7ae7fcc1441a8c9cca5a80b539b0" + ], + "layout": "IPY_MODEL_39e6dff69f394f7dbab2786a531f6888", + "tabbable": null, + "tooltip": null + } + }, + "4b378e2fe92a4bb5a0cc2adee8a9372d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b79fd3a57e74d97b67849c668fe95b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e44c9999b664ea9bad1b4e360ee76c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "4e68d17b4b7a45369d6917887ddd7a28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "502e8cbf7b7c47b480b8a85405fc24cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "50e6f58782f24b0088502b23c13c2f09": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_5b5b02c66e0e4340a7f099880e893cf9", + "max": 2160, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c0ad8eb99bbe4246972c6db4a9d233a3", + "tabbable": null, + "tooltip": null, + "value": 2160 + } + }, + "52cd22d9c796437692165d3f3ed48e82": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "52d0d6cb8b7e4b06ae6ee7f51eec9419": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d7e105c660824d349c4ee17006f04437", + "placeholder": "​", + "style": "IPY_MODEL_2848d61b3098431baa1d82fb85f469fe", + "tabbable": null, + "tooltip": null, + "value": " 548M/548M [00:18<00:00, 60.2MB/s]" + } + }, + "54af6102260d458db54e634c9814aa6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "54c65cd71486494f8e78f0740aca2b40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_6d3de9443ae74b75930d8397e9c7ed9a", + "placeholder": "​", + "style": "IPY_MODEL_63c8f7fdcf6346d9b3ca140d0f63f8e4", + "tabbable": null, + "tooltip": null, + "value": " 144/144 [00:07<00:00, 18.78it/s]" + } + }, + "54daaf323029464b9b67f8a4f53b3002": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "56950a06851443158da15d02a0ab0a17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_39f8ccf31f6c49c7a95c59989236a3cf", + "placeholder": "​", + "style": "IPY_MODEL_54af6102260d458db54e634c9814aa6f", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "59190b0bd8e74ee1bab6aea2f931856d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5add17a9b82b4a29a56ade0ef11c6256": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5b5b02c66e0e4340a7f099880e893cf9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5cc6434927224f72aadd34dc0e0c2894": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5dcdd724181c46e3aa0e9eda451101eb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5e617862d88745c792f610f6651662f6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ed82326612a4505a34bc16d6b0b5fa8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f1b1b85bb4a4897be8f6b1d5b59a659": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_7ffa006c49564aa8ad58f08f48b98955", + "placeholder": "​", + "style": "IPY_MODEL_357dcb56edba4564b6ec3051f3e977a5", + "tabbable": null, + "tooltip": null, + "value": " 144/144 [00:08<00:00, 15.78it/s]" + } + }, + "5f730b9ef10e4b8bb97c4fef1bd7cbb2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_bfbd7c17cf3746138c6be29ce10a26c1", + "max": 2160, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a36825bfce2c41b3924afc776327d4d8", + "tabbable": null, + "tooltip": null, + "value": 2160 + } + }, + "5fb5c77d41bd4de8911178d2541b6fa7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5fe88cfad7ef40b1892e7a5a341ba69a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "60e5aae936bb41faad191cf2155f78d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "60fe64c4851849aaba2c5156d93d364a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_4b378e2fe92a4bb5a0cc2adee8a9372d", + "placeholder": "​", + "style": "IPY_MODEL_8e4b9fcabfbc4a37a54f08a99e28b220", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "61be4717c2894d1eba936d704dd52390": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_e43087fb83c44e538a2a774d33efa09e", + "max": 548105171, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5dcdd724181c46e3aa0e9eda451101eb", + "tabbable": null, + "tooltip": null, + "value": 548105171 + } + }, + "61ceed60568e48cd87035200580f4360": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d7699d95a0ab4240bfa2754ac81a4dea", + "placeholder": "​", + "style": "IPY_MODEL_b92108f127ec4341af59d110b5f991c4", + "tabbable": null, + "tooltip": null, + "value": "Loading weights: 100%" + } + }, + "61ffe9ae7ab44e94b5729cae80e49437": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "62e2fde99feb46cbb957fcd393fa7280": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "63c63cf6c0b846099d787f3dc52849de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_741e4c3c5c56426c91955f6b0622f629", + "IPY_MODEL_ae756397e14a44dead64ceb4044c7289", + "IPY_MODEL_6c3b456abe2b4d2690600945a7dd8c55" + ], + "layout": "IPY_MODEL_de1370864373438193780364711f6248", + "tabbable": null, + "tooltip": null + } + }, + "63c8f7fdcf6346d9b3ca140d0f63f8e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "6497be00022143ed8ec04f3bfe3f64cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_982965b0bdfe41c6829a1914140650fd", + "max": 124, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ae0ff1942d284be5835b147b38811a49", + "tabbable": null, + "tooltip": null, + "value": 124 + } + }, + "6773a029b78444c5942f40eaf571615f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_38e4ab89d9ed4f16a2a481054a18977f", + "placeholder": "​", + "style": "IPY_MODEL_3673315e0fe741048d2ba304360be671", + "tabbable": null, + "tooltip": null, + "value": "Loading weights: 100%" + } + }, + "67cda37b9ae74d3484442b7d3bb19a26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "6823f3ab61e14c0b9abc658260b94101": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bb119902818643e5ace138fdfc45173d", + "IPY_MODEL_f7c397d0e7d84175bc4fedcd69e99ca1", + "IPY_MODEL_fd150a5176074e959dfa52a35770b5f0" + ], + "layout": "IPY_MODEL_241c137837704376b0410c13921ae36c", + "tabbable": null, + "tooltip": null + } + }, + "686e031d75704124887b21e94e64eb40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "68f6906692b447f1acec3cef5772fe5a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6961b220931c407daa681ad557f6ba78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69d19b1cf82443ff8994ffd7b156921c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "6c3b456abe2b4d2690600945a7dd8c55": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_e3abfde7cfd24e938684e179059edd9d", + "placeholder": "​", + "style": "IPY_MODEL_293230277eac481e84ab200e7cd5bdc1", + "tabbable": null, + "tooltip": null, + "value": " 26.0/26.0 [00:00<00:00, 3.64kB/s]" + } + }, + "6ca82062740348f2bee12629de7f8e2f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "6d384606af404238b9c4c89adea26c1c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6d3de9443ae74b75930d8397e9c7ed9a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6d4af53ee70a462ebdc51863189f8e2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_39409ff188e6463bab5bf783828cdbd6", + "placeholder": "​", + "style": "IPY_MODEL_7bd58f2c5b444ed9b5f21c6b364c9dce", + "tabbable": null, + "tooltip": null, + "value": " 456k/? [00:00<00:00, 9.34MB/s]" + } + }, + "6dd11b3c888a461aaa85372b044ccd53": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6f7575088f10441c87ded2f68ac37e9f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "700b88d4443848bab341b9d7b00cab54": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "72035846c38c4a439583cc5e974c0a52": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "730901f2bf3d43b1ac64c2f5813a45b7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_c08dfc48a3574ac1ba2e416960d1d3ea", + "placeholder": "​", + "style": "IPY_MODEL_3d29acbe122346388e66e0741971a810", + "tabbable": null, + "tooltip": null, + "value": "generation_config.json: 100%" + } + }, + "737d22cc16184d6a92cf045c476c7a01": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_8f5f5df1b0314449a113f7bb959fa273", + "placeholder": "​", + "style": "IPY_MODEL_7671d9fee96d4e9f921046a1fb092672", + "tabbable": null, + "tooltip": null, + "value": " 689/689 [00:00<00:00, 131kB/s]" + } + }, + "73cd80c764784b4197af01198ba6b886": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f796802e863c48138fdcec92f546a372", + "placeholder": "​", + "style": "IPY_MODEL_22e77d7546334e038b64cc2c856a6a13", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "7412e9c63c0f4915a2af01c883450f78": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_cb86f659e5ad4c5296c32b97d99d357c", + "placeholder": "​", + "style": "IPY_MODEL_8dacdd0b8b5e4433ae4511433eb7df1d", + "tabbable": null, + "tooltip": null, + "value": "generation_config.json: 100%" + } + }, + "741e4c3c5c56426c91955f6b0622f629": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_5e617862d88745c792f610f6651662f6", + "placeholder": "​", + "style": "IPY_MODEL_60e5aae936bb41faad191cf2155f78d3", + "tabbable": null, + "tooltip": null, + "value": "tokenizer_config.json: 100%" + } + }, + "75b76bb0ff2f491a8e5febeb8166cbd2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_467969a5e8fa4d95aa459a9f5d31b554", + "IPY_MODEL_61be4717c2894d1eba936d704dd52390", + "IPY_MODEL_52d0d6cb8b7e4b06ae6ee7f51eec9419" + ], + "layout": "IPY_MODEL_c9188b4dfa3b480ebab78464a2ff104d", + "tabbable": null, + "tooltip": null + } + }, + "7655d70259dd4dbc8bd4d288f8850b7c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7671d9fee96d4e9f921046a1fb092672": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "76eb125545bb41dfaec64b1172ba0d4c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "772612a2cf674b9f9028cb302cb9912a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_07149da010c5489696b03653df25cdd2", + "placeholder": "​", + "style": "IPY_MODEL_8ccfc9585b794ba293cbe376311c42ba", + "tabbable": null, + "tooltip": null, + "value": " 144/144 [00:07<00:00, 17.62it/s]" + } + }, + "775073e43a7a4b2f970c810d2a05c73e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_12f960167a1c417aacdd77ce3a997e35", + "IPY_MODEL_0fb9d14a0515405da69cc4bb8786a684", + "IPY_MODEL_a6dc16d1952c42ae838c1828b1c9f0b5" + ], + "layout": "IPY_MODEL_d93d361910094de5b5d62e4ae2c96ac9", + "tabbable": null, + "tooltip": null + } + }, + "7a16a107b5084abe812f68a16d02e8b0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a32c6104cdb4eee8dadc248c129040c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7ac7b11ef3e34bf1a12926c745e08707": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7aebe1ba464949849da0e182d90d0669": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7bd58f2c5b444ed9b5f21c6b364c9dce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7c2906dcc3d34ae6846332eb5375cc58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7c633649c6d04456b4201cf608694442": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d0ba24e89554742a562ce50135447f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d93595debd040d1afe7635d647ccc81": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7de33c6a558d40a69a85b3db9e203ae8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7ffa006c49564aa8ad58f08f48b98955": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "812432e3a89441a5ae43623328171c14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b61375bbe9984e0d869c0cad72b655ea", + "IPY_MODEL_8d7fb217280342598b4a2536fd1ef88c", + "IPY_MODEL_df1990c20671455086758d842f54f80f" + ], + "layout": "IPY_MODEL_20cadcfcbe5e403c911f9cea70bfda26", + "tabbable": null, + "tooltip": null + } + }, + "8207a641c8104b12a17c6fad47ac627a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8329289a73c54504a4d450dfa378ac04": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "841cecf9a9634f1f9b3f5b91e392568a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "846d52f5b0f44e2586d7e6dfd3c44844": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_cfee0a94009c461dbdca5b73961f7fbe", + "placeholder": "​", + "style": "IPY_MODEL_f732f9391dd14bcaace6fd5c27a8335a", + "tabbable": null, + "tooltip": null, + "value": " 144/144 [00:07<00:00, 18.66it/s]" + } + }, + "84867a129d0043b4910ac244e8a984df": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "85f0ab0d989a42989ffb354440fe312a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_91335e94218e478fbb5d6126dda9ca61", + "IPY_MODEL_ed95bc25b34947d3aa1c302aa3facfc0", + "IPY_MODEL_b4bee486a3a34a46b495a87ed02c6ada" + ], + "layout": "IPY_MODEL_5fb5c77d41bd4de8911178d2541b6fa7", + "tabbable": null, + "tooltip": null + } + }, + "87c101f3cc0f4553870ca9de688b9e83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "87e1119bd2c54103b083f204120140b9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_0e772dc4f28744a6b528cc5a6b5181d8", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2132efe02e2c4c3fa93a2ea04db76352", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "89eefe764e6b43d3b9a9fe7e4658d82f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ab4ec47b46e401883c185417c452f17": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b79521229ad48ae86924053a0f575f1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8c0e7d4b46c14e2bb2167752820a9274": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "8c9ff139497c4d8caf026eefbcb0b628": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ccfc9585b794ba293cbe376311c42ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "8d1828845704489e8c84d24e85e473fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_cb23a6010b2d47bb821a9b72d283b661", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e3ee9a7a783b4275a78fc4e95c5cfadd", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "8d7fb217280342598b4a2536fd1ef88c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_c0fe834752e24e71be99a853c87aba82", + "max": 144, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cac816368dee481a9fee6b196a2b16d6", + "tabbable": null, + "tooltip": null, + "value": 144 + } + }, + "8dacdd0b8b5e4433ae4511433eb7df1d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "8dd28e04200641a9a2a4e5ed241db518": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e4b9fcabfbc4a37a54f08a99e28b220": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "8e6cf78296b14bc381f13658ebf99912": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f5f5df1b0314449a113f7bb959fa273": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f8ffe07f8314800ad637194c8c7d10f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "912fc8506a6f45e38f1573d27eff6457": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "91335e94218e478fbb5d6126dda9ca61": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_a996f052d0ed4d938f0c71fc72e8c1b6", + "placeholder": "​", + "style": "IPY_MODEL_2a6fd373a6524eb3ae033ea78d3cb61e", + "tabbable": null, + "tooltip": null, + "value": "config.json: 100%" + } + }, + "9157c241f7064a8596e1ffaeb850e59c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9185dfb6e0724cee985f427454d4252e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f950cda925b0497990e1ca32a8883bcc", + "IPY_MODEL_e7a67acf48bc47a6a04a8349c80f149f", + "IPY_MODEL_9a944a1fc9504b7ca5cd587c0b34371f" + ], + "layout": "IPY_MODEL_c3804dc9990243d591746c205377760d", + "tabbable": null, + "tooltip": null + } + }, + "91db9856db97451196e16d433896af48": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "91f733061a614e7caa3a857c710c9039": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_c51496d64234439ebbfec98b59f44803", + "placeholder": "​", + "style": "IPY_MODEL_29bf4e8f0e6042b498c6ac50d8fedf68", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "930ee5028a304b2d87d87b7f3685693e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_91f733061a614e7caa3a857c710c9039", + "IPY_MODEL_99f977d0bd87444d9b78772bd84b2c8e", + "IPY_MODEL_ca1227195a3946c1a803126b165b9af0" + ], + "layout": "IPY_MODEL_49be2b480d5847a3af7835c317236280", + "tabbable": null, + "tooltip": null + } + }, + "954f099efbd3462fb488406f502fb70e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "96b11cc7ef2c4a5a96812363b30759a3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "97f6ad1918334a3ab503d4a5da11c9ef": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "982965b0bdfe41c6829a1914140650fd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "98bce027ddf046c29bbbb03c6e9b1de3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "99f977d0bd87444d9b78772bd84b2c8e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_7a16a107b5084abe812f68a16d02e8b0", + "max": 2160, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a395cee8c6ec4648bb952ec6343b24ad", + "tabbable": null, + "tooltip": null, + "value": 2160 + } + }, + "9a944a1fc9504b7ca5cd587c0b34371f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d95b7b9cf6914acb9d1152502d2ba41b", + "placeholder": "​", + "style": "IPY_MODEL_d7c69daa5fa44a6487f5dc66380ec31a", + "tabbable": null, + "tooltip": null, + "value": " 180/180 [00:10<00:00, 18.55it/s]" + } + }, + "9b50599c4a084d9eb5b8755040c9bd32": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9b8faa8242e94e068bb82a324719b303": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f5bbf314d840422b9e486386de3f5bb6", + "placeholder": "​", + "style": "IPY_MODEL_dc68cf2bf3e94df881377a106754a350", + "tabbable": null, + "tooltip": null, + "value": " 1.04M/? [00:00<00:00, 10.5MB/s]" + } + }, + "9ba87779605c4969bf48b4071a94c630": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_730901f2bf3d43b1ac64c2f5813a45b7", + "IPY_MODEL_e9dfce1b7d944947b19c22b970c32b87", + "IPY_MODEL_a7878d3d98f84a3bbe0ff6bfd94ef2ea" + ], + "layout": "IPY_MODEL_bcff757716c049a6a49f5af659e164a1", + "tabbable": null, + "tooltip": null + } + }, + "9d61281b0ee14d3fa86d1bc0dc3152a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d378df4c155647ff92126c3e9742e874", + "IPY_MODEL_123b21b4e5fb441baf7d435ba5dd8a24", + "IPY_MODEL_c6754ee7d6fb41899c51556eea37ee4f" + ], + "layout": "IPY_MODEL_00404ffb858c414cacd717a11b7a31bd", + "tabbable": null, + "tooltip": null + } + }, + "a05625d67e634f2a80c65cdcfcbe8f8c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0ce07721af346faa19a490bb93eee61": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_363c2c2e96624875af87c420c7e2cf95", + "placeholder": "​", + "style": "IPY_MODEL_b732ea0e03674d4384ac0d2dbf2a5f69", + "tabbable": null, + "tooltip": null, + "value": "config.json: 100%" + } + }, + "a0df7ae7fcc1441a8c9cca5a80b539b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_bcaf8ebca41240bd86dde0c617b68f01", + "placeholder": "​", + "style": "IPY_MODEL_dd6bf89931a64c63bbcd2cf526835c2d", + "tabbable": null, + "tooltip": null, + "value": " 2160/2160 [02:00<00:00, 18.64it/s]" + } + }, + "a1f007e8fb68491daa3ba444ca49f505": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_48bb0e01750049b8a4883624a32221a5", + "IPY_MODEL_0cab0dc7fdbd4778a924efc3cc0da67a", + "IPY_MODEL_04d1b3296c75497bb314206d6c7d5341" + ], + "layout": "IPY_MODEL_e08307b6cbe649fa95ca5bdfdbeae3a2", + "tabbable": null, + "tooltip": null + } + }, + "a314648aea3d40f89ac931c47f81c9e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "a36825bfce2c41b3924afc776327d4d8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a386fa811d524ae08ac67cce5ebf3a15": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "a395cee8c6ec4648bb952ec6343b24ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a6dc16d1952c42ae838c1828b1c9f0b5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_7655d70259dd4dbc8bd4d288f8850b7c", + "placeholder": "​", + "style": "IPY_MODEL_eb49abf5e8ea41e69c1307f78fda4a90", + "tabbable": null, + "tooltip": null, + "value": " 180/180 [00:09<00:00, 18.56it/s]" + } + }, + "a711143026bc46a5b1b7bc3dccca1850": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_b3c7850b621346e995ccdb689fe20c5e", + "max": 689, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1823609b42594e1b9179198575250c33", + "tabbable": null, + "tooltip": null, + "value": 689 + } + }, + "a7878d3d98f84a3bbe0ff6bfd94ef2ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_2970477ecd6545b2bb698748d4019dac", + "placeholder": "​", + "style": "IPY_MODEL_7c2906dcc3d34ae6846332eb5375cc58", + "tabbable": null, + "tooltip": null, + "value": " 124/124 [00:00<00:00, 73.2kB/s]" + } + }, + "a92811787eb84dd19d9ec2fb2eab7eee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a996f052d0ed4d938f0c71fc72e8c1b6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa036d7308b546f0ba4f77509cc8f8d9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac80df1b39654611afa84893a8911c2a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae0ff1942d284be5835b147b38811a49": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ae5d013bec884f4b97d1852b1fb52432": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae756397e14a44dead64ceb4044c7289": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_4b79fd3a57e74d97b67849c668fe95b5", + "max": 26, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2aafc5e9f0f24f8fa8cf01ac2d6d7e4f", + "tabbable": null, + "tooltip": null, + "value": 26 + } + }, + "aebcd1509a684784a2f99e07e153a5f6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af67816d50074ae498ef9b600b4175ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b2e784a339524df682698e606959668e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "b3c7850b621346e995ccdb689fe20c5e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b41e54c58689400b86fc0dbf18e4bbaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b4bee486a3a34a46b495a87ed02c6ada": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_b75438a235a243d49404467d54b63373", + "placeholder": "​", + "style": "IPY_MODEL_8f8ffe07f8314800ad637194c8c7d10f", + "tabbable": null, + "tooltip": null, + "value": " 665/665 [00:00<00:00, 122kB/s]" + } + }, + "b59d1c8089e04592a1b87a7c198d1f6c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5cf4445bf6f49e1a27474934ef12afa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_60fe64c4851849aaba2c5156d93d364a", + "IPY_MODEL_31d69a25403c4e2d8ccf924346bad3fd", + "IPY_MODEL_846d52f5b0f44e2586d7e6dfd3c44844" + ], + "layout": "IPY_MODEL_8b79521229ad48ae86924053a0f575f1", + "tabbable": null, + "tooltip": null + } + }, + "b61375bbe9984e0d869c0cad72b655ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_97f6ad1918334a3ab503d4a5da11c9ef", + "placeholder": "​", + "style": "IPY_MODEL_0e453235a18e4f5c9008040b1420f718", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "b732ea0e03674d4384ac0d2dbf2a5f69": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "b75438a235a243d49404467d54b63373": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b92108f127ec4341af59d110b5f991c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "ba22c66487384f35b78c82777fa87091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bb119902818643e5ace138fdfc45173d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_45e840f42f8c46d491678fad7820e835", + "placeholder": "​", + "style": "IPY_MODEL_e29dd892780a4208b61593998e588e1f", + "tabbable": null, + "tooltip": null, + "value": "model.safetensors: 100%" + } + }, + "bb91ef19e455404aac3d283f868f9687": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_30d6d534fa42478bbde4e2c1ddb60d12", + "IPY_MODEL_387f00b7ea5041e08595fcd2c4f4933a", + "IPY_MODEL_02bd229aecb244b095eda2b6dc8b3904" + ], + "layout": "IPY_MODEL_ee00e160272e4823a9f03b324c3709f0", + "tabbable": null, + "tooltip": null + } + }, + "bc931a51f73848ad84a27ecffa06317a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a0ce07721af346faa19a490bb93eee61", + "IPY_MODEL_a711143026bc46a5b1b7bc3dccca1850", + "IPY_MODEL_737d22cc16184d6a92cf045c476c7a01" + ], + "layout": "IPY_MODEL_c52de9ceefd74f0fadf32fe08fa9586d", + "tabbable": null, + "tooltip": null + } + }, + "bcaf8ebca41240bd86dde0c617b68f01": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bcff757716c049a6a49f5af659e164a1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bfbd7c17cf3746138c6be29ce10a26c1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c08b955fa9494958bee9f565c568fc31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "c08dfc48a3574ac1ba2e416960d1d3ea": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c0aa3c04c0a74717b8fc6700213bf579": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "c0ad8eb99bbe4246972c6db4a9d233a3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c0fe834752e24e71be99a853c87aba82": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c3804dc9990243d591746c205377760d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c43891e5ac164a748faff2d9e71b4a97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_2452dd39a79742b29964500360f4a478", + "placeholder": "​", + "style": "IPY_MODEL_6ca82062740348f2bee12629de7f8e2f", + "tabbable": null, + "tooltip": null, + "value": " 580/580 [00:00<00:00, 5679.21it/s, Materializing param=transformer.wte.weight]" + } + }, + "c455478a557645b29777950e364a5006": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c4f6afd0756e4acf8252ecebe866de2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_aa036d7308b546f0ba4f77509cc8f8d9", + "max": 48, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d798ec8e934d48799b62c97e509db130", + "tabbable": null, + "tooltip": null, + "value": 48 + } + }, + "c51496d64234439ebbfec98b59f44803": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c52de9ceefd74f0fadf32fe08fa9586d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c567d94812a9482d83ae1d67116b41e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_cb791fd8c7ae49079f9162125e98ff79", + "placeholder": "​", + "style": "IPY_MODEL_52cd22d9c796437692165d3f3ed48e82", + "tabbable": null, + "tooltip": null, + "value": " 2160/2160 [02:00<00:00, 15.47it/s]" + } + }, + "c5bca44eefb940d39fba70d4fff71571": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_12c29a51576a400c900ffaf1703f62bb", + "IPY_MODEL_ee52dc00e394472a9c40fcbf5b1a7e1c", + "IPY_MODEL_6d4af53ee70a462ebdc51863189f8e2c" + ], + "layout": "IPY_MODEL_ac80df1b39654611afa84893a8911c2a", + "tabbable": null, + "tooltip": null + } + }, + "c654cc379219453a807fbab5cde04900": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6754ee7d6fb41899c51556eea37ee4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_f44f01a296fb4d198718574f1f802ba2", + "placeholder": "​", + "style": "IPY_MODEL_c6b1b39c22564a59bc5b950d7a46708f", + "tabbable": null, + "tooltip": null, + "value": " 26.0/26.0 [00:00<00:00, 4.24kB/s]" + } + }, + "c6b1b39c22564a59bc5b950d7a46708f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "c762da254b434ffea5b7c35e73009302": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "c79c48efbfdb44b0a70769c503f4e622": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_7d0ba24e89554742a562ce50135447f0", + "placeholder": "​", + "style": "IPY_MODEL_502e8cbf7b7c47b480b8a85405fc24cf", + "tabbable": null, + "tooltip": null, + "value": " 2160/2160 [02:00<00:00, 17.95it/s]" + } + }, + "c836782daaa847248009a626db347182": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c9188b4dfa3b480ebab78464a2ff104d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c9571f91e4894ac0ab6f9433d6dd7258": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ca1227195a3946c1a803126b165b9af0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_3d83725b10254139a51cb688a495459d", + "placeholder": "​", + "style": "IPY_MODEL_020cf001eb7d496295a325cbc0ee8718", + "tabbable": null, + "tooltip": null, + "value": " 2160/2160 [02:00<00:00, 18.75it/s]" + } + }, + "cac816368dee481a9fee6b196a2b16d6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cb23a6010b2d47bb821a9b72d283b661": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "20px" + } + }, + "cb791fd8c7ae49079f9162125e98ff79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb86f659e5ad4c5296c32b97d99d357c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf1b5c1ab05c413d825a067d1bbd193c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_72035846c38c4a439583cc5e974c0a52", + "placeholder": "​", + "style": "IPY_MODEL_7aebe1ba464949849da0e182d90d0669", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "cfac04648596443dad2ddc11f2bdd5c1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cfee0a94009c461dbdca5b73961f7fbe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d1e752f79bbc40ddbae7a02895c9b74e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "d230304b88114f2a9b85f5a48f441ce6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d30ceddacb8247689bffcc83981571cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_6d384606af404238b9c4c89adea26c1c", + "max": 144, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_54daaf323029464b9b67f8a4f53b3002", + "tabbable": null, + "tooltip": null, + "value": 144 + } + }, + "d378df4c155647ff92126c3e9742e874": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_8ab4ec47b46e401883c185417c452f17", + "placeholder": "​", + "style": "IPY_MODEL_fdb4d541f8414fe4bee9265554cd7522", + "tabbable": null, + "tooltip": null, + "value": "tokenizer_config.json: 100%" + } + }, + "d3dab66a1c254f07afa02e73e6fd121d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d49d46b5e41740c491adbd616855124a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3c324e62681a4d60a44231347d0a3402", + "IPY_MODEL_da67b3877eae4098a475ca80be66db0d", + "IPY_MODEL_043e0e7fe43744589b7bad2527c2eac0" + ], + "layout": "IPY_MODEL_6961b220931c407daa681ad557f6ba78", + "tabbable": null, + "tooltip": null + } + }, + "d5b59accb4b04231a5a8793f934f64a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_73cd80c764784b4197af01198ba6b886", + "IPY_MODEL_de6a648e05ca45c2bf85168fa41cf2aa", + "IPY_MODEL_c79c48efbfdb44b0a70769c503f4e622" + ], + "layout": "IPY_MODEL_954f099efbd3462fb488406f502fb70e", + "tabbable": null, + "tooltip": null + } + }, + "d7699d95a0ab4240bfa2754ac81a4dea": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d798ec8e934d48799b62c97e509db130": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d7c69daa5fa44a6487f5dc66380ec31a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "d7e105c660824d349c4ee17006f04437": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d93d361910094de5b5d62e4ae2c96ac9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d94d7a4f5ab34fc9a4a4ee0a07764461": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d95b7b9cf6914acb9d1152502d2ba41b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "da67b3877eae4098a475ca80be66db0d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_0582e71e725a4851a1905aceaa3c36ae", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_62e2fde99feb46cbb957fcd393fa7280", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "db91637067cb428c94674237eabda8f7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dc68cf2bf3e94df881377a106754a350": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "dc90adc8272e4e3e929844e2ceef149b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "dd6bf89931a64c63bbcd2cf526835c2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "dd98180b8cba40aa82fc6221dd4676d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "de109b45a18f42b5aa83f63ef683379f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "de1370864373438193780364711f6248": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "de6a648e05ca45c2bf85168fa41cf2aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_ffeaf662bd6a4e0a948a2d63d8e2c424", + "max": 2160, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_26b37ecd6ab14144b58cd76629664dcf", + "tabbable": null, + "tooltip": null, + "value": 2160 + } + }, + "df1990c20671455086758d842f54f80f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_b41e54c58689400b86fc0dbf18e4bbaa", + "placeholder": "​", + "style": "IPY_MODEL_98bce027ddf046c29bbbb03c6e9b1de3", + "tabbable": null, + "tooltip": null, + "value": " 144/144 [00:08<00:00, 18.70it/s]" + } + }, + "df2f48a5055b4cb7a9db8115c407fd8c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e08307b6cbe649fa95ca5bdfdbeae3a2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e29dd892780a4208b61593998e588e1f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "e2c844c07e434e718186afaf72312371": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e39161cc6aa446b48dd0e1600fae5dc5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_29376b858cd4489a8fcefc2b096df1e5", + "placeholder": "​", + "style": "IPY_MODEL_6f7575088f10441c87ded2f68ac37e9f", + "tabbable": null, + "tooltip": null, + "value": " 2160/2160 [01:59<00:00, 18.72it/s]" + } + }, + "e3abfde7cfd24e938684e179059edd9d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3c83cf7d60b411582dda6643cf80868": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_31a28b69348b40bfbd14a54380bfb766", + "IPY_MODEL_0009ee6f2b5b40fa9e17f4ed631845c3", + "IPY_MODEL_54c65cd71486494f8e78f0740aca2b40" + ], + "layout": "IPY_MODEL_cfac04648596443dad2ddc11f2bdd5c1", + "tabbable": null, + "tooltip": null + } + }, + "e3ee9a7a783b4275a78fc4e95c5cfadd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e43087fb83c44e538a2a774d33efa09e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7504caac9374ea4b4f1691350131f64": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7a67acf48bc47a6a04a8349c80f149f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_df2f48a5055b4cb7a9db8115c407fd8c", + "max": 180, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9b50599c4a084d9eb5b8755040c9bd32", + "tabbable": null, + "tooltip": null, + "value": 180 + } + }, + "e8122dbf356c439f973ebe1fd249eb42": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e9dfce1b7d944947b19c22b970c32b87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_351c909cd34b4df2ace07a5b8364e0e5", + "max": 124, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_18bad67b12f6437b9eb889c256d70970", + "tabbable": null, + "tooltip": null, + "value": 124 + } + }, + "ea7e663957454aca96f66d90c203f330": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_db91637067cb428c94674237eabda8f7", + "placeholder": "​", + "style": "IPY_MODEL_dd98180b8cba40aa82fc6221dd4676d0", + "tabbable": null, + "tooltip": null, + "value": " 180/180 [00:09<00:00, 17.32it/s]" + } + }, + "eb49abf5e8ea41e69c1307f78fda4a90": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "ed2e29a92a38476b92b8921d12824bbc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6773a029b78444c5942f40eaf571615f", + "IPY_MODEL_4aac3150a78e41dfb81c08a9c0861de1", + "IPY_MODEL_fa59e0a011d64aeab46608717d310eef" + ], + "layout": "IPY_MODEL_46c907a0ac31481f9147bf22e2ac5864", + "tabbable": null, + "tooltip": null + } + }, + "ed95bc25b34947d3aa1c302aa3facfc0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_e7504caac9374ea4b4f1691350131f64", + "max": 665, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5fe88cfad7ef40b1892e7a5a341ba69a", + "tabbable": null, + "tooltip": null, + "value": 665 + } + }, + "eddca0196bdf4e1eb5605e557bfe597b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee00e160272e4823a9f03b324c3709f0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee52dc00e394472a9c40fcbf5b1a7e1c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_96b11cc7ef2c4a5a96812363b30759a3", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4000e5115c6d48d687ab9b9695a0d826", + "tabbable": null, + "tooltip": null, + "value": 1 + } + }, + "ee535e2cfa694be1a7857b1867b8b608": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "f2d44c7aac1a4987a77102feb849c657": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f2f7396e6d4e4943b643ec28bd417f65": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7412e9c63c0f4915a2af01c883450f78", + "IPY_MODEL_6497be00022143ed8ec04f3bfe3f64cb", + "IPY_MODEL_17f4c003e2354f5b8f5a967c753f52eb" + ], + "layout": "IPY_MODEL_0fcbf5121fd54d6b97c761f6a0d358cb", + "tabbable": null, + "tooltip": null + } + }, + "f44f01a296fb4d198718574f1f802ba2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f5bbf314d840422b9e486386de3f5bb6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f732f9391dd14bcaace6fd5c27a8335a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "f796802e863c48138fdcec92f546a372": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7c397d0e7d84175bc4fedcd69e99ca1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_89eefe764e6b43d3b9a9fe7e4658d82f", + "max": 6431829964, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a92811787eb84dd19d9ec2fb2eab7eee", + "tabbable": null, + "tooltip": null, + "value": 6431829964 + } + }, + "f8fb6cce7c4940cb947e900d00bb0653": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4132df648b67489484a3a657e0d8ed4c", + "IPY_MODEL_4605c4cb08be427590930d8ffb9289d6", + "IPY_MODEL_5f1b1b85bb4a4897be8f6b1d5b59a659" + ], + "layout": "IPY_MODEL_8329289a73c54504a4d450dfa378ac04", + "tabbable": null, + "tooltip": null + } + }, + "f950cda925b0497990e1ca32a8883bcc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_e2c844c07e434e718186afaf72312371", + "placeholder": "​", + "style": "IPY_MODEL_3704e0be72444fd9a07038fbe1b19156", + "tabbable": null, + "tooltip": null, + "value": "100%" + } + }, + "f9e762f547ee4031b2e23cb8aee5deb4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fa59e0a011d64aeab46608717d310eef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_61ffe9ae7ab44e94b5729cae80e49437", + "placeholder": "​", + "style": "IPY_MODEL_a314648aea3d40f89ac931c47f81c9e6", + "tabbable": null, + "tooltip": null, + "value": " 148/148 [00:00<00:00, 5498.19it/s, Materializing param=transformer.wte.weight]" + } + }, + "fd150a5176074e959dfa52a35770b5f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_a05625d67e634f2a80c65cdcfcbe8f8c", + "placeholder": "​", + "style": "IPY_MODEL_d1e752f79bbc40ddbae7a02895c9b74e", + "tabbable": null, + "tooltip": null, + "value": " 6.43G/6.43G [04:52<00:00, 111MB/s]" + } + }, + "fdb4d541f8414fe4bee9265554cd7522": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "ffeaf662bd6a4e0a948a2d63d8e2c424": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + }, + "version_major": 2, + "version_minor": 0 } } }, diff --git a/demos/BERT.ipynb b/demos/BERT.ipynb index a46b49976..9338fd30e 100644 --- a/demos/BERT.ipynb +++ b/demos/BERT.ipynb @@ -28,16 +28,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, - "outputs": [], - "source": "# NBVAL_IGNORE_OUTPUT\nimport os\n\n# Janky code to do different setup when run in a Colab notebook vs VSCode\nDEVELOPMENT_MODE = False\nIN_GITHUB = os.getenv(\"GITHUB_ACTIONS\") == \"true\"\ntry:\n import google.colab\n\n IN_COLAB = True\n print(\"Running as a Colab notebook\")\n\n # PySvelte is an unmaintained visualization library, use it as a backup if circuitsvis isn't working\n # # Install another version of node that makes PySvelte work way faster\n # !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs\n # %pip install git+https://github.com/neelnanda-io/PySvelte.git\nexcept:\n IN_COLAB = False\n\nif not IN_GITHUB and not IN_COLAB:\n print(\"Running as a Jupyter notebook - intended for development only!\")\n from IPython import get_ipython\n\n ipython = get_ipython()\n # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n ipython.run_line_magic(\"load_ext\", \"autoreload\")\n ipython.run_line_magic(\"autoreload\", \"2\")\n\nif IN_COLAB:\n %pip install transformer_lens\n %pip install circuitsvis" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running as a Jupyter notebook - intended for development only!\n", + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "# NBVAL_IGNORE_OUTPUT\n", + "import os\n", + "\n", + "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", + "DEVELOPMENT_MODE = False\n", + "IN_GITHUB = os.getenv(\"GITHUB_ACTIONS\") == \"true\"\n", + "try:\n", + " import google.colab\n", + "\n", + " IN_COLAB = True\n", + " print(\"Running as a Colab notebook\")\n", + "\n", + " # PySvelte is an unmaintained visualization library, use it as a backup if circuitsvis isn't working\n", + " # # Install another version of node that makes PySvelte work way faster\n", + " # !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs\n", + " # %pip install git+https://github.com/neelnanda-io/PySvelte.git\n", + "except:\n", + " IN_COLAB = False\n", + "\n", + "if not IN_GITHUB and not IN_COLAB:\n", + " print(\"Running as a Jupyter notebook - intended for development only!\")\n", + " from IPython import get_ipython\n", + "\n", + " ipython = get_ipython()\n", + " # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n", + " ipython.run_line_magic(\"load_ext\", \"autoreload\")\n", + " ipython.run_line_magic(\"autoreload\", \"2\")\n", + "\n", + "if IN_COLAB:\n", + " %pip install transformer_lens\n", + " %pip install circuitsvis" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using renderer: colab\n" + ] + } + ], "source": [ "# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n", "import plotly.io as pio\n", @@ -51,27 +101,27 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -85,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -94,12 +144,12 @@ "\n", "from transformers import AutoTokenizer\n", "\n", - "from transformer_lens import HookedEncoder, BertNextSentencePrediction" + "from transformer_lens.model_bridge import TransformerBridge" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -119,30 +169,29 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Support for BERT in TransformerLens is currently experimental, until such a time when it has feature parity with HookedTransformer and has been tested on real research tasks. Until then, backward compatibility is not guaranteed. Please see the docs for information on the limitations of the current implementation.\n", - "If using BERT for interpretability research, keep in mind that BERT has some significant architectural differences to GPT. For example, LayerNorms are applied *after* the attention and MLP components, meaning that the last LayerNorm in a block cannot be folded.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Moving model to device: mps\n", - "Loaded pretrained model bert-base-cased into HookedTransformer\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1d4b75dcfcbf488da7196992cde5c9bb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading weights: 0%| | 0/202 [00:00', 'After', ' John', ' and', ' Mary', ' went', ' to', ' the', ' store', ',', ' John', ' gave', ' a', ' bottle', ' of', ' milk', ' to']\n", - "Tokenized answer: [' Mary']\n" - ] - }, - { - "data": { - "text/html": [ - "
Performance on answer token:\n",
-       "Rank: 0        Logit: 18.09 Prob: 70.07% Token: | Mary|\n",
-       "
\n" - ], - "text/plain": [ - "Performance on answer token:\n", - "\u001b[1mRank: \u001b[0m\u001b[1;36m0\u001b[0m\u001b[1m Logit: \u001b[0m\u001b[1;36m18.09\u001b[0m\u001b[1m Prob: \u001b[0m\u001b[1;36m70.07\u001b[0m\u001b[1m% Token: | Mary|\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Top 0th token. Logit: 18.09 Prob: 70.07% Token: | Mary|\n", - "Top 1th token. Logit: 15.38 Prob: 4.67% Token: | the|\n", - "Top 2th token. Logit: 15.35 Prob: 4.54% Token: | John|\n", - "Top 3th token. Logit: 15.25 Prob: 4.11% Token: | them|\n", - "Top 4th token. Logit: 14.84 Prob: 2.73% Token: | his|\n", - "Top 5th token. Logit: 14.06 Prob: 1.24% Token: | her|\n", - "Top 6th token. Logit: 13.54 Prob: 0.74% Token: | a|\n", - "Top 7th token. Logit: 13.52 Prob: 0.73% Token: | their|\n", - "Top 8th token. Logit: 13.13 Prob: 0.49% Token: | Jesus|\n", - "Top 9th token. Logit: 12.97 Prob: 0.42% Token: | him|\n" - ] - }, - { - "data": { - "text/html": [ - "
Ranks of the answer tokens: [(' Mary', 0)]\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1mRanks of the answer tokens:\u001b[0m \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m' Mary'\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "example_prompt = \"After John and Mary went to the store, John gave a bottle of milk to\"\n", "example_answer = \" Mary\"\n", @@ -372,25 +297,16 @@ "\n", "We want models that can take in arbitrary text, but models need to have a fixed vocabulary. So the solution is to define a vocabulary of **tokens** and to deterministically break up arbitrary text into tokens. Tokens are, essentially, subwords, and are determined by finding the most frequent substrings - this means that tokens vary a lot in length and frequency! \n", "\n", - "Tokens are a *massive* headache and are one of the most annoying things about reverse engineering language models... Different names will be different numbers of tokens, different prompts will have the relevant tokens at different positions, different prompts will have different total numbers of tokens, etc. Language models often devote significant amounts of parameters in early layers to convert inputs from tokens to a more sensible internal format (and do the reverse in later layers). You really, really want to avoid needing to think about tokenization wherever possible when doing exploratory analysis (though, of course, it's relevant later when trying to flesh out your analysis and make it rigorous!). HookedTransformer comes with several helper methods to deal with tokens: `to_tokens, to_string, to_str_tokens, to_single_token, get_token_position`\n", + "Tokens are a *massive* headache and are one of the most annoying things about reverse engineering language models... Different names will be different numbers of tokens, different prompts will have the relevant tokens at different positions, different prompts will have different total numbers of tokens, etc. Language models often devote significant amounts of parameters in early layers to convert inputs from tokens to a more sensible internal format (and do the reverse in later layers). You really, really want to avoid needing to think about tokenization wherever possible when doing exploratory analysis (though, of course, it's relevant later when trying to flesh out your analysis and make it rigorous!). TransformerBridge comes with several helper methods to deal with tokens: `to_tokens, to_string, to_str_tokens, to_single_token, get_token_position`\n", "\n", "**Exercise:** I recommend using `model.to_str_tokens` to explore how the model tokenizes different strings. In particular, try adding or removing spaces at the start, or changing capitalization - these change tokenization!" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['When John and Mary went to the shops, John gave the bag to', 'When John and Mary went to the shops, Mary gave the bag to', 'When Tom and James went to the park, James gave the ball to', 'When Tom and James went to the park, Tom gave the ball to', 'When Dan and Sid went to the shops, Sid gave an apple to', 'When Dan and Sid went to the shops, Dan gave an apple to', 'After Martin and Amy went to the park, Amy gave a drink to', 'After Martin and Amy went to the park, Martin gave a drink to']\n", - "[(' Mary', ' John'), (' John', ' Mary'), (' Tom', ' James'), (' James', ' Tom'), (' Dan', ' Sid'), (' Sid', ' Dan'), (' Martin', ' Amy'), (' Amy', ' Martin')]\n" - ] - } - ], + "outputs": [], "source": [ "prompt_format = [\n", " \"When John and Mary went to the shops,{} gave the bag to\",\n", @@ -440,32 +356,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' John', ' and', ' Mary', ' went', ' to', ' the', ' shops', ',', ' John', ' gave', ' the', ' bag', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' John', ' and', ' Mary', ' went', ' to', ' the', ' shops', ',', ' Mary', ' gave', ' the', ' bag', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' Tom', ' and', ' James', ' went', ' to', ' the', ' park', ',', ' James', ' gave', ' the', ' ball', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' Tom', ' and', ' James', ' went', ' to', ' the', ' park', ',', ' Tom', ' gave', ' the', ' ball', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' Dan', ' and', ' Sid', ' went', ' to', ' the', ' shops', ',', ' Sid', ' gave', ' an', ' apple', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'When', ' Dan', ' and', ' Sid', ' went', ' to', ' the', ' shops', ',', ' Dan', ' gave', ' an', ' apple', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'After', ' Martin', ' and', ' Amy', ' went', ' to', ' the', ' park', ',', ' Amy', ' gave', ' a', ' drink', ' to']\n", - "Prompt length: 15\n", - "Prompt as tokens: ['<|endoftext|>', 'After', ' Martin', ' and', ' Amy', ' went', ' to', ' the', ' park', ',', ' Martin', ' gave', ' a', ' drink', ' to']\n" - ] - } - ], + "outputs": [], "source": [ "for prompt in prompts:\n", " str_tokens = model.to_str_tokens(prompt)\n", @@ -482,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -501,18 +394,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Per prompt logit difference: tensor([3.3370, 3.2020, 2.7090, 3.7970, 1.7200, 5.2810, 2.6010, 5.7670])\n", - "Average logit difference: 3.552\n" - ] - } - ], + "outputs": [], "source": [ "def logits_to_ave_logit_diff(logits, answer_tokens, per_prompt=False):\n", " # Only the final logits are relevant for the answer\n", @@ -638,25 +522,22 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Answer residual directions shape: torch.Size([8, 2, 768])\n", - "Logit difference directions shape: torch.Size([8, 768])\n" - ] - } - ], + "outputs": [], "source": [ - "answer_residual_directions = model.tokens_to_residual_directions(answer_tokens)\n", + "# TransformerBridge doesn't have tokens_to_residual_directions yet,\n", + "# so we implement it inline using model.unembed.W_U\n", + "W_U = model.unembed.W_U # [d_model, d_vocab]\n", + "answer_residual_directions = W_U[:, answer_tokens]\n", + "answer_residual_directions = einops.rearrange(\n", + " answer_residual_directions, \"d_model batch correct_incorrect -> batch correct_incorrect d_model\"\n", + ")\n", "print(\"Answer residual directions shape:\", answer_residual_directions.shape)\n", "logit_diff_directions = (\n", " answer_residual_directions[:, 0] - answer_residual_directions[:, 1]\n", ")\n", - "print(\"Logit difference directions shape:\", logit_diff_directions.shape)" + "print(\"Logit difference directions shape:\", logit_diff_directions.shape)\n" ] }, { @@ -682,19 +563,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Final residual stream shape: torch.Size([8, 15, 768])\n", - "Calculated average logit diff: 3.552\n", - "Original logit difference: 3.552\n" - ] - } - ], + "outputs": [], "source": [ "# cache syntax - resid_post is the residual stream at the end of the layer, -1 gets the final layer. The general syntax is [activation_name, layer_index, sub_layer_type].\n", "final_residual_stream = cache[\"resid_post\", -1]\n", @@ -731,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -769,966 +640,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "%{hovertext}

x=%{x}
y=%{y}", - "hovertext": [ - "0_pre", - "0_mid", - "1_pre", - "1_mid", - "2_pre", - "2_mid", - "3_pre", - "3_mid", - "4_pre", - "4_mid", - "5_pre", - "5_mid", - "6_pre", - "6_mid", - "7_pre", - "7_mid", - "8_pre", - "8_mid", - "9_pre", - "9_mid", - "10_pre", - "10_mid", - "11_pre", - "11_mid", - "final_post" - ], - "legendgroup": "", - "line": { - "color": "#636efa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "", - "orientation": "v", - "showlegend": false, - "type": "scatter", - "x": [ - 0, - 0.5, - 1, - 1.5, - 2, - 2.5, - 3, - 3.5, - 4, - 4.5, - 5, - 5.5, - 6, - 6.5, - 7, - 7.5, - 8, - 8.5, - 9, - 9.5, - 10, - 10.5, - 11, - 11.5, - 12 - ], - "xaxis": "x", - "y": [ - 1.2937933206558228e-05, - -0.006643360480666161, - -0.007525032386183739, - -0.009075596928596497, - -0.008736769668757915, - -0.008685456588864326, - -0.006480347365140915, - -0.007939882576465607, - -0.009661720134317875, - -0.015095856040716171, - -0.01419061329215765, - -0.019930001348257065, - -0.00912435818463564, - -0.027298055589199066, - -0.02985510788857937, - 0.2497255504131317, - 0.250558078289032, - 0.45005205273628235, - 0.45996904373168945, - 5.02545166015625, - 5.142900466918945, - 4.730565071105957, - 4.887058258056641, - 3.445383071899414, - 3.5518720149993896 - ], - "yaxis": "y" - } - ], - "layout": { - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Accumulate Residual Stream" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "x" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "y" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "accumulated_residual, labels = cache.accumulated_resid(\n", " layer=-1, incl_mid=True, pos_slice=-1, return_labels=True\n", @@ -1762,969 +676,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "%{hovertext}

x=%{x}
y=%{y}", - "hovertext": [ - "embed", - "pos_embed", - "0_attn_out", - "0_mlp_out", - "1_attn_out", - "1_mlp_out", - "2_attn_out", - "2_mlp_out", - "3_attn_out", - "3_mlp_out", - "4_attn_out", - "4_mlp_out", - "5_attn_out", - "5_mlp_out", - "6_attn_out", - "6_mlp_out", - "7_attn_out", - "7_mlp_out", - "8_attn_out", - "8_mlp_out", - "9_attn_out", - "9_mlp_out", - "10_attn_out", - "10_mlp_out", - "11_attn_out", - "11_mlp_out" - ], - "legendgroup": "", - "line": { - "color": "#636efa", - "dash": "solid" - }, - "marker": { - "symbol": "circle" - }, - "mode": "lines", - "name": "", - "orientation": "v", - "showlegend": false, - "type": "scatter", - "x": [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25 - ], - "xaxis": "x", - "y": [ - -0.00028366726473905146, - 0.00029660604195669293, - -0.0066563040018081665, - -0.0008816685294732451, - -0.0015505650080740452, - 0.00033882574643939734, - 5.131529178470373e-05, - 0.0022051138803362846, - -0.0014595506945624948, - -0.0017218313878402114, - -0.005434143822640181, - 0.0009052485693246126, - -0.0057394010946154594, - 0.010805649682879448, - -0.018173698335886, - -0.002557049971073866, - 0.27958065271377563, - 0.0008325176313519478, - 0.19949400424957275, - 0.00991708692163229, - 4.565483093261719, - 0.11744903028011322, - -0.4123360514640808, - 0.15649384260177612, - -1.4416757822036743, - 0.10648896545171738 - ], - "yaxis": "y" - } - ], - "layout": { - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Each Layer" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "x" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "y" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "per_layer_residual, labels = cache.decompose_resid(\n", " layer=-1, pos_slice=-1, return_labels=True\n", @@ -2757,1102 +711,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tried to stack head results when they weren't cached. Computing head results now\n" - ] - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - -0.0020563392899930477, - -0.0005101899732835591, - 0.0004685786843765527, - 0.00012512074317783117, - -0.0006028738571330905, - -0.0002429460291750729, - -0.0023189077619463205, - -0.002758360467851162, - 0.000564602785743773, - 0.0009697531932033598, - -0.0002504526637494564, - 4.737317794933915e-06 - ], - [ - -0.0010070882271975279, - 0.00039470894262194633, - -0.00154874159488827, - 0.0014034928753972054, - -0.0012653048615902662, - -0.0011358022456988692, - -0.00281596090644598, - -0.0029645217582583427, - 0.0029190476052463055, - 0.0025743592996150255, - 0.00036239007022231817, - 0.0017548729665577412 - ], - [ - 0.0005569400964304805, - -0.001126631861552596, - -0.0017353934235870838, - -0.0014514457434415817, - -0.00028735760133713484, - 0.0017211002996191382, - 0.0026658899150788784, - 0.00311466702260077, - 0.0005667927907779813, - -0.003666515462100506, - -0.0018847601022571325, - 7.039372576400638e-06 - ], - [ - -0.0007264417363330722, - 0.00011364505917299539, - 0.0014301587361842394, - 0.0007490540738217533, - 0.0020184689201414585, - 0.0007436950691044331, - -0.00046178390039131045, - -0.0039057559333741665, - 0.0011406694538891315, - -4.022853681817651e-05, - -0.0013293239753693342, - -0.0017636751290410757 - ], - [ - -0.0028280913829803467, - 0.00033634810824878514, - -0.0014248639345169067, - -0.003777273464947939, - 0.0015998880844563246, - 0.0002989505883306265, - -0.000804675742983818, - 0.002038792008534074, - -0.0015593919670209289, - -0.0006436670082621276, - 0.0011168173514306545, - -0.00035012533771805465 - ], - [ - 0.0011338205076754093, - 0.0011259170714765787, - -0.002516670385375619, - -0.0014790185960009694, - 0.0003878737334161997, - -6.408110493794084e-05, - -0.0005096744280308485, - -0.0008840755908749998, - 0.0006398351397365332, - -0.0010097370250150561, - -0.006759158335626125, - 0.0033667823299765587 - ], - [ - -0.01514742337167263, - -0.0021350777242332697, - 0.002593174111098051, - -0.00042678468162193894, - -0.005558924749493599, - 0.0026658528950065374, - 0.006411008536815643, - -0.003826778382062912, - -0.0003843410813715309, - -0.0016430341638624668, - -0.0013344454346224666, - -9.20506427064538e-05 - ], - [ - -9.476230479776859e-05, - -0.0057889921590685844, - -0.0006383581785485148, - 0.13493388891220093, - -0.001768707763403654, - -0.018917907029390335, - 0.003873429261147976, - -0.0021450775675475597, - -0.010327338241040707, - 0.18325845897197723, - -0.0007747983909212053, - -0.00104526337236166 - ], - [ - -0.003833949100226164, - -0.0008046097937040031, - -0.012673400342464447, - 0.00804573018103838, - 0.003604492638260126, - -0.009398287162184715, - -0.08272082358598709, - 0.003555194940418005, - -0.018404025584459305, - 0.0017587244510650635, - 0.2896133363246918, - 0.022854052484035492 - ], - [ - 0.08595258742570877, - -0.0006932877004146576, - 0.06817055493593216, - 0.013111240230500698, - -0.021098043769598007, - 0.05112447217106819, - 1.3844914436340332, - 0.045836858451366425, - -0.03830280900001526, - 2.985445976257324, - 0.0019662054255604744, - -0.008030137047171593 - ], - [ - 0.5608693957328796, - 0.17083050310611725, - -0.03361757844686508, - 0.05821544677019119, - -0.0024530249647796154, - 0.0018771197646856308, - 0.28827205300331116, - -1.8986485004425049, - -0.0015286931302398443, - -0.035129792988300323, - 0.4802178740501404, - -0.0009115453576669097 - ], - [ - 0.016075748950242996, - -0.03986122086644173, - -0.3879126012325287, - 0.011123123578727245, - -0.005477819126099348, - -0.0025129620917141438, - -0.08056175708770752, - 0.007518616039305925, - 0.0430111438035965, - -0.040082238614559174, - -0.9702364802360535, - 0.011862239800393581 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Each Head" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "per_head_residual, labels = cache.stack_head_results(\n", " layer=-1, pos_slice=-1, return_labels=True\n", @@ -3893,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3908,6 +769,11 @@ " if isinstance(heads, int):\n", " heads = [heads]\n", "\n", + " # Handle empty head list\n", + " if len(heads) == 0:\n", + " title_html = f\"

{title}


\"\n", + " return f\"
{title_html}

No heads in this group

\"\n", + "\n", " # Create the plotting data\n", " labels: List[str] = []\n", " patterns: List[Float[torch.Tensor, \"dest_pos src_pos\"]] = []\n", @@ -3954,39 +820,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

Top 3 Positive Logit Attribution Heads


\n", - "

Top 3 Negative Logit Attribution Heads


\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "top_k = 3\n", "\n", @@ -4098,18 +934,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Corrupted Average Logit Diff -3.55\n", - "Clean Average Logit Diff 3.55\n" - ] - } - ], + "outputs": [], "source": [ "corrupted_prompts = []\n", "for i in range(0, len(prompts), 2):\n", @@ -4126,27 +953,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['<|endoftext|>When John and Mary went to the shops, Mary gave the bag to',\n", - " '<|endoftext|>When John and Mary went to the shops, John gave the bag to',\n", - " '<|endoftext|>When Tom and James went to the park, Tom gave the ball to',\n", - " '<|endoftext|>When Tom and James went to the park, James gave the ball to',\n", - " '<|endoftext|>When Dan and Sid went to the shops, Dan gave an apple to',\n", - " '<|endoftext|>When Dan and Sid went to the shops, Sid gave an apple to',\n", - " '<|endoftext|>After Martin and Amy went to the park, Martin gave a drink to',\n", - " '<|endoftext|>After Martin and Amy went to the park, Amy gave a drink to']" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model.to_string(corrupted_tokens)" ] @@ -4163,7 +972,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4173,7 +982,9 @@ " pos,\n", " clean_cache,\n", "):\n", - " corrupted_residual_component[:, pos, :] = clean_cache[hook.name][:, pos, :]\n", + " clean_value = clean_cache[hook.name][:, pos, :].clone()\n", + " corrupted_residual_component = corrupted_residual_component.clone()\n", + " corrupted_residual_component[:, pos : pos + 1, :] = clean_value.unsqueeze(1)\n", " return corrupted_residual_component\n", "\n", "\n", @@ -4217,1148 +1028,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Position: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "x": [ - "<|endoftext|>_0", - "When_1", - " John_2", - " and_3", - " Mary_4", - " went_5", - " to_6", - " the_7", - " shops_8", - ",_9", - " John_10", - " gave_11", - " the_12", - " bag_13", - " to_14" - ], - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1.000650405883789, - -0.0002469856117386371, - 9.76665523921838e-06, - -0.00036458822432905436, - -4.8967522161547095e-05 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1.001051902770996, - -2.7621845219982788e-05, - -1.9768245692830533e-05, - -0.0004596704675350338, - -0.0005947590689174831 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1.0002663135528564, - 0.0008680911851115525, - 0.0005157867562957108, - -0.0009929431835189462, - -0.0008658089209347963 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.994907796382904, - 0.005429857410490513, - 0.0016050540143623948, - -0.0006193603039719164, - -0.0016324409516528249 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.9675672054290771, - 0.03134213387966156, - 0.0028418952133506536, - -0.0012302964460104704, - -0.000985861523076892 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.967520534992218, - 0.03100077249109745, - 0.0017823305679485202, - -0.00048668819363228977, - -0.0006467136554419994 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.9228319525718689, - 0.05134531855583191, - 0.004728672094643116, - 0.0009345446596853435, - 0.017046840861439705 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.6565483808517456, - 0.02385685034096241, - 0.002357019344344735, - -1.7183941963594407e-05, - 0.3186916410923004 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.027302566915750504, - 0.03142499923706055, - 0.0018202561186626554, - 0.0007990868762135506, - 0.9383866190910339 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.026841485872864723, - 0.02098155952990055, - 0.0012512058019638062, - 0.00032317222212441266, - 1.0048279762268066 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.005687985569238663, - 0.014263377524912357, - 0.00048709093243815005, - -8.977938705356792e-05, - 0.9914212226867676 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched Residual Stream" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Position" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "prompt_position_labels = [\n", " f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(tokens[0]))\n", @@ -5387,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5434,1148 +1106,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Position: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "x": [ - "<|endoftext|>_0", - "When_1", - " John_2", - " and_3", - " Mary_4", - " went_5", - " to_6", - " the_7", - " shops_8", - ",_9", - " John_10", - " gave_11", - " the_12", - " bag_13", - " to_14" - ], - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.035456884652376175, - -0.0002469856117386371, - 9.76665523921838e-06, - -0.00036458822432905436, - -4.8967522161547095e-05 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.0029848709236830473, - 7.950929284561425e-05, - 2.0842242520302534e-05, - 8.088535105343908e-05, - -0.0005967392353340983 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.0019131568260490894, - 0.0006668510613963008, - 0.00039482791908085346, - -0.0007051457650959492, - -0.00027282864903099835 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.1546323299407959, - 0.0038019807543605566, - 0.0005171628436073661, - -0.00011964991426793858, - -0.0005599213181994855 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.005406397394835949, - 0.019581740722060204, - 0.001007509301416576, - -0.0002424211270408705, - 0.0007936497568152845 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.3520970046520233, - 0.0010525835677981377, - 0.00022436455765273422, - 0.00013367898645810783, - 8.172441448550671e-05 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.11986024677753448, - 0.021243548020720482, - 0.002727783052250743, - 0.0013409851817414165, - 0.01797366514801979 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.013310473412275314, - 0.011509180068969727, - 0.00037542887730523944, - -4.094611358596012e-05, - 0.29760244488716125 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.0015009435592219234, - 0.017351653426885605, - 0.0005848917062394321, - 0.0010122752282768488, - 0.5697318911552429 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.00012901381705887616, - 0.00630143890157342, - 0.00014156615361571312, - 0.00031229801243171096, - 0.27152299880981445 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.0009373303619213402, - 8.669164526509121e-05, - 0.00033243544748984277, - 9.73309283835988e-07, - -0.1929796040058136 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.40617984533309937 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched Attention Layer" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Position" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " patched_attn_diff,\n", @@ -6606,1148 +1139,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Position: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "x": [ - "<|endoftext|>_0", - "When_1", - " John_2", - " and_3", - " Mary_4", - " went_5", - " to_6", - " the_7", - " shops_8", - ",_9", - " John_10", - " gave_11", - " the_12", - " bag_13", - " to_14" - ], - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.8507890701293945, - -0.00027843358111567795, - -7.293107046280056e-05, - -0.00047373308916576207, - 4.0039929444901645e-05 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.008863994851708412, - 0.000222149450564757, - 0.00014938619278836995, - -4.853121208725497e-05, - 0.000304041663184762 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.013550343923270702, - 5.86334899708163e-05, - -0.0003296833310741931, - -0.0006382559076882899, - 0.0007730424986220896 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.0019468198297545314, - 0.0004995090421289206, - 0.00017318192112725228, - 0.00016871812113095075, - 0.00040764876757748425 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.019787074998021126, - 0.004128609783947468, - -4.86990247736685e-05, - -0.00017019486404024065, - 0.0007914346642792225 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.09652391821146011, - -0.0018826150335371494, - -0.0004844730719923973, - 0.0007094081956893206, - -0.00018335132335778326 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.015900013968348503, - -0.0008501688134856522, - 0.00012337534280959517, - 2.7521158699528314e-05, - -0.007238299585878849 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.010360540822148323, - 0.0031509376130998135, - 0.0005309234256856143, - 0.0002361114020459354, - 0.008496351540088654 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.012533102184534073, - 2.201692586822901e-05, - -0.00035374757135286927, - 8.615465048933402e-05, - -0.021631328389048576 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -0.00033465056912973523, - 0.0008094912045635283, - 1.6244195649051107e-05, - 0.00012924875773023814, - 0.03162466362118721 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.0013599144294857979, - -0.00019499746849760413, - -9.934466652339324e-05, - -0.00014217027637641877, - 0.028764141723513603 - ], - [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0.02044912613928318 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched MLP Layer" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Position" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " patched_mlp_diff,\n", @@ -7770,7 +1164,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -7780,9 +1174,11 @@ " head_index,\n", " clean_cache,\n", "):\n", - " corrupted_head_vector[:, :, head_index, :] = clean_cache[hook.name][\n", + " clean_value = clean_cache[hook.name][\n", " :, :, head_index, :\n", - " ]\n", + " ].clone()\n", + " corrupted_head_vector = corrupted_head_vector.clone()\n", + " corrupted_head_vector[:, :, head_index : head_index + 1, :] = clean_value.unsqueeze(2)\n", " return corrupted_head_vector\n", "\n", "\n", @@ -7813,1095 +1209,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0.0009487751522101462, - 0.016124747693538666, - 0.0018548924708738923, - 0.0034389030188322067, - -0.00982347596436739, - 0.011058605276048183, - -0.004063969012349844, - -0.0015792781487107277, - -0.0012082795146852732, - 0.003828897839412093, - -0.004256919026374817, - -0.0011422622483223677 - ], - [ - -0.0010771177476271987, - -0.00037898647133260965, - 2.5171791548928013e-06, - -0.00026067905128002167, - -0.00014146546891424805, - 0.0038321535103023052, - -0.0004293300735298544, - -0.00142992555629462, - -0.0009228314156644046, - 0.0006944393389858305, - 0.00043302192352712154, - -0.0035714071709662676 - ], - [ - -0.0004967569257132709, - 0.0008057993836700916, - 0.0005424688570201397, - -0.0005309234256856143, - -0.0007159864180721343, - -0.0010389237431809306, - -0.0009490771917626262, - -8.649027586216107e-05, - 0.0002766547549981624, - 0.0021084228064864874, - -0.0001975146442418918, - -0.0016405630158260465 - ], - [ - 0.1162627637386322, - 0.0002507446042727679, - -0.0014675153652206063, - -0.00039680811460129917, - 0.018962211906909943, - -0.00018764731066767126, - 0.011170871555805206, - -0.0013301445869728923, - -0.0007356539717875421, - -0.00030253134900704026, - -0.00014683544577565044, - -0.00022228369198273867 - ], - [ - -0.001650598249398172, - 0.0002927311579696834, - -0.00143563118763268, - 0.03084198758006096, - -0.007432155776768923, - -0.00028236035723239183, - 0.006017433945089579, - -0.011007187888026237, - -0.001266107545234263, - 0.0014901700196787715, - -0.0001800622121663764, - 0.002944394713267684 - ], - [ - -0.004211106337606907, - 0.0029597999528050423, - 0.002045023487880826, - 0.0013397098518908024, - -0.0012190865818411112, - 0.34349915385246277, - 0.0005632104002870619, - -0.0001262281439267099, - -0.00515326950699091, - 0.016240738332271576, - 0.01709030382335186, - -0.004175194539129734 - ], - [ - 0.039775289595127106, - 0.015226684510707855, - -0.0010229480685666203, - 0.0008072761120274663, - -0.004935584031045437, - -0.002123525831848383, - -0.014274083077907562, - 0.0013746818294748664, - 0.0014838266652077436, - 0.1302703619003296, - -0.00033616088330745697, - 0.0012919505825266242 - ], - [ - 0.00037177055492065847, - 0.019514480605721474, - 0.00022255218937061727, - 0.124249167740345, - -0.00040352059295400977, - -0.007652895525097847, - 0.0013010123511776328, - -0.0011253133416175842, - -0.007449474185705185, - 0.19224143028259277, - -0.003275118535384536, - -0.0005017912480980158 - ], - [ - -0.001007912098430097, - 3.091096004936844e-05, - -0.0008595998515374959, - 0.012359987013041973, - -0.0004041247011628002, - -0.004328910261392593, - 0.3185553252696991, - 0.002330605871975422, - 0.0021182901691645384, - 0.0001405928487656638, - 0.2779357433319092, - 0.005738262087106705 - ], - [ - 0.0058898297138512135, - -0.0009689796715974808, - 0.00912561360746622, - 0.020675739273428917, - -0.03700518235564232, - 0.014263041317462921, - -0.04828466475009918, - 0.05834139883518219, - 0.0006514795240946114, - 0.26360899209976196, - 0.0004918567719869316, - -0.00261044898070395 - ], - [ - 0.08374208211898804, - 0.020676210522651672, - -0.003743582172319293, - 0.01085072010755539, - -0.001096583902835846, - 0.00047430366976186633, - 0.04818058758974075, - -0.4799128472805023, - 0.00018429107149131596, - 0.011861988343298435, - 0.06088569387793541, - 0.0008461413672193885 - ], - [ - 0.005328264087438583, - -0.011493473313748837, - -0.11350836604833603, - 0.006329597905278206, - 0.00031669469899497926, - -0.0011600167490541935, - -0.022669579833745956, - 0.004070379305630922, - 0.0073160636238753796, - -0.00834545586258173, - -0.27817651629447937, - 0.0036344374530017376 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched Head Output" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " patched_head_z_diff,\n", @@ -8933,7 +1243,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -8964,1095 +1274,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - -0.00019892427371814847, - 0.005339574534446001, - 0.0006527548539452255, - 0.003504416672512889, - -0.00898387935012579, - 0.0034814265090972185, - -0.0008631910313852131, - -3.406582254683599e-05, - 0.0005166929331608117, - 0.00044255363172851503, - -0.0039068968035280704, - -0.0001880836207419634 - ], - [ - -0.0004399022145662457, - -0.00044510437874123454, - -6.73597096465528e-05, - 7.242763240355998e-05, - -3.6549441574607044e-05, - -0.0019323208834975958, - -0.0001572397886775434, - 1.6143509128596634e-05, - 0.00020593880617525429, - 0.000336798548232764, - 0.0003515324497129768, - -0.0005669358652085066 - ], - [ - 0.00021013410878367722, - -0.0007199132232926786, - 0.0004868560063187033, - -0.0005974104860797524, - -0.0005921411793678999, - -0.0005443819100037217, - -0.000227552984142676, - -0.0004809825913980603, - 0.00020570388005580753, - 0.001183376181870699, - -0.0003574058646336198, - -0.0009104468626901507 - ], - [ - 0.0010395278222858906, - -0.00012042184971505776, - -7.762980385450646e-05, - -0.0007275318494066596, - -0.001310007064603269, - -0.0023108376190066338, - 0.010987084358930588, - -5.0712766096694395e-05, - 0.00014314358122646809, - 0.00015069512301124632, - -7.957642083056271e-05, - -2.0238119759596884e-05 - ], - [ - -0.0005373673629947007, - -0.0008137872209772468, - -0.00013334336108528078, - 0.030609702691435814, - -0.007185807917267084, - 0.000148916311445646, - 0.0013340713921934366, - -0.01142292469739914, - -0.0005336419562809169, - 0.0005126654868945479, - 0.00037344868178479373, - 0.0029547319281846285 - ], - [ - 8.22278525447473e-06, - 6.477540864580078e-06, - 0.0015973682748153806, - 0.00034015480196103454, - -0.0012577504385262728, - -5.450531898532063e-05, - 0.0006331544718705118, - -0.00027081489679403603, - 7.427356467815116e-05, - -0.006704355590045452, - 0.003175975289195776, - -0.0017300404142588377 - ], - [ - 0.04863045737147331, - 0.015314852818846703, - -0.0004648726317100227, - -0.00011676354915834963, - -4.930314753437415e-05, - -0.003952810075134039, - -0.01737578585743904, - -0.00015421917487401515, - 0.0012194222072139382, - -0.00018090127559844404, - -0.00042647725786082447, - 0.00012334177154116333 - ], - [ - -2.956846401502844e-05, - -0.0013855225406587124, - -0.00012129446986364201, - 0.1332160234451294, - -0.00024490474606864154, - -0.007315828464925289, - 0.00033297244226559997, - -0.000795092957559973, - -0.007938209921121597, - 0.208413764834404, - -0.00019127204723190516, - -0.00020650937221944332 - ], - [ - -0.0020483459811657667, - -0.0003764357534237206, - -0.0033135139383375645, - -0.009666135534644127, - -0.00031723169377073646, - -0.005141589790582657, - 0.31717124581336975, - 0.0028427678626030684, - 0.0004723234742414206, - -0.0011529687326401472, - 0.2726709246635437, - -0.003175639547407627 - ], - [ - -0.00043929810635745525, - 5.7089622714556754e-05, - -0.0020629793871194124, - 0.020066648721694946, - -0.007871017791330814, - 0.011316264048218727, - 0.003056862158700824, - 0.06856372952461243, - -0.002747517777606845, - -0.009279227815568447, - 0.000506624230183661, - -0.0013159140944480896 - ], - [ - -0.012957162223756313, - -0.0030454176012426615, - -0.01792328804731369, - -0.0043589151464402676, - -0.0011521632550284266, - 0.0004999117809347808, - -0.0031131464056670666, - 0.019585633650422096, - 4.34632929682266e-05, - 0.01297028549015522, - -0.007695754989981651, - -0.0009146086522378027 - ], - [ - 0.004100752994418144, - -0.020459463819861412, - -0.035875942558050156, - 0.014656225219368935, - 0.0008441276149824262, - 0.0017804511589929461, - -0.01804223284125328, - 0.003519016318023205, - 0.008253024891018867, - -0.0017665562918409705, - 0.044167667627334595, - 0.006474285386502743 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched Head Value" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " patched_head_v_diff,\n", @@ -10072,1523 +1296,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "%{hovertext}

Value Patch=%{x}
Output Patch=%{y}
Layer=%{marker.color}", - "hovertext": [ - "L0H0", - "L0H1", - "L0H2", - "L0H3", - "L0H4", - "L0H5", - "L0H6", - "L0H7", - "L0H8", - "L0H9", - "L0H10", - "L0H11", - "L1H0", - "L1H1", - "L1H2", - "L1H3", - "L1H4", - "L1H5", - "L1H6", - "L1H7", - "L1H8", - "L1H9", - "L1H10", - "L1H11", - "L2H0", - "L2H1", - "L2H2", - "L2H3", - "L2H4", - "L2H5", - "L2H6", - "L2H7", - "L2H8", - "L2H9", - "L2H10", - "L2H11", - "L3H0", - "L3H1", - "L3H2", - "L3H3", - "L3H4", - "L3H5", - "L3H6", - "L3H7", - "L3H8", - "L3H9", - "L3H10", - "L3H11", - "L4H0", - "L4H1", - "L4H2", - "L4H3", - "L4H4", - "L4H5", - "L4H6", - "L4H7", - "L4H8", - "L4H9", - "L4H10", - "L4H11", - "L5H0", - "L5H1", - "L5H2", - "L5H3", - "L5H4", - "L5H5", - "L5H6", - "L5H7", - "L5H8", - "L5H9", - "L5H10", - "L5H11", - "L6H0", - "L6H1", - "L6H2", - "L6H3", - "L6H4", - "L6H5", - "L6H6", - "L6H7", - "L6H8", - "L6H9", - "L6H10", - "L6H11", - "L7H0", - "L7H1", - "L7H2", - "L7H3", - "L7H4", - "L7H5", - "L7H6", - "L7H7", - "L7H8", - "L7H9", - "L7H10", - "L7H11", - "L8H0", - "L8H1", - "L8H2", - "L8H3", - "L8H4", - "L8H5", - "L8H6", - "L8H7", - "L8H8", - "L8H9", - "L8H10", - "L8H11", - "L9H0", - "L9H1", - "L9H2", - "L9H3", - "L9H4", - "L9H5", - "L9H6", - "L9H7", - "L9H8", - "L9H9", - "L9H10", - "L9H11", - "L10H0", - "L10H1", - "L10H2", - "L10H3", - "L10H4", - "L10H5", - "L10H6", - "L10H7", - "L10H8", - "L10H9", - "L10H10", - "L10H11", - "L11H0", - "L11H1", - "L11H2", - "L11H3", - "L11H4", - "L11H5", - "L11H6", - "L11H7", - "L11H8", - "L11H9", - "L11H10", - "L11H11" - ], - "legendgroup": "", - "marker": { - "color": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 4, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11 - ], - "coloraxis": "coloraxis", - "symbol": "circle" - }, - "mode": "markers", - "name": "", - "orientation": "v", - "showlegend": false, - "type": "scatter", - "x": [ - -0.00019892427371814847, - 0.005339574534446001, - 0.0006527548539452255, - 0.003504416672512889, - -0.00898387935012579, - 0.0034814265090972185, - -0.0008631910313852131, - -3.406582254683599e-05, - 0.0005166929331608117, - 0.00044255363172851503, - -0.0039068968035280704, - -0.0001880836207419634, - -0.0004399022145662457, - -0.00044510437874123454, - -6.73597096465528e-05, - 7.242763240355998e-05, - -3.6549441574607044e-05, - -0.0019323208834975958, - -0.0001572397886775434, - 1.6143509128596634e-05, - 0.00020593880617525429, - 0.000336798548232764, - 0.0003515324497129768, - -0.0005669358652085066, - 0.00021013410878367722, - -0.0007199132232926786, - 0.0004868560063187033, - -0.0005974104860797524, - -0.0005921411793678999, - -0.0005443819100037217, - -0.000227552984142676, - -0.0004809825913980603, - 0.00020570388005580753, - 0.001183376181870699, - -0.0003574058646336198, - -0.0009104468626901507, - 0.0010395278222858906, - -0.00012042184971505776, - -7.762980385450646e-05, - -0.0007275318494066596, - -0.001310007064603269, - -0.0023108376190066338, - 0.010987084358930588, - -5.0712766096694395e-05, - 0.00014314358122646809, - 0.00015069512301124632, - -7.957642083056271e-05, - -2.0238119759596884e-05, - -0.0005373673629947007, - -0.0008137872209772468, - -0.00013334336108528078, - 0.030609702691435814, - -0.007185807917267084, - 0.000148916311445646, - 0.0013340713921934366, - -0.01142292469739914, - -0.0005336419562809169, - 0.0005126654868945479, - 0.00037344868178479373, - 0.0029547319281846285, - 8.22278525447473e-06, - 6.477540864580078e-06, - 0.0015973682748153806, - 0.00034015480196103454, - -0.0012577504385262728, - -5.450531898532063e-05, - 0.0006331544718705118, - -0.00027081489679403603, - 7.427356467815116e-05, - -0.006704355590045452, - 0.003175975289195776, - -0.0017300404142588377, - 0.04863045737147331, - 0.015314852818846703, - -0.0004648726317100227, - -0.00011676354915834963, - -4.930314753437415e-05, - -0.003952810075134039, - -0.01737578585743904, - -0.00015421917487401515, - 0.0012194222072139382, - -0.00018090127559844404, - -0.00042647725786082447, - 0.00012334177154116333, - -2.956846401502844e-05, - -0.0013855225406587124, - -0.00012129446986364201, - 0.1332160234451294, - -0.00024490474606864154, - -0.007315828464925289, - 0.00033297244226559997, - -0.000795092957559973, - -0.007938209921121597, - 0.208413764834404, - -0.00019127204723190516, - -0.00020650937221944332, - -0.0020483459811657667, - -0.0003764357534237206, - -0.0033135139383375645, - -0.009666135534644127, - -0.00031723169377073646, - -0.005141589790582657, - 0.31717124581336975, - 0.0028427678626030684, - 0.0004723234742414206, - -0.0011529687326401472, - 0.2726709246635437, - -0.003175639547407627, - -0.00043929810635745525, - 5.7089622714556754e-05, - -0.0020629793871194124, - 0.020066648721694946, - -0.007871017791330814, - 0.011316264048218727, - 0.003056862158700824, - 0.06856372952461243, - -0.002747517777606845, - -0.009279227815568447, - 0.000506624230183661, - -0.0013159140944480896, - -0.012957162223756313, - -0.0030454176012426615, - -0.01792328804731369, - -0.0043589151464402676, - -0.0011521632550284266, - 0.0004999117809347808, - -0.0031131464056670666, - 0.019585633650422096, - 4.34632929682266e-05, - 0.01297028549015522, - -0.007695754989981651, - -0.0009146086522378027, - 0.004100752994418144, - -0.020459463819861412, - -0.035875942558050156, - 0.014656225219368935, - 0.0008441276149824262, - 0.0017804511589929461, - -0.01804223284125328, - 0.003519016318023205, - 0.008253024891018867, - -0.0017665562918409705, - 0.044167667627334595, - 0.006474285386502743 - ], - "xaxis": "x", - "y": [ - 0.0009487751522101462, - 0.016124747693538666, - 0.0018548924708738923, - 0.0034389030188322067, - -0.00982347596436739, - 0.011058605276048183, - -0.004063969012349844, - -0.0015792781487107277, - -0.0012082795146852732, - 0.003828897839412093, - -0.004256919026374817, - -0.0011422622483223677, - -0.0010771177476271987, - -0.00037898647133260965, - 2.5171791548928013e-06, - -0.00026067905128002167, - -0.00014146546891424805, - 0.0038321535103023052, - -0.0004293300735298544, - -0.00142992555629462, - -0.0009228314156644046, - 0.0006944393389858305, - 0.00043302192352712154, - -0.0035714071709662676, - -0.0004967569257132709, - 0.0008057993836700916, - 0.0005424688570201397, - -0.0005309234256856143, - -0.0007159864180721343, - -0.0010389237431809306, - -0.0009490771917626262, - -8.649027586216107e-05, - 0.0002766547549981624, - 0.0021084228064864874, - -0.0001975146442418918, - -0.0016405630158260465, - 0.1162627637386322, - 0.0002507446042727679, - -0.0014675153652206063, - -0.00039680811460129917, - 0.018962211906909943, - -0.00018764731066767126, - 0.011170871555805206, - -0.0013301445869728923, - -0.0007356539717875421, - -0.00030253134900704026, - -0.00014683544577565044, - -0.00022228369198273867, - -0.001650598249398172, - 0.0002927311579696834, - -0.00143563118763268, - 0.03084198758006096, - -0.007432155776768923, - -0.00028236035723239183, - 0.006017433945089579, - -0.011007187888026237, - -0.001266107545234263, - 0.0014901700196787715, - -0.0001800622121663764, - 0.002944394713267684, - -0.004211106337606907, - 0.0029597999528050423, - 0.002045023487880826, - 0.0013397098518908024, - -0.0012190865818411112, - 0.34349915385246277, - 0.0005632104002870619, - -0.0001262281439267099, - -0.00515326950699091, - 0.016240738332271576, - 0.01709030382335186, - -0.004175194539129734, - 0.039775289595127106, - 0.015226684510707855, - -0.0010229480685666203, - 0.0008072761120274663, - -0.004935584031045437, - -0.002123525831848383, - -0.014274083077907562, - 0.0013746818294748664, - 0.0014838266652077436, - 0.1302703619003296, - -0.00033616088330745697, - 0.0012919505825266242, - 0.00037177055492065847, - 0.019514480605721474, - 0.00022255218937061727, - 0.124249167740345, - -0.00040352059295400977, - -0.007652895525097847, - 0.0013010123511776328, - -0.0011253133416175842, - -0.007449474185705185, - 0.19224143028259277, - -0.003275118535384536, - -0.0005017912480980158, - -0.001007912098430097, - 3.091096004936844e-05, - -0.0008595998515374959, - 0.012359987013041973, - -0.0004041247011628002, - -0.004328910261392593, - 0.3185553252696991, - 0.002330605871975422, - 0.0021182901691645384, - 0.0001405928487656638, - 0.2779357433319092, - 0.005738262087106705, - 0.0058898297138512135, - -0.0009689796715974808, - 0.00912561360746622, - 0.020675739273428917, - -0.03700518235564232, - 0.014263041317462921, - -0.04828466475009918, - 0.05834139883518219, - 0.0006514795240946114, - 0.26360899209976196, - 0.0004918567719869316, - -0.00261044898070395, - 0.08374208211898804, - 0.020676210522651672, - -0.003743582172319293, - 0.01085072010755539, - -0.001096583902835846, - 0.00047430366976186633, - 0.04818058758974075, - -0.4799128472805023, - 0.00018429107149131596, - 0.011861988343298435, - 0.06088569387793541, - 0.0008461413672193885, - 0.005328264087438583, - -0.011493473313748837, - -0.11350836604833603, - 0.006329597905278206, - 0.00031669469899497926, - -0.0011600167490541935, - -0.022669579833745956, - 0.004070379305630922, - 0.0073160636238753796, - -0.00834545586258173, - -0.27817651629447937, - 0.0036344374530017376 - ], - "yaxis": "y" - } - ], - "layout": { - "coloraxis": { - "colorbar": { - "title": { - "text": "Layer" - } - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Scatter plot of output patching vs value patching" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "range": [ - -0.5, - 0.5 - ], - "title": { - "text": "Value Patch" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "range": [ - -0.5, - 0.5 - ], - "title": { - "text": "Output Patch" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "head_labels = [\n", " f\"L{l}H{h}\" for l in range(model.cfg.n_layers) for h in range(model.cfg.n_heads)\n", @@ -11619,7 +1329,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -11629,9 +1339,11 @@ " head_index,\n", " clean_cache,\n", "):\n", - " corrupted_head_pattern[:, head_index, :, :] = clean_cache[hook.name][\n", + " clean_value = clean_cache[hook.name][\n", " :, head_index, :, :\n", - " ]\n", + " ].clone()\n", + " corrupted_head_pattern = corrupted_head_pattern.clone()\n", + " corrupted_head_pattern[:, head_index : head_index + 1, :, :] = clean_value.unsqueeze(1)\n", " return corrupted_head_pattern\n", "\n", "\n", @@ -11655,2405 +1367,9 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0.0006401354330591857, - 0.005318799521774054, - 0.0011584057938307524, - -5.920405237702653e-05, - -0.00106671336106956, - 0.005079298280179501, - -0.0030818663071841, - -0.0020521720871329308, - -0.0014405983965843916, - 0.003492669900879264, - -0.002568227471783757, - -0.0009168237447738647 - ], - [ - -0.0007600873941555619, - 0.0001683824957581237, - 0.00012246915139257908, - -0.00034914951538667083, - 1.4901700524205808e-05, - 0.0050090523436665535, - -0.0002975976967718452, - -0.0014448943547904491, - -0.001099134678952396, - 0.00047447148244827986, - 5.195457561057992e-05, - -0.0034954219590872526 - ], - [ - -0.0007243098807521164, - 0.0017458146903663874, - -0.00015556166181340814, - 5.7626621128292754e-05, - -9.7398049547337e-05, - -0.0004238593974150717, - -0.0007917031762190163, - 0.00027222454082220793, - 0.00010179472155869007, - 0.0004223826399538666, - 0.00015193692524917424, - -0.0007437760941684246 - ], - [ - 0.11458104848861694, - 0.00021140948229003698, - -0.0009424989693798125, - 0.000429833511589095, - 0.02004295401275158, - 0.002104730810970068, - 7.628730963915586e-05, - -0.001543701975606382, - -0.0008484235731884837, - -0.0005819046637043357, - 0.00011921360419364646, - -1.899631206470076e-05 - ], - [ - -0.001127125695347786, - 0.001237143180333078, - -0.0012324444251134992, - -0.0005952289211563766, - -0.0007541133090853691, - -0.0005842540413141251, - 0.004813014063984156, - 0.00018187458044849336, - -0.0005361591465771198, - 0.0008579217828810215, - -0.0002985374303534627, - -1.144477391790133e-05 - ], - [ - -0.004241178277879953, - 0.0029509058222174644, - 0.0005218615406192839, - 0.0009535074350424111, - 0.0001622070267330855, - 0.34350839257240295, - -0.0003052163519896567, - 0.00010293584637111053, - -0.005300541408360004, - 0.024864863604307175, - 0.014383262023329735, - -0.0023285921197384596 - ], - [ - -0.0023893399629741907, - -0.002172795357182622, - -0.00047614958020858467, - 0.00043188079143874347, - -0.004675475414842367, - 0.0018583494238555431, - -0.0026542814448475838, - 0.0014367386465892196, - 0.00030326974228955805, - 0.13043038547039032, - 8.813483145786449e-05, - 0.0011766973184421659 - ], - [ - 0.00031847349600866437, - 0.02057075686752796, - 0.00031840638257563114, - -0.002512782346457243, - -0.0002628941729199141, - -0.00024718698114156723, - 0.0005524033331312239, - -0.00043131023994646966, - 0.00025715501396916807, - 0.008090951479971409, - -0.0030689111445099115, - -0.0004238593974150717 - ], - [ - 0.000976699055172503, - 0.00039251212729141116, - 0.0017534669023007154, - 0.022595642134547234, - -4.4805787183577195e-05, - 0.00014220383309293538, - 0.009584981948137283, - -0.0003157213795930147, - 0.0015271222218871117, - 0.0011813960736617446, - -0.010774029418826103, - 0.00936581939458847 - ], - [ - 0.006314125377684832, - -0.0010949057759717107, - 0.011662023141980171, - 0.0013481340138241649, - -0.02918696030974388, - 0.0038333951961249113, - -0.04409456625580788, - -0.005032042507082224, - 0.00482167350128293, - 0.2766477167606354, - -3.164933150401339e-05, - -0.0006618167390115559 - ], - [ - 0.0953889712691307, - 0.02506939135491848, - 0.014239178970456123, - 0.014754998497664928, - 9.890835644910112e-05, - -8.977938705356792e-05, - 0.05082912743091583, - -0.5051022171974182, - 0.00014696970174554735, - -0.0016026375815272331, - 0.06883199512958527, - 0.002327115274965763 - ], - [ - 0.0013425961369648576, - 0.009630928747355938, - -0.07776415348052979, - -0.007728713098913431, - -0.0005726079107262194, - -0.002957182005047798, - -0.0049475994892418385, - 0.00045916702947579324, - -0.0006328188464976847, - -0.006520198658108711, - -0.3204910457134247, - -0.002473111730068922 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Logit Difference From Patched Head Pattern" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "%{hovertext}

Attention Patch=%{x}
Output Patch=%{y}", - "hovertext": [ - "L0H0", - "L0H1", - "L0H2", - "L0H3", - "L0H4", - "L0H5", - "L0H6", - "L0H7", - "L0H8", - "L0H9", - "L0H10", - "L0H11", - "L1H0", - "L1H1", - "L1H2", - "L1H3", - "L1H4", - "L1H5", - "L1H6", - "L1H7", - "L1H8", - "L1H9", - "L1H10", - "L1H11", - "L2H0", - "L2H1", - "L2H2", - "L2H3", - "L2H4", - "L2H5", - "L2H6", - "L2H7", - "L2H8", - "L2H9", - "L2H10", - "L2H11", - "L3H0", - "L3H1", - "L3H2", - "L3H3", - "L3H4", - "L3H5", - "L3H6", - "L3H7", - "L3H8", - "L3H9", - "L3H10", - "L3H11", - "L4H0", - "L4H1", - "L4H2", - "L4H3", - "L4H4", - "L4H5", - "L4H6", - "L4H7", - "L4H8", - "L4H9", - "L4H10", - "L4H11", - "L5H0", - "L5H1", - "L5H2", - "L5H3", - "L5H4", - "L5H5", - "L5H6", - "L5H7", - "L5H8", - "L5H9", - "L5H10", - "L5H11", - "L6H0", - "L6H1", - "L6H2", - "L6H3", - "L6H4", - "L6H5", - "L6H6", - "L6H7", - "L6H8", - "L6H9", - "L6H10", - "L6H11", - "L7H0", - "L7H1", - "L7H2", - "L7H3", - "L7H4", - "L7H5", - "L7H6", - "L7H7", - "L7H8", - "L7H9", - "L7H10", - "L7H11", - "L8H0", - "L8H1", - "L8H2", - "L8H3", - "L8H4", - "L8H5", - "L8H6", - "L8H7", - "L8H8", - "L8H9", - "L8H10", - "L8H11", - "L9H0", - "L9H1", - "L9H2", - "L9H3", - "L9H4", - "L9H5", - "L9H6", - "L9H7", - "L9H8", - "L9H9", - "L9H10", - "L9H11", - "L10H0", - "L10H1", - "L10H2", - "L10H3", - "L10H4", - "L10H5", - "L10H6", - "L10H7", - "L10H8", - "L10H9", - "L10H10", - "L10H11", - "L11H0", - "L11H1", - "L11H2", - "L11H3", - "L11H4", - "L11H5", - "L11H6", - "L11H7", - "L11H8", - "L11H9", - "L11H10", - "L11H11" - ], - "legendgroup": "", - "marker": { - "color": "#636efa", - "symbol": "circle" - }, - "mode": "markers", - "name": "", - "orientation": "v", - "showlegend": false, - "type": "scatter", - "x": [ - 0.0006401354330591857, - 0.005318799521774054, - 0.0011584057938307524, - -5.920405237702653e-05, - -0.00106671336106956, - 0.005079298280179501, - -0.0030818663071841, - -0.0020521720871329308, - -0.0014405983965843916, - 0.003492669900879264, - -0.002568227471783757, - -0.0009168237447738647, - -0.0007600873941555619, - 0.0001683824957581237, - 0.00012246915139257908, - -0.00034914951538667083, - 1.4901700524205808e-05, - 0.0050090523436665535, - -0.0002975976967718452, - -0.0014448943547904491, - -0.001099134678952396, - 0.00047447148244827986, - 5.195457561057992e-05, - -0.0034954219590872526, - -0.0007243098807521164, - 0.0017458146903663874, - -0.00015556166181340814, - 5.7626621128292754e-05, - -9.7398049547337e-05, - -0.0004238593974150717, - -0.0007917031762190163, - 0.00027222454082220793, - 0.00010179472155869007, - 0.0004223826399538666, - 0.00015193692524917424, - -0.0007437760941684246, - 0.11458104848861694, - 0.00021140948229003698, - -0.0009424989693798125, - 0.000429833511589095, - 0.02004295401275158, - 0.002104730810970068, - 7.628730963915586e-05, - -0.001543701975606382, - -0.0008484235731884837, - -0.0005819046637043357, - 0.00011921360419364646, - -1.899631206470076e-05, - -0.001127125695347786, - 0.001237143180333078, - -0.0012324444251134992, - -0.0005952289211563766, - -0.0007541133090853691, - -0.0005842540413141251, - 0.004813014063984156, - 0.00018187458044849336, - -0.0005361591465771198, - 0.0008579217828810215, - -0.0002985374303534627, - -1.144477391790133e-05, - -0.004241178277879953, - 0.0029509058222174644, - 0.0005218615406192839, - 0.0009535074350424111, - 0.0001622070267330855, - 0.34350839257240295, - -0.0003052163519896567, - 0.00010293584637111053, - -0.005300541408360004, - 0.024864863604307175, - 0.014383262023329735, - -0.0023285921197384596, - -0.0023893399629741907, - -0.002172795357182622, - -0.00047614958020858467, - 0.00043188079143874347, - -0.004675475414842367, - 0.0018583494238555431, - -0.0026542814448475838, - 0.0014367386465892196, - 0.00030326974228955805, - 0.13043038547039032, - 8.813483145786449e-05, - 0.0011766973184421659, - 0.00031847349600866437, - 0.02057075686752796, - 0.00031840638257563114, - -0.002512782346457243, - -0.0002628941729199141, - -0.00024718698114156723, - 0.0005524033331312239, - -0.00043131023994646966, - 0.00025715501396916807, - 0.008090951479971409, - -0.0030689111445099115, - -0.0004238593974150717, - 0.000976699055172503, - 0.00039251212729141116, - 0.0017534669023007154, - 0.022595642134547234, - -4.4805787183577195e-05, - 0.00014220383309293538, - 0.009584981948137283, - -0.0003157213795930147, - 0.0015271222218871117, - 0.0011813960736617446, - -0.010774029418826103, - 0.00936581939458847, - 0.006314125377684832, - -0.0010949057759717107, - 0.011662023141980171, - 0.0013481340138241649, - -0.02918696030974388, - 0.0038333951961249113, - -0.04409456625580788, - -0.005032042507082224, - 0.00482167350128293, - 0.2766477167606354, - -3.164933150401339e-05, - -0.0006618167390115559, - 0.0953889712691307, - 0.02506939135491848, - 0.014239178970456123, - 0.014754998497664928, - 9.890835644910112e-05, - -8.977938705356792e-05, - 0.05082912743091583, - -0.5051022171974182, - 0.00014696970174554735, - -0.0016026375815272331, - 0.06883199512958527, - 0.002327115274965763, - 0.0013425961369648576, - 0.009630928747355938, - -0.07776415348052979, - -0.007728713098913431, - -0.0005726079107262194, - -0.002957182005047798, - -0.0049475994892418385, - 0.00045916702947579324, - -0.0006328188464976847, - -0.006520198658108711, - -0.3204910457134247, - -0.002473111730068922 - ], - "xaxis": "x", - "y": [ - 0.0009487751522101462, - 0.016124747693538666, - 0.0018548924708738923, - 0.0034389030188322067, - -0.00982347596436739, - 0.011058605276048183, - -0.004063969012349844, - -0.0015792781487107277, - -0.0012082795146852732, - 0.003828897839412093, - -0.004256919026374817, - -0.0011422622483223677, - -0.0010771177476271987, - -0.00037898647133260965, - 2.5171791548928013e-06, - -0.00026067905128002167, - -0.00014146546891424805, - 0.0038321535103023052, - -0.0004293300735298544, - -0.00142992555629462, - -0.0009228314156644046, - 0.0006944393389858305, - 0.00043302192352712154, - -0.0035714071709662676, - -0.0004967569257132709, - 0.0008057993836700916, - 0.0005424688570201397, - -0.0005309234256856143, - -0.0007159864180721343, - -0.0010389237431809306, - -0.0009490771917626262, - -8.649027586216107e-05, - 0.0002766547549981624, - 0.0021084228064864874, - -0.0001975146442418918, - -0.0016405630158260465, - 0.1162627637386322, - 0.0002507446042727679, - -0.0014675153652206063, - -0.00039680811460129917, - 0.018962211906909943, - -0.00018764731066767126, - 0.011170871555805206, - -0.0013301445869728923, - -0.0007356539717875421, - -0.00030253134900704026, - -0.00014683544577565044, - -0.00022228369198273867, - -0.001650598249398172, - 0.0002927311579696834, - -0.00143563118763268, - 0.03084198758006096, - -0.007432155776768923, - -0.00028236035723239183, - 0.006017433945089579, - -0.011007187888026237, - -0.001266107545234263, - 0.0014901700196787715, - -0.0001800622121663764, - 0.002944394713267684, - -0.004211106337606907, - 0.0029597999528050423, - 0.002045023487880826, - 0.0013397098518908024, - -0.0012190865818411112, - 0.34349915385246277, - 0.0005632104002870619, - -0.0001262281439267099, - -0.00515326950699091, - 0.016240738332271576, - 0.01709030382335186, - -0.004175194539129734, - 0.039775289595127106, - 0.015226684510707855, - -0.0010229480685666203, - 0.0008072761120274663, - -0.004935584031045437, - -0.002123525831848383, - -0.014274083077907562, - 0.0013746818294748664, - 0.0014838266652077436, - 0.1302703619003296, - -0.00033616088330745697, - 0.0012919505825266242, - 0.00037177055492065847, - 0.019514480605721474, - 0.00022255218937061727, - 0.124249167740345, - -0.00040352059295400977, - -0.007652895525097847, - 0.0013010123511776328, - -0.0011253133416175842, - -0.007449474185705185, - 0.19224143028259277, - -0.003275118535384536, - -0.0005017912480980158, - -0.001007912098430097, - 3.091096004936844e-05, - -0.0008595998515374959, - 0.012359987013041973, - -0.0004041247011628002, - -0.004328910261392593, - 0.3185553252696991, - 0.002330605871975422, - 0.0021182901691645384, - 0.0001405928487656638, - 0.2779357433319092, - 0.005738262087106705, - 0.0058898297138512135, - -0.0009689796715974808, - 0.00912561360746622, - 0.020675739273428917, - -0.03700518235564232, - 0.014263041317462921, - -0.04828466475009918, - 0.05834139883518219, - 0.0006514795240946114, - 0.26360899209976196, - 0.0004918567719869316, - -0.00261044898070395, - 0.08374208211898804, - 0.020676210522651672, - -0.003743582172319293, - 0.01085072010755539, - -0.001096583902835846, - 0.00047430366976186633, - 0.04818058758974075, - -0.4799128472805023, - 0.00018429107149131596, - 0.011861988343298435, - 0.06088569387793541, - 0.0008461413672193885, - 0.005328264087438583, - -0.011493473313748837, - -0.11350836604833603, - 0.006329597905278206, - 0.00031669469899497926, - -0.0011600167490541935, - -0.022669579833745956, - 0.004070379305630922, - 0.0073160636238753796, - -0.00834545586258173, - -0.27817651629447937, - 0.0036344374530017376 - ], - "yaxis": "y" - } - ], - "layout": { - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Scatter plot of output patching vs attention patching" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Attention Patch" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Output Patch" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " patched_head_attn_diff,\n", @@ -14101,47 +1417,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

Top Early Heads


\n", - "

Top Middle Heads


\n", - "

Top Late Heads


\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "top_k = 10\n", "top_heads_by_output_patch = torch.topk(\n", @@ -14239,7 +1517,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -14254,31 +1532,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

Induction Heads


\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "code = visualize_attention_patterns(\n", " induction_head_labels,\n", @@ -14350,25 +1606,9 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0.0390, 0.0000, 0.0310],\n", - " [0.1890, 0.1720, 0.0680],\n", - " [0.1570, 0.0210, 0.4820]])\n", - "tensor([[0.0030, 0.1320, 0.0050],\n", - " [0.0000, 0.0000, 0.0020],\n", - " [0.0020, 0.0090, 0.0000]])\n", - "tensor([[0.0040, 0.0000, 0.0040],\n", - " [0.0010, 0.0000, 0.0020],\n", - " [0.0020, 0.0090, 0.0020]])\n" - ] - } - ], + "outputs": [], "source": [ "seq_len = 100\n", "batch_size = 2\n", @@ -14444,3265 +1684,9 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0.039069853723049164, - 0.0004489101702347398, - 0.03133601322770119, - 0.007519590202718973, - 0.034592196345329285, - 0.00036230171099305153, - 0.034512776881456375, - 0.19740213453769684, - 0.038447845727205276, - 0.04053792357444763, - 0.027628764510154724, - 0.02496313862502575 - ], - [ - 0.1890650987625122, - 0.17219914495944977, - 0.06807752698659897, - 0.04494515433907509, - 0.07908554375171661, - 0.03096739575266838, - 0.028282109647989273, - 0.03644327446818352, - 0.026936717331409454, - 0.018826229497790337, - 0.045100897550582886, - 0.0065726665779948235 - ], - [ - 0.15745528042316437, - 0.020724520087242126, - 0.4817989468574524, - 0.2991352379322052, - 0.10764895379543304, - 0.33004048466682434, - 0.0997551754117012, - 0.04926132410764694, - 0.25493940711021423, - 0.3606453835964203, - 0.1257179230451584, - 0.07931824028491974 - ], - [ - 0.005844001192599535, - 0.15787364542484283, - 0.4189082086086273, - 0.30129021406173706, - 0.014345049858093262, - 0.032344333827495575, - 0.3312888443470001, - 0.5285974144935608, - 0.34242063760757446, - 0.101837158203125, - 0.10516070574522018, - 0.2233113795518875 - ], - [ - 0.10626544803380966, - 0.11930850893259048, - 0.022880680859088898, - 0.22826944291591644, - 0.020003994926810265, - 0.10010036826133728, - 0.1739213615655899, - 0.17407020926475525, - 0.02587701380252838, - 0.10249985754489899, - 0.009514841251075268, - 0.9921423196792603 - ], - [ - 0.019766658544540405, - 0.00528325280174613, - 0.16648508608341217, - 0.12087740004062653, - 0.16500000655651093, - 0.00803269725292921, - 0.41770195960998535, - 0.025827765464782715, - 0.04802601411938667, - 0.016231779009103775, - 0.03110172413289547, - 0.024261215701699257 - ], - [ - 0.2172909826040268, - 0.039100028574466705, - 0.01804858259856701, - 0.059900715947151184, - 0.032934583723545074, - 0.0873451679944992, - 0.026895340532064438, - 0.0943947583436966, - 0.49925994873046875, - 0.006240115500986576, - 0.027026718482375145, - 0.1278565675020218 - ], - [ - 0.2511657178401947, - 0.01330868061631918, - 0.006663354113698006, - 0.037430502474308014, - 0.02331537753343582, - 0.01740722358226776, - 0.022067422047257423, - 0.022141192108392715, - 0.04502448812127113, - 0.0208425372838974, - 0.008310739882290363, - 0.017167754471302032 - ], - [ - 0.020890623331069946, - 0.016537941992282867, - 0.02158307284116745, - 0.0150058064609766, - 0.02421221323311329, - 0.10198988765478134, - 0.029100384563207626, - 0.22793792188167572, - 0.02781485579907894, - 0.0179410632699728, - 0.024828944355249405, - 0.03806235268712044 - ], - [ - 0.02607586607336998, - 0.015407431870698929, - 0.02044427953660488, - 0.14558182656764984, - 0.01247025839984417, - 0.017151640728116035, - 0.013311829417943954, - 0.024451706558465958, - 0.018111787736415863, - 0.01319331955164671, - 0.0357399508357048, - 0.01879822090268135 - ], - [ - 0.02147812582552433, - 0.018419174477458, - 0.018183622509241104, - 0.02172141708433628, - 0.0315677747130394, - 0.034705750644207, - 0.017550116404891014, - 0.011417553760111332, - 0.01579565554857254, - 0.04592214897274971, - 0.01621554046869278, - 0.03039470687508583 - ], - [ - 0.03320508822798729, - 0.0175714660435915, - 0.015131079591810703, - 0.04148406535387039, - 0.015181189402937889, - 0.01758997142314911, - 0.015148494392633438, - 0.01767607219517231, - 0.06622709333896637, - 0.018451133742928505, - 0.01700744964182377, - 0.029749270528554916 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Previous Token Scores" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0.0031923248898237944, - 0.13236315548419952, - 0.005006915424019098, - 1.0427449524286203e-05, - 0.0013110184809193015, - 0.7034568786621094, - 0.00426204688847065, - 0.00016496369789820164, - 0.002474633976817131, - 0.0008572910446673632, - 0.01889149099588394, - 0.008690938353538513 - ], - [ - 0.0002916341181844473, - 0.00013782267342321575, - 0.0015036173863336444, - 0.005392482969909906, - 0.0018583914497867227, - 0.009062949568033218, - 0.012414448894560337, - 0.0022405502386391163, - 0.005135662388056517, - 0.005220627877861261, - 0.005546474829316139, - 0.02975049614906311 - ], - [ - 0.0024816279765218496, - 0.009442180395126343, - 0.0003456332196947187, - 0.0002591445227153599, - 0.0052116685546934605, - 0.000570951378904283, - 0.0015209749108180404, - 0.006313100922852755, - 0.001560864970088005, - 0.0004215767839923501, - 0.00015359291865024716, - 0.005160381551831961 - ], - [ - 0.6775657534599304, - 0.002840448170900345, - 0.0007841526530683041, - 0.00471264636144042, - 0.006322895642369986, - 0.006206681486219168, - 0.0005474805948324502, - 0.00037829449865967035, - 0.0020155368838459253, - 0.007952751591801643, - 0.003576782764866948, - 0.002608788898214698 - ], - [ - 0.00860405620187521, - 0.0070286463014781475, - 0.007598803844302893, - 0.003442801535129547, - 0.016561277210712433, - 0.0059797209687530994, - 0.004869826138019562, - 0.0007624455611221492, - 0.006062133703380823, - 0.007536627352237701, - 0.012022900395095348, - 1.055422134237094e-12 - ], - [ - 0.00950299296528101, - 0.00856209360063076, - 0.004162600729614496, - 0.003008665982633829, - 0.006847422569990158, - 0.004358117934316397, - 0.007669268175959587, - 0.009584215469658375, - 0.0076188258826732635, - 0.0043280418030917645, - 0.041402824223041534, - 0.00976183544844389 - ], - [ - 0.004456141032278538, - 0.008873268961906433, - 0.007405205629765987, - 0.0062249391339719296, - 0.00731915095821023, - 0.005623893812298775, - 0.017349667847156525, - 0.005529467947781086, - 0.002920132130384445, - 0.008636755868792534, - 0.006222263444215059, - 0.00835894700139761 - ], - [ - 0.003699858672916889, - 0.04107949137687683, - 0.04148268699645996, - 0.009313640184700489, - 0.009097025729715824, - 0.008774377405643463, - 0.007298537530004978, - 0.023312218487262726, - 0.008843323215842247, - 0.00987986009567976, - 0.017598601058125496, - 0.006039854139089584 - ], - [ - 0.008986304514110088, - 0.028667239472270012, - 0.008891218341886997, - 0.010114557109773159, - 0.009737391024827957, - 0.007611637003719807, - 0.009763265959918499, - 0.005155472084879875, - 0.009276345372200012, - 0.011895839124917984, - 0.010411946102976799, - 0.007498950231820345 - ], - [ - 0.024409977719187737, - 0.011438451707363129, - 0.02003096230328083, - 0.0051185814663767815, - 0.015081286430358887, - 0.012334450148046017, - 0.015452565625309944, - 0.008602450601756573, - 0.014702522195875645, - 0.020766200497746468, - 0.009192758239805698, - 0.005703347735106945 - ], - [ - 0.017897022888064384, - 0.013280633836984634, - 0.006755237001925707, - 0.012744844891130924, - 0.008020960725843906, - 0.007722244597971439, - 0.017341373488307, - 0.0074546560645103455, - 0.007832515984773636, - 0.00825214572250843, - 0.013642766512930393, - 0.012807483784854412 - ], - [ - 0.004923742264509201, - 0.007951060310006142, - 0.007947920821607113, - 0.004564082249999046, - 0.010363400913774967, - 0.009582078084349632, - 0.0102877551689744, - 0.00832072552293539, - 0.0025700009427964687, - 0.012810997664928436, - 0.008063871413469315, - 0.006558285094797611 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Duplicate Token Scores" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - 0.004035575315356255, - 3.85937346436549e-05, - 0.003946058917790651, - 1.7428524756724073e-07, - 5.9896130551351234e-05, - 4.0836803236743435e-05, - 0.0035017586778849363, - 0.00024610417312942445, - 0.0031679815147072077, - 0.0030104012694209814, - 0.002093541668727994, - 0.008525434881448746 - ], - [ - 0.000526473973877728, - 0.00015670718858018517, - 0.001507942914031446, - 0.005595325026661158, - 0.0018401180859655142, - 0.0038875630125403404, - 0.005349153187125921, - 0.004649169277399778, - 0.005880181211978197, - 0.007283917628228664, - 0.005552186165004969, - 0.00012677280756179243 - ], - [ - 0.0022015420254319906, - 0.008784863166511059, - 0.002159146359190345, - 0.0010447809472680092, - 0.005142326466739178, - 0.002251626690849662, - 0.0008376616751775146, - 0.006352409720420837, - 0.002618127502501011, - 0.0010309136705473065, - 0.00015219187480397522, - 0.005351166240870953 - ], - [ - 0.007752244360744953, - 0.0030915802344679832, - 0.001362923881970346, - 0.004341960418969393, - 0.011233060620725155, - 0.006535551976412535, - 0.000906877510715276, - 0.0006078600417822599, - 0.002819513902068138, - 0.005254077725112438, - 0.004195652436465025, - 0.00255418848246336 - ], - [ - 0.007342735771089792, - 0.004788339603692293, - 0.007458819076418877, - 0.0033073313534259796, - 0.007871866226196289, - 0.004219769034534693, - 0.004172054585069418, - 0.0005154653917998075, - 0.008124975487589836, - 0.0068268910981714725, - 0.008085492067039013, - 3.761376626831847e-11 - ], - [ - 0.4337766170501709, - 0.9306095838546753, - 0.006382268853485584, - 0.0034730439074337482, - 0.005500996019691229, - 0.9255973696708679, - 0.00538142304867506, - 0.007857315242290497, - 0.00863779615610838, - 0.01576443389058113, - 0.012188379652798176, - 0.008265726268291473 - ], - [ - 0.002507298020645976, - 0.008432027883827686, - 0.008623305708169937, - 0.007653353735804558, - 0.01105806790292263, - 0.005525435321033001, - 0.017205175012350082, - 0.004794349893927574, - 0.0040976013988256454, - 0.9257788062095642, - 0.020375633612275124, - 0.006313954945653677 - ], - [ - 0.005555536597967148, - 0.18942977488040924, - 0.8509925007820129, - 0.008273146115243435, - 0.008239664137363434, - 0.00864996388554573, - 0.02832852303981781, - 0.08996275067329407, - 0.006617339327931404, - 0.009413909167051315, - 0.9037814736366272, - 0.03037159889936447 - ], - [ - 0.00735454261302948, - 0.3791317641735077, - 0.005602709017693996, - 0.025401461869478226, - 0.008504674769937992, - 0.00623108958825469, - 0.11892436444759369, - 0.005114651285111904, - 0.013350939378142357, - 0.01576736941933632, - 0.025843923911452293, - 0.008429747074842453 - ], - [ - 0.2398916333913803, - 0.14378757774829865, - 0.09330663084983826, - 0.005819779820740223, - 0.07744801044464111, - 0.01644793339073658, - 0.4442836344242096, - 0.011141352355480194, - 0.03619001433253288, - 0.472646564245224, - 0.00803996529430151, - 0.030953049659729004 - ], - [ - 0.3606555163860321, - 0.48201146721839905, - 0.022851115092635155, - 0.1264195442199707, - 0.04125598818063736, - 0.0072374604642391205, - 0.2877156138420105, - 0.3897320628166199, - 0.030060900375247, - 0.006112942937761545, - 0.1655488908290863, - 0.22245149314403534 - ], - [ - 0.007408542558550835, - 0.033737149089574814, - 0.02041277289390564, - 0.002755412133410573, - 0.02518630214035511, - 0.07808877527713776, - 0.033082809299230576, - 0.046440087258815765, - 0.0032543439883738756, - 0.2744256258010864, - 0.3800230026245117, - 0.009483495727181435 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Induction Head Scores" - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "imshow(\n", " prev_token_scores, labels={\"x\": \"Head\", \"y\": \"Layer\"}, title=\"Previous Token Scores\"\n", @@ -17763,21 +1747,9 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Top Name Mover to ablate: L9H9\n", - "Original logit diff: 3.55\n", - "Post ablation logit diff: 2.92\n", - "Direct Logit Attribution of top name mover head: 2.99\n", - "Naive prediction of post ablation logit diff: 0.57\n" - ] - } - ], + "outputs": [], "source": [ "top_name_mover = per_head_logit_diffs.flatten().argmax().item()\n", "top_name_mover_layer = top_name_mover // model.cfg.n_heads\n", @@ -17786,6 +1758,7 @@ "\n", "\n", "def ablate_top_head_hook(z: Float[torch.Tensor, \"batch pos head_index d_head\"], hook):\n", + " z = z.clone()\n", " z[:, -1, top_name_mover_head, :] = 0\n", " return z\n", "\n", @@ -17818,2420 +1791,9 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tried to stack head results when they weren't cached. Computing head results now\n" - ] - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "coloraxis": "coloraxis", - "hovertemplate": "Head: %{x}
Layer: %{y}
color: %{z}", - "name": "0", - "type": "heatmap", - "xaxis": "x", - "yaxis": "y", - "z": [ - [ - -0.002156503964215517, - -0.0004650682385545224, - 0.00024167183437384665, - 0.0002806585980579257, - -0.0004162999684922397, - -0.0004892416181974113, - -0.002620948012918234, - -0.002935677068307996, - 0.00042561208829283714, - 0.0005418329383246601, - 0.00023754138965159655, - -7.48957390896976e-05 - ], - [ - -0.000658505829051137, - 0.0004060641804244369, - -0.0009330413886345923, - 0.0008937822422012687, - -0.0009785268921405077, - -0.000533820129930973, - -0.0027988189831376076, - -0.004214101936668158, - 0.002578593324869871, - 0.0024506838526576757, - 0.0005351756699383259, - 0.0012349633034318686 - ], - [ - 0.0009405204327777028, - -0.0011168691562488675, - -0.0011541967978700995, - -0.0015697095077484846, - -0.0005699327448382974, - 0.001451514894142747, - 0.002439911477267742, - 0.003158293664455414, - 0.000923738582059741, - -0.003578126197680831, - -0.0010650777257978916, - -0.0003558753523975611 - ], - [ - -0.0005624951445497572, - -1.1960582924075425e-05, - 0.0011531109921634197, - 0.0007360265008173883, - 0.0016493839211761951, - 0.0008800819050520658, - -0.0006905529880896211, - -0.003031972097232938, - 0.0008080147090367973, - 0.00010368914809077978, - -0.0005807994166389108, - -0.0011067037703469396 - ], - [ - -0.0026375530287623405, - 0.0002691895351745188, - -0.0016417437000200152, - -0.003406986128538847, - 0.0017449699807912111, - 0.00046454701805487275, - -0.0007899806369096041, - 0.0018328562146052718, - -0.00086324627045542, - -0.0003978293389081955, - 0.0007879206677898765, - -0.00012048585631418973 - ], - [ - 0.0008688560919836164, - 0.0009473530226387084, - -0.0022812988609075546, - -0.0011803123634308577, - 0.0002407809515716508, - -0.0004318578285165131, - -0.0003728170122485608, - -0.000738416681997478, - 0.0008113418589346111, - -0.00040444196201860905, - -0.007074396125972271, - 0.003946478478610516 - ], - [ - -0.014917617663741112, - -0.0022801742888987064, - 0.0022679336834698915, - -8.302251808345318e-05, - -0.004980948753654957, - 0.0027670026756823063, - 0.006266288459300995, - -0.003485947148874402, - -0.0013348984066396952, - -0.0017918883822858334, - -0.0012231896398589015, - 0.00040514359716326 - ], - [ - -0.0002460568503011018, - -0.005790225230157375, - -0.0004975841729901731, - 0.142182856798172, - -0.0014961492270231247, - -0.019006317481398582, - 0.003133433870971203, - -0.001858205534517765, - -0.011305196210741997, - 0.1922595500946045, - -0.0011892566690221429, - -0.0010282933944836259 - ], - [ - -0.0038003993686288595, - -0.0008570950012654066, - -0.013956742361187935, - 0.00828910805284977, - 0.004315475933253765, - -0.009073829278349876, - -0.08315148949623108, - 0.0034569751005619764, - -0.01805492490530014, - 0.002178061753511429, - 0.29780513048171997, - 0.02409379370510578 - ], - [ - 0.08904723823070526, - -0.0007931794971227646, - 0.07247699797153473, - 0.015016308054327965, - -0.02120928093791008, - 0.05205465108156204, - 1.4411165714263916, - 0.04743674397468567, - -0.03229031339287758, - 0, - 0.0019993737805634737, - -0.00807223655283451 - ], - [ - 0.8600788116455078, - 0.3260062038898468, - 0.16344408690929413, - 0.07133537530899048, - -0.00444837287068367, - 0.000681330740917474, - 0.36613449454307556, - -0.7105098962783813, - -0.002031375654041767, - -0.032143525779247284, - 1.2294330596923828, - 0.0018453558441251516 - ], - [ - 0.016877274960279465, - -0.001730365096591413, - -0.5010868310928345, - 0.02749764919281006, - -0.0059662917628884315, - -0.004944110754877329, - -0.08855228126049042, - 0.006622308399528265, - 0.044124361127614975, - -0.02726735547184944, - -1.134916067123413, - 0.02287953346967697 - ] - ] - } - ], - "layout": { - "coloraxis": { - "cmid": 0, - "colorscale": [ - [ - 0, - "rgb(103,0,31)" - ], - [ - 0.1, - "rgb(178,24,43)" - ], - [ - 0.2, - "rgb(214,96,77)" - ], - [ - 0.3, - "rgb(244,165,130)" - ], - [ - 0.4, - "rgb(253,219,199)" - ], - [ - 0.5, - "rgb(247,247,247)" - ], - [ - 0.6, - "rgb(209,229,240)" - ], - [ - 0.7, - "rgb(146,197,222)" - ], - [ - 0.8, - "rgb(67,147,195)" - ], - [ - 0.9, - "rgb(33,102,172)" - ], - [ - 1, - "rgb(5,48,97)" - ] - ] - }, - "margin": { - "t": 60 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "xaxis": { - "anchor": "y", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "scaleanchor": "y", - "title": { - "text": "Head" - } - }, - "yaxis": { - "anchor": "x", - "autorange": "reversed", - "constrain": "domain", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Layer" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "%{hovertext}

Ablated=%{x}
Original=%{y}", - "hovertext": [ - "L0H0", - "L0H1", - "L0H2", - "L0H3", - "L0H4", - "L0H5", - "L0H6", - "L0H7", - "L0H8", - "L0H9", - "L0H10", - "L0H11", - "L1H0", - "L1H1", - "L1H2", - "L1H3", - "L1H4", - "L1H5", - "L1H6", - "L1H7", - "L1H8", - "L1H9", - "L1H10", - "L1H11", - "L2H0", - "L2H1", - "L2H2", - "L2H3", - "L2H4", - "L2H5", - "L2H6", - "L2H7", - "L2H8", - "L2H9", - "L2H10", - "L2H11", - "L3H0", - "L3H1", - "L3H2", - "L3H3", - "L3H4", - "L3H5", - "L3H6", - "L3H7", - "L3H8", - "L3H9", - "L3H10", - "L3H11", - "L4H0", - "L4H1", - "L4H2", - "L4H3", - "L4H4", - "L4H5", - "L4H6", - "L4H7", - "L4H8", - "L4H9", - "L4H10", - "L4H11", - "L5H0", - "L5H1", - "L5H2", - "L5H3", - "L5H4", - "L5H5", - "L5H6", - "L5H7", - "L5H8", - "L5H9", - "L5H10", - "L5H11", - "L6H0", - "L6H1", - "L6H2", - "L6H3", - "L6H4", - "L6H5", - "L6H6", - "L6H7", - "L6H8", - "L6H9", - "L6H10", - "L6H11", - "L7H0", - "L7H1", - "L7H2", - "L7H3", - "L7H4", - "L7H5", - "L7H6", - "L7H7", - "L7H8", - "L7H9", - "L7H10", - "L7H11", - "L8H0", - "L8H1", - "L8H2", - "L8H3", - "L8H4", - "L8H5", - "L8H6", - "L8H7", - "L8H8", - "L8H9", - "L8H10", - "L8H11", - "L9H0", - "L9H1", - "L9H2", - "L9H3", - "L9H4", - "L9H5", - "L9H6", - "L9H7", - "L9H8", - "L9H9", - "L9H10", - "L9H11", - "L10H0", - "L10H1", - "L10H2", - "L10H3", - "L10H4", - "L10H5", - "L10H6", - "L10H7", - "L10H8", - "L10H9", - "L10H10", - "L10H11", - "L11H0", - "L11H1", - "L11H2", - "L11H3", - "L11H4", - "L11H5", - "L11H6", - "L11H7", - "L11H8", - "L11H9", - "L11H10", - "L11H11" - ], - "legendgroup": "", - "marker": { - "color": "#636efa", - "symbol": "circle" - }, - "mode": "markers", - "name": "", - "orientation": "v", - "showlegend": false, - "type": "scatter", - "x": [ - -0.002156503964215517, - -0.0004650682385545224, - 0.00024167183437384665, - 0.0002806585980579257, - -0.0004162999684922397, - -0.0004892416181974113, - -0.002620948012918234, - -0.002935677068307996, - 0.00042561208829283714, - 0.0005418329383246601, - 0.00023754138965159655, - -7.48957390896976e-05, - -0.000658505829051137, - 0.0004060641804244369, - -0.0009330413886345923, - 0.0008937822422012687, - -0.0009785268921405077, - -0.000533820129930973, - -0.0027988189831376076, - -0.004214101936668158, - 0.002578593324869871, - 0.0024506838526576757, - 0.0005351756699383259, - 0.0012349633034318686, - 0.0009405204327777028, - -0.0011168691562488675, - -0.0011541967978700995, - -0.0015697095077484846, - -0.0005699327448382974, - 0.001451514894142747, - 0.002439911477267742, - 0.003158293664455414, - 0.000923738582059741, - -0.003578126197680831, - -0.0010650777257978916, - -0.0003558753523975611, - -0.0005624951445497572, - -1.1960582924075425e-05, - 0.0011531109921634197, - 0.0007360265008173883, - 0.0016493839211761951, - 0.0008800819050520658, - -0.0006905529880896211, - -0.003031972097232938, - 0.0008080147090367973, - 0.00010368914809077978, - -0.0005807994166389108, - -0.0011067037703469396, - -0.0026375530287623405, - 0.0002691895351745188, - -0.0016417437000200152, - -0.003406986128538847, - 0.0017449699807912111, - 0.00046454701805487275, - -0.0007899806369096041, - 0.0018328562146052718, - -0.00086324627045542, - -0.0003978293389081955, - 0.0007879206677898765, - -0.00012048585631418973, - 0.0008688560919836164, - 0.0009473530226387084, - -0.0022812988609075546, - -0.0011803123634308577, - 0.0002407809515716508, - -0.0004318578285165131, - -0.0003728170122485608, - -0.000738416681997478, - 0.0008113418589346111, - -0.00040444196201860905, - -0.007074396125972271, - 0.003946478478610516, - -0.014917617663741112, - -0.0022801742888987064, - 0.0022679336834698915, - -8.302251808345318e-05, - -0.004980948753654957, - 0.0027670026756823063, - 0.006266288459300995, - -0.003485947148874402, - -0.0013348984066396952, - -0.0017918883822858334, - -0.0012231896398589015, - 0.00040514359716326, - -0.0002460568503011018, - -0.005790225230157375, - -0.0004975841729901731, - 0.142182856798172, - -0.0014961492270231247, - -0.019006317481398582, - 0.003133433870971203, - -0.001858205534517765, - -0.011305196210741997, - 0.1922595500946045, - -0.0011892566690221429, - -0.0010282933944836259, - -0.0038003993686288595, - -0.0008570950012654066, - -0.013956742361187935, - 0.00828910805284977, - 0.004315475933253765, - -0.009073829278349876, - -0.08315148949623108, - 0.0034569751005619764, - -0.01805492490530014, - 0.002178061753511429, - 0.29780513048171997, - 0.02409379370510578, - 0.08904723823070526, - -0.0007931794971227646, - 0.07247699797153473, - 0.015016308054327965, - -0.02120928093791008, - 0.05205465108156204, - 1.4411165714263916, - 0.04743674397468567, - -0.03229031339287758, - 0, - 0.0019993737805634737, - -0.00807223655283451, - 0.8600788116455078, - 0.3260062038898468, - 0.16344408690929413, - 0.07133537530899048, - -0.00444837287068367, - 0.000681330740917474, - 0.36613449454307556, - -0.7105098962783813, - -0.002031375654041767, - -0.032143525779247284, - 1.2294330596923828, - 0.0018453558441251516, - 0.016877274960279465, - -0.001730365096591413, - -0.5010868310928345, - 0.02749764919281006, - -0.0059662917628884315, - -0.004944110754877329, - -0.08855228126049042, - 0.006622308399528265, - 0.044124361127614975, - -0.02726735547184944, - -1.134916067123413, - 0.02287953346967697 - ], - "xaxis": "x", - "y": [ - -0.0020563392899930477, - -0.0005101899732835591, - 0.0004685786843765527, - 0.00012512074317783117, - -0.0006028738571330905, - -0.0002429460291750729, - -0.0023189077619463205, - -0.002758360467851162, - 0.000564602785743773, - 0.0009697531932033598, - -0.0002504526637494564, - 4.737317794933915e-06, - -0.0010070882271975279, - 0.00039470894262194633, - -0.00154874159488827, - 0.0014034928753972054, - -0.0012653048615902662, - -0.0011358022456988692, - -0.00281596090644598, - -0.0029645217582583427, - 0.0029190476052463055, - 0.0025743592996150255, - 0.00036239007022231817, - 0.0017548729665577412, - 0.0005569400964304805, - -0.001126631861552596, - -0.0017353934235870838, - -0.0014514457434415817, - -0.00028735760133713484, - 0.0017211002996191382, - 0.0026658899150788784, - 0.00311466702260077, - 0.0005667927907779813, - -0.003666515462100506, - -0.0018847601022571325, - 7.039372576400638e-06, - -0.0007264417363330722, - 0.00011364505917299539, - 0.0014301587361842394, - 0.0007490540738217533, - 0.0020184689201414585, - 0.0007436950691044331, - -0.00046178390039131045, - -0.0039057559333741665, - 0.0011406694538891315, - -4.022853681817651e-05, - -0.0013293239753693342, - -0.0017636751290410757, - -0.0028280913829803467, - 0.00033634810824878514, - -0.0014248639345169067, - -0.003777273464947939, - 0.0015998880844563246, - 0.0002989505883306265, - -0.000804675742983818, - 0.002038792008534074, - -0.0015593919670209289, - -0.0006436670082621276, - 0.0011168173514306545, - -0.00035012533771805465, - 0.0011338205076754093, - 0.0011259170714765787, - -0.002516670385375619, - -0.0014790185960009694, - 0.0003878737334161997, - -6.408110493794084e-05, - -0.0005096744280308485, - -0.0008840755908749998, - 0.0006398351397365332, - -0.0010097370250150561, - -0.006759158335626125, - 0.0033667823299765587, - -0.01514742337167263, - -0.0021350777242332697, - 0.002593174111098051, - -0.00042678468162193894, - -0.005558924749493599, - 0.0026658528950065374, - 0.006411008536815643, - -0.003826778382062912, - -0.0003843410813715309, - -0.0016430341638624668, - -0.0013344454346224666, - -9.20506427064538e-05, - -9.476230479776859e-05, - -0.0057889921590685844, - -0.0006383581785485148, - 0.13493388891220093, - -0.001768707763403654, - -0.018917907029390335, - 0.003873429261147976, - -0.0021450775675475597, - -0.010327338241040707, - 0.18325845897197723, - -0.0007747983909212053, - -0.00104526337236166, - -0.003833949100226164, - -0.0008046097937040031, - -0.012673400342464447, - 0.00804573018103838, - 0.003604492638260126, - -0.009398287162184715, - -0.08272082358598709, - 0.003555194940418005, - -0.018404025584459305, - 0.0017587244510650635, - 0.2896133363246918, - 0.022854052484035492, - 0.08595258742570877, - -0.0006932877004146576, - 0.06817055493593216, - 0.013111240230500698, - -0.021098043769598007, - 0.05112447217106819, - 1.3844914436340332, - 0.045836858451366425, - -0.03830280900001526, - 2.985445976257324, - 0.0019662054255604744, - -0.008030137047171593, - 0.5608693957328796, - 0.17083050310611725, - -0.03361757844686508, - 0.05821544677019119, - -0.0024530249647796154, - 0.0018771197646856308, - 0.28827205300331116, - -1.8986485004425049, - -0.0015286931302398443, - -0.035129792988300323, - 0.4802178740501404, - -0.0009115453576669097, - 0.016075748950242996, - -0.03986122086644173, - -0.3879126012325287, - 0.011123123578727245, - -0.005477819126099348, - -0.0025129620917141438, - -0.08056175708770752, - 0.007518616039305925, - 0.0430111438035965, - -0.040082238614559174, - -0.9702364802360535, - 0.011862239800393581 - ], - "yaxis": "y" - } - ], - "layout": { - "legend": { - "tracegroupgap": 0 - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Original vs Post-Ablation Direct Logit Attribution of Heads" - }, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "range": [ - -3, - 3 - ], - "title": { - "text": "Ablated" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "range": [ - -3, - 3 - ], - "title": { - "text": "Original" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "per_head_ablated_residual, labels = ablated_cache.stack_head_results(\n", " layer=-1, pos_slice=-1, return_labels=True\n", @@ -20265,33 +1827,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Average LN scaling ratio: 1.042\n", - "Ablation LN scale tensor([[18.5200],\n", - " [17.4700],\n", - " [17.8200],\n", - " [17.5100],\n", - " [17.2600],\n", - " [18.2500],\n", - " [16.1800],\n", - " [17.4300]])\n", - "Original LN scale tensor([[19.5700],\n", - " [18.3500],\n", - " [18.2900],\n", - " [18.6800],\n", - " [17.4900],\n", - " [18.8700],\n", - " [16.4200],\n", - " [18.6800]])\n" - ] - } - ], + "outputs": [], "source": [ "print(\n", " \"Average LN scaling ratio:\",\n", @@ -20326,7 +1864,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "transformer-lens", "language": "python", "name": "python3" }, @@ -20340,14 +1878,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "vscode": { - "interpreter": { - "hash": "eb812820b5094695c8a581672e17220e30dd2c15d704c018326e3cc2e1a566f1" - } + "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/demos/Patchscopes_Generation_Demo.ipynb b/demos/Patchscopes_Generation_Demo.ipynb index 2a9109154..8f06af4cc 100644 --- a/demos/Patchscopes_Generation_Demo.ipynb +++ b/demos/Patchscopes_Generation_Demo.ipynb @@ -30,48 +30,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", - "import os\n", - "\n", - "DEBUG_MODE = False\n", - "IN_GITHUB = os.getenv(\"GITHUB_ACTIONS\") == \"true\"\n", - "try:\n", - " import google.colab\n", - "\n", - " IN_COLAB = True\n", - " print(\"Running as a Colab notebook\")\n", - "except:\n", - " IN_COLAB = False\n", - " print(\"Running as a Jupyter notebook - intended for development only!\")\n", - " from IPython import get_ipython\n", - "\n", - " ipython = get_ipython()\n", - " # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n", - " ipython.run_line_magic(\"load_ext\", \"autoreload\")\n", - " ipython.run_line_magic(\"autoreload\", \"2\")\n", - "\n", - "if IN_COLAB or IN_GITHUB:\n", - " %pip install transformer_lens\n", - " %pip install torchtyping\n", - " # Install my janky personal plotting utils\n", - " %pip install git+https://github.com/neelnanda-io/neel-plotly.git\n", - " # Install another version of node that makes PySvelte work way faster\n", - " %pip install circuitsvis\n", - " # Needed for PySvelte to work, v3 came out and broke things...\n", - " %pip install typeguard==2.13.3\n", - "\n", - "import torch\n", - "from typing import List, Callable, Tuple, Union\n", - "from functools import partial\n", - "from jaxtyping import Float\n", - "from transformer_lens import HookedTransformer\n", - "from transformer_lens.ActivationCache import ActivationCache\n", - "import transformer_lens.utils as utils\n", - "from transformer_lens.hook_points import (\n", - " HookPoint,\n", - ") # Hooking utilities" - ] + "source": "# NBVAL_IGNORE_OUTPUT\n# Janky code to do different setup when run in a Colab notebook vs VSCode\nimport os\n\nDEBUG_MODE = False\nIN_GITHUB = os.getenv(\"GITHUB_ACTIONS\") == \"true\"\ntry:\n import google.colab\n\n IN_COLAB = True\n print(\"Running as a Colab notebook\")\nexcept:\n IN_COLAB = False\n\nif not IN_GITHUB and not IN_COLAB:\n print(\"Running as a Jupyter notebook - intended for development only!\")\n from IPython import get_ipython\n\n ipython = get_ipython()\n # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n ipython.run_line_magic(\"load_ext\", \"autoreload\")\n ipython.run_line_magic(\"autoreload\", \"2\")\n\nif IN_COLAB or IN_GITHUB:\n %pip install transformer_lens\n %pip install torchtyping\n # Install my janky personal plotting utils\n %pip install git+https://github.com/neelnanda-io/neel-plotly.git\n # Install another version of node that makes PySvelte work way faster\n %pip install circuitsvis\n # Needed for PySvelte to work, v3 came out and broke things...\n %pip install typeguard==2.13.3\n\nimport torch\nfrom typing import List, Callable, Tuple, Union\nfrom functools import partial\nfrom jaxtyping import Float\nfrom transformer_lens.model_bridge import TransformerBridge\nfrom transformer_lens.ActivationCache import ActivationCache\nimport transformer_lens.utils as utils\nfrom transformer_lens.hook_points import (\n HookPoint,\n) # Hooking utilities" }, { "cell_type": "markdown", @@ -148,78 +107,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded pretrained model gpt2-small into HookedTransformer\n" - ] - }, - { - "data": { - "text/plain": [ - "HookedTransformer(\n", - " (embed): Embed()\n", - " (hook_embed): HookPoint()\n", - " (pos_embed): PosEmbed()\n", - " (hook_pos_embed): HookPoint()\n", - " (blocks): ModuleList(\n", - " (0-11): 12 x TransformerBlock(\n", - " (ln1): LayerNormPre(\n", - " (hook_scale): HookPoint()\n", - " (hook_normalized): HookPoint()\n", - " )\n", - " (ln2): LayerNormPre(\n", - " (hook_scale): HookPoint()\n", - " (hook_normalized): HookPoint()\n", - " )\n", - " (attn): Attention(\n", - " (hook_k): HookPoint()\n", - " (hook_q): HookPoint()\n", - " (hook_v): HookPoint()\n", - " (hook_z): HookPoint()\n", - " (hook_attn_scores): HookPoint()\n", - " (hook_pattern): HookPoint()\n", - " (hook_result): HookPoint()\n", - " )\n", - " (mlp): MLP(\n", - " (hook_pre): HookPoint()\n", - " (hook_post): HookPoint()\n", - " )\n", - " (hook_attn_in): HookPoint()\n", - " (hook_q_input): HookPoint()\n", - " (hook_k_input): HookPoint()\n", - " (hook_v_input): HookPoint()\n", - " (hook_mlp_in): HookPoint()\n", - " (hook_attn_out): HookPoint()\n", - " (hook_mlp_out): HookPoint()\n", - " (hook_resid_pre): HookPoint()\n", - " (hook_resid_mid): HookPoint()\n", - " (hook_resid_post): HookPoint()\n", - " )\n", - " )\n", - " (ln_final): LayerNormPre(\n", - " (hook_scale): HookPoint()\n", - " (hook_normalized): HookPoint()\n", - " )\n", - " (unembed): Unembed()\n", - ")" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# NBVAL_IGNORE_OUTPUT\n", - "# I'm using an M2 macbook air, so I use CPU for better support\n", - "model = HookedTransformer.from_pretrained(\"gpt2-small\", device=\"cpu\")\n", - "model.eval()" - ] + "outputs": [], + "source": "# NBVAL_IGNORE_OUTPUT\n# I'm using an M2 macbook air, so I use CPU for better support\nmodel = TransformerBridge.boot_transformers(\"gpt2\", device=\"cpu\")\nmodel.enable_compatibility_mode()\nmodel.eval()" }, { "cell_type": "markdown", @@ -263,17 +154,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def get_source_representation(prompts: List[str], layer_id: int, model: HookedTransformer, pos_id: Union[int, List[int]]=None) -> torch.Tensor:\n", + "def get_source_representation(prompts: List[str], layer_id: int, model: TransformerBridge, pos_id: Union[int, List[int]]=None) -> torch.Tensor:\n", " \"\"\"Get source hidden representation represented by (S, i, M, l)\n", " \n", " Args:\n", " - prompts (List[str]): a list of source prompts\n", " - layer_id (int): the layer id of the model\n", - " - model (HookedTransformer): the source model\n", + " - model (TransformerBridge): the source model\n", " - pos_id (Union[int, List[int]]): the position id(s) of the model, if None, return all positions\n", "\n", " Returns:\n", @@ -325,19 +216,19 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# recall the target representation (T,i*,f,M*,l*), and we also need the hidden representation from our source model (S, i, M, l)\n", - "def feed_source_representation(source_rep: torch.Tensor, prompt: List[str], f: Callable, model: HookedTransformer, layer_id: int, pos_id: Union[int, List[int]]=None) -> ActivationCache:\n", + "def feed_source_representation(source_rep: torch.Tensor, prompt: List[str], f: Callable, model: TransformerBridge, layer_id: int, pos_id: Union[int, List[int]]=None) -> ActivationCache:\n", " \"\"\"Feed the source hidden representation to the target model\n", " \n", " Args:\n", " - source_rep (torch.Tensor): the source hidden representation\n", " - prompt (List[str]): the target prompt\n", " - f (Callable): the mapping function\n", - " - model (HookedTransformer): the target model\n", + " - model (TransformerBridge): the target model\n", " - layer_id (int): the layer id of the target model\n", " - pos_id (Union[int, List[int]]): the position id(s) of the target model, if None, return all positions\n", " \"\"\"\n", @@ -417,11 +308,11 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def generate_with_patching(model: HookedTransformer, prompts: List[str], target_f: Callable, max_new_tokens: int = 50):\n", + "def generate_with_patching(model: TransformerBridge, prompts: List[str], target_f: Callable, max_new_tokens: int = 50):\n", " temp_prompts = prompts\n", " input_tokens = model.to_tokens(temp_prompts)\n", " for _ in range(max_new_tokens):\n", @@ -3494,13 +3385,6 @@ " print(f\"Generation by patching layer {target_layer_id}:\\n{gen}\\n{'='*30}\\n\")" ] }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": "" - }, { "cell_type": "markdown", "metadata": {}, @@ -3780,4 +3664,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/demos/Qwen.ipynb b/demos/Qwen.ipynb index 96732abe8..1f4eb2c94 100644 --- a/demos/Qwen.ipynb +++ b/demos/Qwen.ipynb @@ -9,75 +9,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: transformers_stream_generator in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.0.4)\n", - "Requirement already satisfied: plotly in /root/TransformerLens/.venv/lib/python3.10/site-packages (5.18.0)\n", - "Requirement already satisfied: circuitsvis in /root/TransformerLens/.venv/lib/python3.10/site-packages (1.43.2)\n", - "Requirement already satisfied: huggingface_hub in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.20.2)\n", - "Requirement already satisfied: einops in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.7.0)\n", - "Requirement already satisfied: tiktoken in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.5.2)\n", - "Requirement already satisfied: datasets in /root/TransformerLens/.venv/lib/python3.10/site-packages (2.14.4)\n", - "Requirement already satisfied: transformers>=4.26.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers_stream_generator) (4.37.2)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from plotly) (8.2.3)\n", - "Requirement already satisfied: packaging in /root/TransformerLens/.venv/lib/python3.10/site-packages (from plotly) (23.2)\n", - "Requirement already satisfied: importlib-metadata>=5.1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (7.0.1)\n", - "Requirement already satisfied: numpy>=1.24 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (1.26.3)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.18.1)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: torch>=1.10 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.1.2)\n", - "Requirement already satisfied: triton==2.1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.1.0)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->circuitsvis) (12.3.101)\n", - "Requirement already satisfied: filelock in /root/TransformerLens/.venv/lib/python3.10/site-packages (from triton==2.1.0->circuitsvis) (3.13.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (2023.12.2)\n", - "Requirement already satisfied: requests in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.66.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.9.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from tiktoken) (2023.12.25)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (14.0.2)\n", - "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (0.3.7)\n", - "Requirement already satisfied: pandas in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (2.0.3)\n", - "Requirement already satisfied: xxhash in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (3.4.1)\n", - "Requirement already satisfied: multiprocess in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (0.70.15)\n", - "Requirement already satisfied: aiohttp in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (3.9.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: zipp>=0.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from importlib-metadata>=5.1.0->circuitsvis) (3.17.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2.1.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2023.11.17)\n", - "Requirement already satisfied: sympy in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (1.12)\n", - "Requirement already satisfied: networkx in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (3.1)\n", - "Requirement already satisfied: jinja2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (3.1.2)\n", - "Requirement already satisfied: tokenizers<0.19,>=0.14 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers>=4.26.1->transformers_stream_generator) (0.15.0)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers>=4.26.1->transformers_stream_generator) (0.4.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.4)\n", - "Requirement already satisfied: six>=1.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from jinja2->torch>=1.10->circuitsvis) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from sympy->torch>=1.10->circuitsvis) (1.3.0)\n", - "\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m24.0\u001B[0m\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n", + "/Users/jlarson/Documents/PROJECTS/TransformerLens/.venv/bin/python: No module named pip\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", "%pip install transformers_stream_generator plotly circuitsvis huggingface_hub einops tiktoken datasets" ] }, @@ -92,19 +30,10 @@ "text": [ "Running as a Jupyter notebook - intended for development only!\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_13850/410710250.py:21: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"load_ext autoreload\")\n", - "/tmp/ipykernel_13850/410710250.py:22: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"autoreload 2\")\n" - ] } ], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", "DEVELOPMENT_MODE = False\n", "try:\n", @@ -156,23 +85,14 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/root/TransformerLens\n" - ] - } - ], + "outputs": [], "source": [ - "%cd ~/TransformerLens\n", - "import torch\n", "# NBVAL_IGNORE_OUTPUT\n", + "import torch\n", "_ = torch.set_grad_enabled(False)\n", "\n", "from transformers import AutoTokenizer\n", - "from transformer_lens import HookedTransformer\n", + "from transformer_lens.model_bridge import TransformerBridge\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from transformers.generation import GenerationConfig\n", "\n", @@ -195,7 +115,8 @@ " prompt_toks = tokenizer(prompt, return_tensors=\"pt\").input_ids\n", "\n", " hf_logits = hf_model(prompt_toks.to(hf_model.device)).logits\n", - " tl_logits = tl_model(prompt_toks).to(hf_logits)\n", + " tl_device = next(tl_model.parameters()).device\n", + " tl_logits = tl_model(prompt_toks.to(tl_device)).to(hf_logits)\n", "\n", " assert torch.allclose(torch.softmax(hf_logits, dim=-1), torch.softmax(tl_logits, dim=-1), atol=atol)" ] @@ -204,7 +125,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Qwen, first generation" + "## Qwen, first generation\n", + "\n", + "> **Note:** Qwen gen1 (`Qwen/Qwen-1_8B-Chat`) requires `trust_remote_code=True` and depends on\n", + "> `transformers_stream_generator`, which is incompatible with `transformers>=5.0.0`\n", + "> (`DisjunctiveConstraint` was removed). This section is commented out until the upstream\n", + "> dependency is updated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# NBVAL_SKIP\n", + "# model_path = \"Qwen/Qwen-1_8B-Chat\"\n", + "# device = \"cuda\" if torch.cuda.is_available() else \"mps\" if torch.backends.mps.is_available() else \"cpu\"\n", + "# \n", + "# tokenizer = AutoTokenizer.from_pretrained(\n", + "# model_path,\n", + "# trust_remote_code=True\n", + "# )\n", + "# \n", + "# hf_model = AutoModelForCausalLM.from_pretrained(\n", + "# model_path,\n", + "# device_map=device,\n", + "# torch_dtype=torch.float32,\n", + "# use_logn_attn=False,\n", + "# use_dynamic_ntk=False,\n", + "# scale_attn_weights=False,\n", + "# trust_remote_code=True\n", + "# ).eval()\n", + "# \n", + "# tl_model = TransformerBridge.boot_transformers(\n", + "# model_path,\n", + "# device=device,\n", + "# dtype=torch.float32,\n", + "# trust_remote_code=True,\n", + "# )\n", + "# tl_model.enable_compatibility_mode()\n", + "# \n", + "# assert_hf_and_tl_model_are_close(hf_model, tl_model, tokenizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Qwen2" ] }, { @@ -216,131 +185,72 @@ "name": "stderr", "output_type": "stream", "text": [ - "Your device support faster inference by passing bf16=True in \"AutoModelForCausalLM.from_pretrained\".\n" + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2cffaf8715b64623b6799822d7cf1cfe", + "model_id": "00c489dffa18461f932645808546a319", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.249099Z", + "iopub.status.busy": "2026-03-05T18:28:00.248953Z", + "iopub.status.idle": "2026-03-05T18:28:00.270989Z", + "shell.execute_reply": "2026-03-05T18:28:00.270706Z" } - ], + }, + "outputs": [], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "_ = torch.set_grad_enabled(False)" @@ -147,8 +70,15 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.272104Z", + "iopub.status.busy": "2026-03-05T18:28:00.272034Z", + "iopub.status.idle": "2026-03-05T18:28:00.411289Z", + "shell.execute_reply": "2026-03-05T18:28:00.411042Z" + } + }, "outputs": [ { "name": "stdout", @@ -161,6 +91,7 @@ "generated token: \"êtes\", token id: 6738\n", "generated token: \"-\", token id: 18\n", "generated token: \"vous\", token id: 3249\n", + "generated token: \"\", token id: 3\n", "generated token: \"?\", token id: 58\n", "generated token: \"\", token id: 1\n", "translate English to French: Hello, how are you? \n", @@ -204,14 +135,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.412318Z", + "iopub.status.busy": "2026-03-05T18:28:00.412247Z", + "iopub.status.idle": "2026-03-05T18:28:00.442072Z", + "shell.execute_reply": "2026-03-05T18:28:00.441869Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "torch.Size([1, 1, 32128])\n" + "torch.Size([1, 13, 32128])\n" ] } ], @@ -230,14 +168,21 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.443081Z", + "iopub.status.busy": "2026-03-05T18:28:00.443015Z", + "iopub.status.idle": "2026-03-05T18:28:00.477236Z", + "shell.execute_reply": "2026-03-05T18:28:00.477027Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "torch.Size([3, 1, 32128])\n" + "torch.Size([3, 14, 32128])\n" ] } ], @@ -261,8 +206,15 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.478310Z", + "iopub.status.busy": "2026-03-05T18:28:00.478239Z", + "iopub.status.idle": "2026-03-05T18:28:00.630028Z", + "shell.execute_reply": "2026-03-05T18:28:00.629766Z" + } + }, "outputs": [ { "name": "stdout", @@ -288,27 +240,34 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.631124Z", + "iopub.status.busy": "2026-03-05T18:28:00.631044Z", + "iopub.status.idle": "2026-03-05T18:28:00.834050Z", + "shell.execute_reply": "2026-03-05T18:28:00.833519Z" + } + }, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 20, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -321,8 +280,15 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.835974Z", + "iopub.status.busy": "2026-03-05T18:28:00.835821Z", + "iopub.status.idle": "2026-03-05T18:28:00.895410Z", + "shell.execute_reply": "2026-03-05T18:28:00.895112Z" + } + }, "outputs": [], "source": [ "prompt = \"translate English to French: Hello, how are you? \"\n", @@ -336,327 +302,683 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.896537Z", + "iopub.status.busy": "2026-03-05T18:28:00.896476Z", + "iopub.status.idle": "2026-03-05T18:28:00.915758Z", + "shell.execute_reply": "2026-03-05T18:28:00.915512Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "hook_embed\n", - "encoder.0.hook_resid_pre\n", - "encoder.0.ln1.hook_scale\n", - "encoder.0.ln1.hook_normalized\n", - "encoder.0.attn.hook_q\n", - "encoder.0.attn.hook_k\n", - "encoder.0.attn.hook_v\n", - "encoder.0.attn.hook_attn_scores\n", - "encoder.0.attn.hook_pattern\n", - "encoder.0.attn.hook_z\n", - "encoder.0.hook_attn_out\n", - "encoder.0.hook_resid_mid\n", - "encoder.0.ln2.hook_scale\n", - "encoder.0.ln2.hook_normalized\n", - "encoder.0.mlp.hook_pre\n", - "encoder.0.mlp.hook_post\n", - "encoder.0.hook_mlp_out\n", - "encoder.0.hook_resid_post\n", - "encoder.1.hook_resid_pre\n", - "encoder.1.ln1.hook_scale\n", - "encoder.1.ln1.hook_normalized\n", - "encoder.1.attn.hook_q\n", - "encoder.1.attn.hook_k\n", - "encoder.1.attn.hook_v\n", - "encoder.1.attn.hook_attn_scores\n", - "encoder.1.attn.hook_pattern\n", - "encoder.1.attn.hook_z\n", - "encoder.1.hook_attn_out\n", - "encoder.1.hook_resid_mid\n", - "encoder.1.ln2.hook_scale\n", - "encoder.1.ln2.hook_normalized\n", - "encoder.1.mlp.hook_pre\n", - "encoder.1.mlp.hook_post\n", - "encoder.1.hook_mlp_out\n", - "encoder.1.hook_resid_post\n", - "encoder.2.hook_resid_pre\n", - "encoder.2.ln1.hook_scale\n", - "encoder.2.ln1.hook_normalized\n", - "encoder.2.attn.hook_q\n", - "encoder.2.attn.hook_k\n", - "encoder.2.attn.hook_v\n", - "encoder.2.attn.hook_attn_scores\n", - "encoder.2.attn.hook_pattern\n", - "encoder.2.attn.hook_z\n", - "encoder.2.hook_attn_out\n", - "encoder.2.hook_resid_mid\n", - "encoder.2.ln2.hook_scale\n", - "encoder.2.ln2.hook_normalized\n", - "encoder.2.mlp.hook_pre\n", - "encoder.2.mlp.hook_post\n", - "encoder.2.hook_mlp_out\n", - "encoder.2.hook_resid_post\n", - "encoder.3.hook_resid_pre\n", - "encoder.3.ln1.hook_scale\n", - "encoder.3.ln1.hook_normalized\n", - "encoder.3.attn.hook_q\n", - "encoder.3.attn.hook_k\n", - "encoder.3.attn.hook_v\n", - "encoder.3.attn.hook_attn_scores\n", - "encoder.3.attn.hook_pattern\n", - "encoder.3.attn.hook_z\n", - "encoder.3.hook_attn_out\n", - "encoder.3.hook_resid_mid\n", - "encoder.3.ln2.hook_scale\n", - "encoder.3.ln2.hook_normalized\n", - "encoder.3.mlp.hook_pre\n", - "encoder.3.mlp.hook_post\n", - "encoder.3.hook_mlp_out\n", - "encoder.3.hook_resid_post\n", - "encoder.4.hook_resid_pre\n", - "encoder.4.ln1.hook_scale\n", - "encoder.4.ln1.hook_normalized\n", - "encoder.4.attn.hook_q\n", - "encoder.4.attn.hook_k\n", - "encoder.4.attn.hook_v\n", - "encoder.4.attn.hook_attn_scores\n", - "encoder.4.attn.hook_pattern\n", - "encoder.4.attn.hook_z\n", - "encoder.4.hook_attn_out\n", - "encoder.4.hook_resid_mid\n", - "encoder.4.ln2.hook_scale\n", - "encoder.4.ln2.hook_normalized\n", - "encoder.4.mlp.hook_pre\n", - "encoder.4.mlp.hook_post\n", - "encoder.4.hook_mlp_out\n", - "encoder.4.hook_resid_post\n", - "encoder.5.hook_resid_pre\n", - "encoder.5.ln1.hook_scale\n", - "encoder.5.ln1.hook_normalized\n", - "encoder.5.attn.hook_q\n", - "encoder.5.attn.hook_k\n", - "encoder.5.attn.hook_v\n", - "encoder.5.attn.hook_attn_scores\n", - "encoder.5.attn.hook_pattern\n", - "encoder.5.attn.hook_z\n", - "encoder.5.hook_attn_out\n", - "encoder.5.hook_resid_mid\n", - "encoder.5.ln2.hook_scale\n", - "encoder.5.ln2.hook_normalized\n", - "encoder.5.mlp.hook_pre\n", - "encoder.5.mlp.hook_post\n", - "encoder.5.hook_mlp_out\n", - "encoder.5.hook_resid_post\n", - "encoder_final_ln.hook_scale\n", - "encoder_final_ln.hook_normalized\n", - "decoder.0.hook_resid_pre\n", - "decoder.0.ln1.hook_scale\n", - "decoder.0.ln1.hook_normalized\n", - "decoder.0.attn.hook_q\n", - "decoder.0.attn.hook_k\n", - "decoder.0.attn.hook_v\n", - "decoder.0.attn.hook_attn_scores\n", - "decoder.0.attn.hook_pattern\n", - "decoder.0.attn.hook_z\n", - "decoder.0.hook_attn_out\n", - "decoder.0.hook_resid_mid\n", - "decoder.0.ln2.hook_scale\n", - "decoder.0.ln2.hook_normalized\n", - "decoder.0.cross_attn.hook_q\n", - "decoder.0.cross_attn.hook_k\n", - "decoder.0.cross_attn.hook_v\n", - "decoder.0.cross_attn.hook_attn_scores\n", - "decoder.0.cross_attn.hook_pattern\n", - "decoder.0.cross_attn.hook_z\n", - "decoder.0.hook_cross_attn_out\n", - "decoder.0.hook_resid_mid_cross\n", - "decoder.0.ln3.hook_scale\n", - "decoder.0.ln3.hook_normalized\n", - "decoder.0.mlp.hook_pre\n", - "decoder.0.mlp.hook_post\n", - "decoder.0.hook_mlp_out\n", - "decoder.0.hook_resid_post\n", - "decoder.1.hook_resid_pre\n", - "decoder.1.ln1.hook_scale\n", - "decoder.1.ln1.hook_normalized\n", - "decoder.1.attn.hook_q\n", - "decoder.1.attn.hook_k\n", - "decoder.1.attn.hook_v\n", - "decoder.1.attn.hook_attn_scores\n", - "decoder.1.attn.hook_pattern\n", - "decoder.1.attn.hook_z\n", - "decoder.1.hook_attn_out\n", - "decoder.1.hook_resid_mid\n", - "decoder.1.ln2.hook_scale\n", - "decoder.1.ln2.hook_normalized\n", - "decoder.1.cross_attn.hook_q\n", - "decoder.1.cross_attn.hook_k\n", - "decoder.1.cross_attn.hook_v\n", - "decoder.1.cross_attn.hook_attn_scores\n", - "decoder.1.cross_attn.hook_pattern\n", - "decoder.1.cross_attn.hook_z\n", - "decoder.1.hook_cross_attn_out\n", - "decoder.1.hook_resid_mid_cross\n", - "decoder.1.ln3.hook_scale\n", - "decoder.1.ln3.hook_normalized\n", - "decoder.1.mlp.hook_pre\n", - "decoder.1.mlp.hook_post\n", - "decoder.1.hook_mlp_out\n", - "decoder.1.hook_resid_post\n", - "decoder.2.hook_resid_pre\n", - "decoder.2.ln1.hook_scale\n", - "decoder.2.ln1.hook_normalized\n", - "decoder.2.attn.hook_q\n", - "decoder.2.attn.hook_k\n", - "decoder.2.attn.hook_v\n", - "decoder.2.attn.hook_attn_scores\n", - "decoder.2.attn.hook_pattern\n", - "decoder.2.attn.hook_z\n", - "decoder.2.hook_attn_out\n", - "decoder.2.hook_resid_mid\n", - "decoder.2.ln2.hook_scale\n", - "decoder.2.ln2.hook_normalized\n", - "decoder.2.cross_attn.hook_q\n", - "decoder.2.cross_attn.hook_k\n", - "decoder.2.cross_attn.hook_v\n", - "decoder.2.cross_attn.hook_attn_scores\n", - "decoder.2.cross_attn.hook_pattern\n", - "decoder.2.cross_attn.hook_z\n", - "decoder.2.hook_cross_attn_out\n", - "decoder.2.hook_resid_mid_cross\n", - "decoder.2.ln3.hook_scale\n", - "decoder.2.ln3.hook_normalized\n", - "decoder.2.mlp.hook_pre\n", - "decoder.2.mlp.hook_post\n", - "decoder.2.hook_mlp_out\n", - "decoder.2.hook_resid_post\n", - "decoder.3.hook_resid_pre\n", - "decoder.3.ln1.hook_scale\n", - "decoder.3.ln1.hook_normalized\n", - "decoder.3.attn.hook_q\n", - "decoder.3.attn.hook_k\n", - "decoder.3.attn.hook_v\n", - "decoder.3.attn.hook_attn_scores\n", - "decoder.3.attn.hook_pattern\n", - "decoder.3.attn.hook_z\n", - "decoder.3.hook_attn_out\n", - "decoder.3.hook_resid_mid\n", - "decoder.3.ln2.hook_scale\n", - "decoder.3.ln2.hook_normalized\n", - "decoder.3.cross_attn.hook_q\n", - "decoder.3.cross_attn.hook_k\n", - "decoder.3.cross_attn.hook_v\n", - "decoder.3.cross_attn.hook_attn_scores\n", - "decoder.3.cross_attn.hook_pattern\n", - "decoder.3.cross_attn.hook_z\n", - "decoder.3.hook_cross_attn_out\n", - "decoder.3.hook_resid_mid_cross\n", - "decoder.3.ln3.hook_scale\n", - "decoder.3.ln3.hook_normalized\n", - "decoder.3.mlp.hook_pre\n", - "decoder.3.mlp.hook_post\n", - "decoder.3.hook_mlp_out\n", - "decoder.3.hook_resid_post\n", - "decoder.4.hook_resid_pre\n", - "decoder.4.ln1.hook_scale\n", - "decoder.4.ln1.hook_normalized\n", - "decoder.4.attn.hook_q\n", - "decoder.4.attn.hook_k\n", - "decoder.4.attn.hook_v\n", - "decoder.4.attn.hook_attn_scores\n", - "decoder.4.attn.hook_pattern\n", - "decoder.4.attn.hook_z\n", - "decoder.4.hook_attn_out\n", - "decoder.4.hook_resid_mid\n", - "decoder.4.ln2.hook_scale\n", - "decoder.4.ln2.hook_normalized\n", - "decoder.4.cross_attn.hook_q\n", - "decoder.4.cross_attn.hook_k\n", - "decoder.4.cross_attn.hook_v\n", - "decoder.4.cross_attn.hook_attn_scores\n", - "decoder.4.cross_attn.hook_pattern\n", - "decoder.4.cross_attn.hook_z\n", - "decoder.4.hook_cross_attn_out\n", - "decoder.4.hook_resid_mid_cross\n", - "decoder.4.ln3.hook_scale\n", - "decoder.4.ln3.hook_normalized\n", - "decoder.4.mlp.hook_pre\n", - "decoder.4.mlp.hook_post\n", - "decoder.4.hook_mlp_out\n", - "decoder.4.hook_resid_post\n", - "decoder.5.hook_resid_pre\n", - "decoder.5.ln1.hook_scale\n", - "decoder.5.ln1.hook_normalized\n", - "decoder.5.attn.hook_q\n", - "decoder.5.attn.hook_k\n", - "decoder.5.attn.hook_v\n", - "decoder.5.attn.hook_attn_scores\n", - "decoder.5.attn.hook_pattern\n", - "decoder.5.attn.hook_z\n", - "decoder.5.hook_attn_out\n", - "decoder.5.hook_resid_mid\n", - "decoder.5.ln2.hook_scale\n", - "decoder.5.ln2.hook_normalized\n", - "decoder.5.cross_attn.hook_q\n", - "decoder.5.cross_attn.hook_k\n", - "decoder.5.cross_attn.hook_v\n", - "decoder.5.cross_attn.hook_attn_scores\n", - "decoder.5.cross_attn.hook_pattern\n", - "decoder.5.cross_attn.hook_z\n", - "decoder.5.hook_cross_attn_out\n", - "decoder.5.hook_resid_mid_cross\n", - "decoder.5.ln3.hook_scale\n", - "decoder.5.ln3.hook_normalized\n", - "decoder.5.mlp.hook_pre\n", - "decoder.5.mlp.hook_post\n", - "decoder.5.hook_mlp_out\n", - "decoder.5.hook_resid_post\n", - "decoder_final_ln.hook_scale\n", - "decoder_final_ln.hook_normalized\n" + "encoder_blocks.0.hook_in\n", + "encoder_blocks.0.hook_resid_pre\n", + "encoder_blocks.0.ln1.hook_in\n", + "encoder_blocks.0.ln1.hook_scale\n", + "encoder_blocks.0.ln1.hook_normalized\n", + "encoder_blocks.0.ln1.hook_out\n", + "encoder_blocks.0.attn.hook_in\n", + "encoder_blocks.0.attn.q.hook_in\n", + "encoder_blocks.0.attn.hook_q\n", + "encoder_blocks.0.attn.q.hook_out\n", + "encoder_blocks.0.attn.k.hook_in\n", + "encoder_blocks.0.attn.hook_k\n", + "encoder_blocks.0.attn.k.hook_out\n", + "encoder_blocks.0.attn.v.hook_in\n", + "encoder_blocks.0.attn.hook_v\n", + "encoder_blocks.0.attn.v.hook_out\n", + "pos_embed.hook_in\n", + "hook_pos_embed\n", + "pos_embed.hook_out\n", + "encoder_blocks.0.attn.hook_z\n", + "encoder_blocks.0.attn.o.hook_in\n", + "encoder_blocks.0.attn.o.hook_out\n", + "encoder_blocks.0.attn.hook_out\n", + "encoder_blocks.0.attn.hook_result\n", + "encoder_blocks.0.attn.hook_pattern\n", + "encoder_blocks.0.attn.hook_attn_scores\n", + "encoder_blocks.0.hook_resid_mid\n", + "encoder_blocks.0.ln2.hook_in\n", + "encoder_blocks.0.ln2.hook_scale\n", + "encoder_blocks.0.ln2.hook_normalized\n", + "encoder_blocks.0.ln2.hook_out\n", + "encoder_blocks.0.mlp.hook_in\n", + "encoder_blocks.0.mlp.in.hook_in\n", + "encoder_blocks.0.mlp.hook_pre\n", + "encoder_blocks.0.mlp.in.hook_out\n", + "encoder_blocks.0.mlp.hook_post\n", + "encoder_blocks.0.mlp.out.hook_in\n", + "encoder_blocks.0.mlp.out.hook_out\n", + "encoder_blocks.0.mlp.hook_out\n", + "encoder_blocks.0.hook_out\n", + "encoder_blocks.0.hook_resid_post\n", + "encoder_blocks.1.hook_in\n", + "encoder_blocks.1.hook_resid_pre\n", + "encoder_blocks.1.ln1.hook_in\n", + "encoder_blocks.1.ln1.hook_scale\n", + "encoder_blocks.1.ln1.hook_normalized\n", + "encoder_blocks.1.ln1.hook_out\n", + "encoder_blocks.1.attn.hook_in\n", + "encoder_blocks.1.attn.q.hook_in\n", + "encoder_blocks.1.attn.hook_q\n", + "encoder_blocks.1.attn.q.hook_out\n", + "encoder_blocks.1.attn.k.hook_in\n", + "encoder_blocks.1.attn.hook_k\n", + "encoder_blocks.1.attn.k.hook_out\n", + "encoder_blocks.1.attn.v.hook_in\n", + "encoder_blocks.1.attn.hook_v\n", + "encoder_blocks.1.attn.v.hook_out\n", + "encoder_blocks.1.attn.hook_z\n", + "encoder_blocks.1.attn.o.hook_in\n", + "encoder_blocks.1.attn.o.hook_out\n", + "encoder_blocks.1.attn.hook_out\n", + "encoder_blocks.1.attn.hook_result\n", + "encoder_blocks.1.attn.hook_pattern\n", + "encoder_blocks.1.attn.hook_attn_scores\n", + "encoder_blocks.1.hook_resid_mid\n", + "encoder_blocks.1.ln2.hook_in\n", + "encoder_blocks.1.ln2.hook_scale\n", + "encoder_blocks.1.ln2.hook_normalized\n", + "encoder_blocks.1.ln2.hook_out\n", + "encoder_blocks.1.mlp.hook_in\n", + "encoder_blocks.1.mlp.in.hook_in\n", + "encoder_blocks.1.mlp.hook_pre\n", + "encoder_blocks.1.mlp.in.hook_out\n", + "encoder_blocks.1.mlp.hook_post\n", + "encoder_blocks.1.mlp.out.hook_in\n", + "encoder_blocks.1.mlp.out.hook_out\n", + "encoder_blocks.1.mlp.hook_out\n", + "encoder_blocks.1.hook_out\n", + "encoder_blocks.1.hook_resid_post\n", + "encoder_blocks.2.hook_in\n", + "encoder_blocks.2.hook_resid_pre\n", + "encoder_blocks.2.ln1.hook_in\n", + "encoder_blocks.2.ln1.hook_scale\n", + "encoder_blocks.2.ln1.hook_normalized\n", + "encoder_blocks.2.ln1.hook_out\n", + "encoder_blocks.2.attn.hook_in\n", + "encoder_blocks.2.attn.q.hook_in\n", + "encoder_blocks.2.attn.hook_q\n", + "encoder_blocks.2.attn.q.hook_out\n", + "encoder_blocks.2.attn.k.hook_in\n", + "encoder_blocks.2.attn.hook_k\n", + "encoder_blocks.2.attn.k.hook_out\n", + "encoder_blocks.2.attn.v.hook_in\n", + "encoder_blocks.2.attn.hook_v\n", + "encoder_blocks.2.attn.v.hook_out\n", + "encoder_blocks.2.attn.hook_z\n", + "encoder_blocks.2.attn.o.hook_in\n", + "encoder_blocks.2.attn.o.hook_out\n", + "encoder_blocks.2.attn.hook_out\n", + "encoder_blocks.2.attn.hook_result\n", + "encoder_blocks.2.attn.hook_pattern\n", + "encoder_blocks.2.attn.hook_attn_scores\n", + "encoder_blocks.2.hook_resid_mid\n", + "encoder_blocks.2.ln2.hook_in\n", + "encoder_blocks.2.ln2.hook_scale\n", + "encoder_blocks.2.ln2.hook_normalized\n", + "encoder_blocks.2.ln2.hook_out\n", + "encoder_blocks.2.mlp.hook_in\n", + "encoder_blocks.2.mlp.in.hook_in\n", + "encoder_blocks.2.mlp.hook_pre\n", + "encoder_blocks.2.mlp.in.hook_out\n", + "encoder_blocks.2.mlp.hook_post\n", + "encoder_blocks.2.mlp.out.hook_in\n", + "encoder_blocks.2.mlp.out.hook_out\n", + "encoder_blocks.2.mlp.hook_out\n", + "encoder_blocks.2.hook_out\n", + "encoder_blocks.2.hook_resid_post\n", + "encoder_blocks.3.hook_in\n", + "encoder_blocks.3.hook_resid_pre\n", + "encoder_blocks.3.ln1.hook_in\n", + "encoder_blocks.3.ln1.hook_scale\n", + "encoder_blocks.3.ln1.hook_normalized\n", + "encoder_blocks.3.ln1.hook_out\n", + "encoder_blocks.3.attn.hook_in\n", + "encoder_blocks.3.attn.q.hook_in\n", + "encoder_blocks.3.attn.hook_q\n", + "encoder_blocks.3.attn.q.hook_out\n", + "encoder_blocks.3.attn.k.hook_in\n", + "encoder_blocks.3.attn.hook_k\n", + "encoder_blocks.3.attn.k.hook_out\n", + "encoder_blocks.3.attn.v.hook_in\n", + "encoder_blocks.3.attn.hook_v\n", + "encoder_blocks.3.attn.v.hook_out\n", + "encoder_blocks.3.attn.hook_z\n", + "encoder_blocks.3.attn.o.hook_in\n", + "encoder_blocks.3.attn.o.hook_out\n", + "encoder_blocks.3.attn.hook_out\n", + "encoder_blocks.3.attn.hook_result\n", + "encoder_blocks.3.attn.hook_pattern\n", + "encoder_blocks.3.attn.hook_attn_scores\n", + "encoder_blocks.3.hook_resid_mid\n", + "encoder_blocks.3.ln2.hook_in\n", + "encoder_blocks.3.ln2.hook_scale\n", + "encoder_blocks.3.ln2.hook_normalized\n", + "encoder_blocks.3.ln2.hook_out\n", + "encoder_blocks.3.mlp.hook_in\n", + "encoder_blocks.3.mlp.in.hook_in\n", + "encoder_blocks.3.mlp.hook_pre\n", + "encoder_blocks.3.mlp.in.hook_out\n", + "encoder_blocks.3.mlp.hook_post\n", + "encoder_blocks.3.mlp.out.hook_in\n", + "encoder_blocks.3.mlp.out.hook_out\n", + "encoder_blocks.3.mlp.hook_out\n", + "encoder_blocks.3.hook_out\n", + "encoder_blocks.3.hook_resid_post\n", + "encoder_blocks.4.hook_in\n", + "encoder_blocks.4.hook_resid_pre\n", + "encoder_blocks.4.ln1.hook_in\n", + "encoder_blocks.4.ln1.hook_scale\n", + "encoder_blocks.4.ln1.hook_normalized\n", + "encoder_blocks.4.ln1.hook_out\n", + "encoder_blocks.4.attn.hook_in\n", + "encoder_blocks.4.attn.q.hook_in\n", + "encoder_blocks.4.attn.hook_q\n", + "encoder_blocks.4.attn.q.hook_out\n", + "encoder_blocks.4.attn.k.hook_in\n", + "encoder_blocks.4.attn.hook_k\n", + "encoder_blocks.4.attn.k.hook_out\n", + "encoder_blocks.4.attn.v.hook_in\n", + "encoder_blocks.4.attn.hook_v\n", + "encoder_blocks.4.attn.v.hook_out\n", + "encoder_blocks.4.attn.hook_z\n", + "encoder_blocks.4.attn.o.hook_in\n", + "encoder_blocks.4.attn.o.hook_out\n", + "encoder_blocks.4.attn.hook_out\n", + "encoder_blocks.4.attn.hook_result\n", + "encoder_blocks.4.attn.hook_pattern\n", + "encoder_blocks.4.attn.hook_attn_scores\n", + "encoder_blocks.4.hook_resid_mid\n", + "encoder_blocks.4.ln2.hook_in\n", + "encoder_blocks.4.ln2.hook_scale\n", + "encoder_blocks.4.ln2.hook_normalized\n", + "encoder_blocks.4.ln2.hook_out\n", + "encoder_blocks.4.mlp.hook_in\n", + "encoder_blocks.4.mlp.in.hook_in\n", + "encoder_blocks.4.mlp.hook_pre\n", + "encoder_blocks.4.mlp.in.hook_out\n", + "encoder_blocks.4.mlp.hook_post\n", + "encoder_blocks.4.mlp.out.hook_in\n", + "encoder_blocks.4.mlp.out.hook_out\n", + "encoder_blocks.4.mlp.hook_out\n", + "encoder_blocks.4.hook_out\n", + "encoder_blocks.4.hook_resid_post\n", + "encoder_blocks.5.hook_in\n", + "encoder_blocks.5.hook_resid_pre\n", + "encoder_blocks.5.ln1.hook_in\n", + "encoder_blocks.5.ln1.hook_scale\n", + "encoder_blocks.5.ln1.hook_normalized\n", + "encoder_blocks.5.ln1.hook_out\n", + "encoder_blocks.5.attn.hook_in\n", + "encoder_blocks.5.attn.q.hook_in\n", + "encoder_blocks.5.attn.hook_q\n", + "encoder_blocks.5.attn.q.hook_out\n", + "encoder_blocks.5.attn.k.hook_in\n", + "encoder_blocks.5.attn.hook_k\n", + "encoder_blocks.5.attn.k.hook_out\n", + "encoder_blocks.5.attn.v.hook_in\n", + "encoder_blocks.5.attn.hook_v\n", + "encoder_blocks.5.attn.v.hook_out\n", + "encoder_blocks.5.attn.hook_z\n", + "encoder_blocks.5.attn.o.hook_in\n", + "encoder_blocks.5.attn.o.hook_out\n", + "encoder_blocks.5.attn.hook_out\n", + "encoder_blocks.5.attn.hook_result\n", + "encoder_blocks.5.attn.hook_pattern\n", + "encoder_blocks.5.attn.hook_attn_scores\n", + "encoder_blocks.5.hook_resid_mid\n", + "encoder_blocks.5.ln2.hook_in\n", + "encoder_blocks.5.ln2.hook_scale\n", + "encoder_blocks.5.ln2.hook_normalized\n", + "encoder_blocks.5.ln2.hook_out\n", + "encoder_blocks.5.mlp.hook_in\n", + "encoder_blocks.5.mlp.in.hook_in\n", + "encoder_blocks.5.mlp.hook_pre\n", + "encoder_blocks.5.mlp.in.hook_out\n", + "encoder_blocks.5.mlp.hook_post\n", + "encoder_blocks.5.mlp.out.hook_in\n", + "encoder_blocks.5.mlp.out.hook_out\n", + "encoder_blocks.5.mlp.hook_out\n", + "encoder_blocks.5.hook_out\n", + "encoder_blocks.5.hook_resid_post\n", + "encoder_ln_final.hook_in\n", + "encoder_ln_final.hook_scale\n", + "encoder_ln_final.hook_normalized\n", + "encoder_ln_final.hook_out\n", + "decoder_blocks.0.hook_in\n", + "decoder_blocks.0.hook_resid_pre\n", + "decoder_blocks.0.ln1.hook_in\n", + "decoder_blocks.0.ln1.hook_scale\n", + "decoder_blocks.0.ln1.hook_normalized\n", + "decoder_blocks.0.ln1.hook_out\n", + "decoder_blocks.0.self_attn.hook_in\n", + "decoder_blocks.0.self_attn.q.hook_in\n", + "decoder_blocks.0.self_attn.hook_q\n", + "decoder_blocks.0.self_attn.q.hook_out\n", + "decoder_blocks.0.self_attn.k.hook_in\n", + "decoder_blocks.0.self_attn.hook_k\n", + "decoder_blocks.0.self_attn.k.hook_out\n", + "decoder_blocks.0.self_attn.v.hook_in\n", + "decoder_blocks.0.self_attn.hook_v\n", + "decoder_blocks.0.self_attn.v.hook_out\n", + "decoder_pos_embed.hook_in\n", + "decoder_pos_embed.hook_out\n", + "decoder_blocks.0.self_attn.hook_z\n", + "decoder_blocks.0.self_attn.o.hook_in\n", + "decoder_blocks.0.self_attn.o.hook_out\n", + "decoder_blocks.0.self_attn.hook_out\n", + "decoder_blocks.0.self_attn.hook_result\n", + "decoder_blocks.0.self_attn.hook_pattern\n", + "decoder_blocks.0.self_attn.hook_attn_scores\n", + "decoder_blocks.0.hook_resid_mid\n", + "decoder_blocks.0.ln2.hook_in\n", + "decoder_blocks.0.ln2.hook_scale\n", + "decoder_blocks.0.ln2.hook_normalized\n", + "decoder_blocks.0.ln2.hook_out\n", + "decoder_blocks.0.cross_attn.hook_in\n", + "decoder_blocks.0.cross_attn.q.hook_in\n", + "decoder_blocks.0.cross_attn.hook_q\n", + "decoder_blocks.0.cross_attn.q.hook_out\n", + "decoder_blocks.0.cross_attn.k.hook_in\n", + "decoder_blocks.0.cross_attn.hook_k\n", + "decoder_blocks.0.cross_attn.k.hook_out\n", + "decoder_blocks.0.cross_attn.v.hook_in\n", + "decoder_blocks.0.cross_attn.hook_v\n", + "decoder_blocks.0.cross_attn.v.hook_out\n", + "decoder_blocks.0.cross_attn.hook_z\n", + "decoder_blocks.0.cross_attn.o.hook_in\n", + "decoder_blocks.0.cross_attn.o.hook_out\n", + "decoder_blocks.0.cross_attn.hook_out\n", + "decoder_blocks.0.cross_attn.hook_result\n", + "decoder_blocks.0.cross_attn.hook_pattern\n", + "decoder_blocks.0.cross_attn.hook_attn_scores\n", + "decoder_blocks.0.hook_resid_mid2\n", + "decoder_blocks.0.ln3.hook_in\n", + "decoder_blocks.0.ln3.hook_scale\n", + "decoder_blocks.0.ln3.hook_normalized\n", + "decoder_blocks.0.ln3.hook_out\n", + "decoder_blocks.0.mlp.hook_in\n", + "decoder_blocks.0.mlp.in.hook_in\n", + "decoder_blocks.0.mlp.hook_pre\n", + "decoder_blocks.0.mlp.in.hook_out\n", + "decoder_blocks.0.mlp.hook_post\n", + "decoder_blocks.0.mlp.out.hook_in\n", + "decoder_blocks.0.mlp.out.hook_out\n", + "decoder_blocks.0.mlp.hook_out\n", + "decoder_blocks.0.hook_out\n", + "decoder_blocks.0.hook_resid_post\n", + "decoder_blocks.1.hook_in\n", + "decoder_blocks.1.hook_resid_pre\n", + "decoder_blocks.1.ln1.hook_in\n", + "decoder_blocks.1.ln1.hook_scale\n", + "decoder_blocks.1.ln1.hook_normalized\n", + "decoder_blocks.1.ln1.hook_out\n", + "decoder_blocks.1.self_attn.hook_in\n", + "decoder_blocks.1.self_attn.q.hook_in\n", + "decoder_blocks.1.self_attn.hook_q\n", + "decoder_blocks.1.self_attn.q.hook_out\n", + "decoder_blocks.1.self_attn.k.hook_in\n", + "decoder_blocks.1.self_attn.hook_k\n", + "decoder_blocks.1.self_attn.k.hook_out\n", + "decoder_blocks.1.self_attn.v.hook_in\n", + "decoder_blocks.1.self_attn.hook_v\n", + "decoder_blocks.1.self_attn.v.hook_out\n", + "decoder_blocks.1.self_attn.hook_z\n", + "decoder_blocks.1.self_attn.o.hook_in\n", + "decoder_blocks.1.self_attn.o.hook_out\n", + "decoder_blocks.1.self_attn.hook_out\n", + "decoder_blocks.1.self_attn.hook_result\n", + "decoder_blocks.1.self_attn.hook_pattern\n", + "decoder_blocks.1.self_attn.hook_attn_scores\n", + "decoder_blocks.1.hook_resid_mid\n", + "decoder_blocks.1.ln2.hook_in\n", + "decoder_blocks.1.ln2.hook_scale\n", + "decoder_blocks.1.ln2.hook_normalized\n", + "decoder_blocks.1.ln2.hook_out\n", + "decoder_blocks.1.cross_attn.hook_in\n", + "decoder_blocks.1.cross_attn.q.hook_in\n", + "decoder_blocks.1.cross_attn.hook_q\n", + "decoder_blocks.1.cross_attn.q.hook_out\n", + "decoder_blocks.1.cross_attn.k.hook_in\n", + "decoder_blocks.1.cross_attn.hook_k\n", + "decoder_blocks.1.cross_attn.k.hook_out\n", + "decoder_blocks.1.cross_attn.v.hook_in\n", + "decoder_blocks.1.cross_attn.hook_v\n", + "decoder_blocks.1.cross_attn.v.hook_out\n", + "decoder_blocks.1.cross_attn.hook_z\n", + "decoder_blocks.1.cross_attn.o.hook_in\n", + "decoder_blocks.1.cross_attn.o.hook_out\n", + "decoder_blocks.1.cross_attn.hook_out\n", + "decoder_blocks.1.cross_attn.hook_result\n", + "decoder_blocks.1.cross_attn.hook_pattern\n", + "decoder_blocks.1.cross_attn.hook_attn_scores\n", + "decoder_blocks.1.hook_resid_mid2\n", + "decoder_blocks.1.ln3.hook_in\n", + "decoder_blocks.1.ln3.hook_scale\n", + "decoder_blocks.1.ln3.hook_normalized\n", + "decoder_blocks.1.ln3.hook_out\n", + "decoder_blocks.1.mlp.hook_in\n", + "decoder_blocks.1.mlp.in.hook_in\n", + "decoder_blocks.1.mlp.hook_pre\n", + "decoder_blocks.1.mlp.in.hook_out\n", + "decoder_blocks.1.mlp.hook_post\n", + "decoder_blocks.1.mlp.out.hook_in\n", + "decoder_blocks.1.mlp.out.hook_out\n", + "decoder_blocks.1.mlp.hook_out\n", + "decoder_blocks.1.hook_out\n", + "decoder_blocks.1.hook_resid_post\n", + "decoder_blocks.2.hook_in\n", + "decoder_blocks.2.hook_resid_pre\n", + "decoder_blocks.2.ln1.hook_in\n", + "decoder_blocks.2.ln1.hook_scale\n", + "decoder_blocks.2.ln1.hook_normalized\n", + "decoder_blocks.2.ln1.hook_out\n", + "decoder_blocks.2.self_attn.hook_in\n", + "decoder_blocks.2.self_attn.q.hook_in\n", + "decoder_blocks.2.self_attn.hook_q\n", + "decoder_blocks.2.self_attn.q.hook_out\n", + "decoder_blocks.2.self_attn.k.hook_in\n", + "decoder_blocks.2.self_attn.hook_k\n", + "decoder_blocks.2.self_attn.k.hook_out\n", + "decoder_blocks.2.self_attn.v.hook_in\n", + "decoder_blocks.2.self_attn.hook_v\n", + "decoder_blocks.2.self_attn.v.hook_out\n", + "decoder_blocks.2.self_attn.hook_z\n", + "decoder_blocks.2.self_attn.o.hook_in\n", + "decoder_blocks.2.self_attn.o.hook_out\n", + "decoder_blocks.2.self_attn.hook_out\n", + "decoder_blocks.2.self_attn.hook_result\n", + "decoder_blocks.2.self_attn.hook_pattern\n", + "decoder_blocks.2.self_attn.hook_attn_scores\n", + "decoder_blocks.2.hook_resid_mid\n", + "decoder_blocks.2.ln2.hook_in\n", + "decoder_blocks.2.ln2.hook_scale\n", + "decoder_blocks.2.ln2.hook_normalized\n", + "decoder_blocks.2.ln2.hook_out\n", + "decoder_blocks.2.cross_attn.hook_in\n", + "decoder_blocks.2.cross_attn.q.hook_in\n", + "decoder_blocks.2.cross_attn.hook_q\n", + "decoder_blocks.2.cross_attn.q.hook_out\n", + "decoder_blocks.2.cross_attn.k.hook_in\n", + "decoder_blocks.2.cross_attn.hook_k\n", + "decoder_blocks.2.cross_attn.k.hook_out\n", + "decoder_blocks.2.cross_attn.v.hook_in\n", + "decoder_blocks.2.cross_attn.hook_v\n", + "decoder_blocks.2.cross_attn.v.hook_out\n", + "decoder_blocks.2.cross_attn.hook_z\n", + "decoder_blocks.2.cross_attn.o.hook_in\n", + "decoder_blocks.2.cross_attn.o.hook_out\n", + "decoder_blocks.2.cross_attn.hook_out\n", + "decoder_blocks.2.cross_attn.hook_result\n", + "decoder_blocks.2.cross_attn.hook_pattern\n", + "decoder_blocks.2.cross_attn.hook_attn_scores\n", + "decoder_blocks.2.hook_resid_mid2\n", + "decoder_blocks.2.ln3.hook_in\n", + "decoder_blocks.2.ln3.hook_scale\n", + "decoder_blocks.2.ln3.hook_normalized\n", + "decoder_blocks.2.ln3.hook_out\n", + "decoder_blocks.2.mlp.hook_in\n", + "decoder_blocks.2.mlp.in.hook_in\n", + "decoder_blocks.2.mlp.hook_pre\n", + "decoder_blocks.2.mlp.in.hook_out\n", + "decoder_blocks.2.mlp.hook_post\n", + "decoder_blocks.2.mlp.out.hook_in\n", + "decoder_blocks.2.mlp.out.hook_out\n", + "decoder_blocks.2.mlp.hook_out\n", + "decoder_blocks.2.hook_out\n", + "decoder_blocks.2.hook_resid_post\n", + "decoder_blocks.3.hook_in\n", + "decoder_blocks.3.hook_resid_pre\n", + "decoder_blocks.3.ln1.hook_in\n", + "decoder_blocks.3.ln1.hook_scale\n", + "decoder_blocks.3.ln1.hook_normalized\n", + "decoder_blocks.3.ln1.hook_out\n", + "decoder_blocks.3.self_attn.hook_in\n", + "decoder_blocks.3.self_attn.q.hook_in\n", + "decoder_blocks.3.self_attn.hook_q\n", + "decoder_blocks.3.self_attn.q.hook_out\n", + "decoder_blocks.3.self_attn.k.hook_in\n", + "decoder_blocks.3.self_attn.hook_k\n", + "decoder_blocks.3.self_attn.k.hook_out\n", + "decoder_blocks.3.self_attn.v.hook_in\n", + "decoder_blocks.3.self_attn.hook_v\n", + "decoder_blocks.3.self_attn.v.hook_out\n", + "decoder_blocks.3.self_attn.hook_z\n", + "decoder_blocks.3.self_attn.o.hook_in\n", + "decoder_blocks.3.self_attn.o.hook_out\n", + "decoder_blocks.3.self_attn.hook_out\n", + "decoder_blocks.3.self_attn.hook_result\n", + "decoder_blocks.3.self_attn.hook_pattern\n", + "decoder_blocks.3.self_attn.hook_attn_scores\n", + "decoder_blocks.3.hook_resid_mid\n", + "decoder_blocks.3.ln2.hook_in\n", + "decoder_blocks.3.ln2.hook_scale\n", + "decoder_blocks.3.ln2.hook_normalized\n", + "decoder_blocks.3.ln2.hook_out\n", + "decoder_blocks.3.cross_attn.hook_in\n", + "decoder_blocks.3.cross_attn.q.hook_in\n", + "decoder_blocks.3.cross_attn.hook_q\n", + "decoder_blocks.3.cross_attn.q.hook_out\n", + "decoder_blocks.3.cross_attn.k.hook_in\n", + "decoder_blocks.3.cross_attn.hook_k\n", + "decoder_blocks.3.cross_attn.k.hook_out\n", + "decoder_blocks.3.cross_attn.v.hook_in\n", + "decoder_blocks.3.cross_attn.hook_v\n", + "decoder_blocks.3.cross_attn.v.hook_out\n", + "decoder_blocks.3.cross_attn.hook_z\n", + "decoder_blocks.3.cross_attn.o.hook_in\n", + "decoder_blocks.3.cross_attn.o.hook_out\n", + "decoder_blocks.3.cross_attn.hook_out\n", + "decoder_blocks.3.cross_attn.hook_result\n", + "decoder_blocks.3.cross_attn.hook_pattern\n", + "decoder_blocks.3.cross_attn.hook_attn_scores\n", + "decoder_blocks.3.hook_resid_mid2\n", + "decoder_blocks.3.ln3.hook_in\n", + "decoder_blocks.3.ln3.hook_scale\n", + "decoder_blocks.3.ln3.hook_normalized\n", + "decoder_blocks.3.ln3.hook_out\n", + "decoder_blocks.3.mlp.hook_in\n", + "decoder_blocks.3.mlp.in.hook_in\n", + "decoder_blocks.3.mlp.hook_pre\n", + "decoder_blocks.3.mlp.in.hook_out\n", + "decoder_blocks.3.mlp.hook_post\n", + "decoder_blocks.3.mlp.out.hook_in\n", + "decoder_blocks.3.mlp.out.hook_out\n", + "decoder_blocks.3.mlp.hook_out\n", + "decoder_blocks.3.hook_out\n", + "decoder_blocks.3.hook_resid_post\n", + "decoder_blocks.4.hook_in\n", + "decoder_blocks.4.hook_resid_pre\n", + "decoder_blocks.4.ln1.hook_in\n", + "decoder_blocks.4.ln1.hook_scale\n", + "decoder_blocks.4.ln1.hook_normalized\n", + "decoder_blocks.4.ln1.hook_out\n", + "decoder_blocks.4.self_attn.hook_in\n", + "decoder_blocks.4.self_attn.q.hook_in\n", + "decoder_blocks.4.self_attn.hook_q\n", + "decoder_blocks.4.self_attn.q.hook_out\n", + "decoder_blocks.4.self_attn.k.hook_in\n", + "decoder_blocks.4.self_attn.hook_k\n", + "decoder_blocks.4.self_attn.k.hook_out\n", + "decoder_blocks.4.self_attn.v.hook_in\n", + "decoder_blocks.4.self_attn.hook_v\n", + "decoder_blocks.4.self_attn.v.hook_out\n", + "decoder_blocks.4.self_attn.hook_z\n", + "decoder_blocks.4.self_attn.o.hook_in\n", + "decoder_blocks.4.self_attn.o.hook_out\n", + "decoder_blocks.4.self_attn.hook_out\n", + "decoder_blocks.4.self_attn.hook_result\n", + "decoder_blocks.4.self_attn.hook_pattern\n", + "decoder_blocks.4.self_attn.hook_attn_scores\n", + "decoder_blocks.4.hook_resid_mid\n", + "decoder_blocks.4.ln2.hook_in\n", + "decoder_blocks.4.ln2.hook_scale\n", + "decoder_blocks.4.ln2.hook_normalized\n", + "decoder_blocks.4.ln2.hook_out\n", + "decoder_blocks.4.cross_attn.hook_in\n", + "decoder_blocks.4.cross_attn.q.hook_in\n", + "decoder_blocks.4.cross_attn.hook_q\n", + "decoder_blocks.4.cross_attn.q.hook_out\n", + "decoder_blocks.4.cross_attn.k.hook_in\n", + "decoder_blocks.4.cross_attn.hook_k\n", + "decoder_blocks.4.cross_attn.k.hook_out\n", + "decoder_blocks.4.cross_attn.v.hook_in\n", + "decoder_blocks.4.cross_attn.hook_v\n", + "decoder_blocks.4.cross_attn.v.hook_out\n", + "decoder_blocks.4.cross_attn.hook_z\n", + "decoder_blocks.4.cross_attn.o.hook_in\n", + "decoder_blocks.4.cross_attn.o.hook_out\n", + "decoder_blocks.4.cross_attn.hook_out\n", + "decoder_blocks.4.cross_attn.hook_result\n", + "decoder_blocks.4.cross_attn.hook_pattern\n", + "decoder_blocks.4.cross_attn.hook_attn_scores\n", + "decoder_blocks.4.hook_resid_mid2\n", + "decoder_blocks.4.ln3.hook_in\n", + "decoder_blocks.4.ln3.hook_scale\n", + "decoder_blocks.4.ln3.hook_normalized\n", + "decoder_blocks.4.ln3.hook_out\n", + "decoder_blocks.4.mlp.hook_in\n", + "decoder_blocks.4.mlp.in.hook_in\n", + "decoder_blocks.4.mlp.hook_pre\n", + "decoder_blocks.4.mlp.in.hook_out\n", + "decoder_blocks.4.mlp.hook_post\n", + "decoder_blocks.4.mlp.out.hook_in\n", + "decoder_blocks.4.mlp.out.hook_out\n", + "decoder_blocks.4.mlp.hook_out\n", + "decoder_blocks.4.hook_out\n", + "decoder_blocks.4.hook_resid_post\n", + "decoder_blocks.5.hook_in\n", + "decoder_blocks.5.hook_resid_pre\n", + "decoder_blocks.5.ln1.hook_in\n", + "decoder_blocks.5.ln1.hook_scale\n", + "decoder_blocks.5.ln1.hook_normalized\n", + "decoder_blocks.5.ln1.hook_out\n", + "decoder_blocks.5.self_attn.hook_in\n", + "decoder_blocks.5.self_attn.q.hook_in\n", + "decoder_blocks.5.self_attn.hook_q\n", + "decoder_blocks.5.self_attn.q.hook_out\n", + "decoder_blocks.5.self_attn.k.hook_in\n", + "decoder_blocks.5.self_attn.hook_k\n", + "decoder_blocks.5.self_attn.k.hook_out\n", + "decoder_blocks.5.self_attn.v.hook_in\n", + "decoder_blocks.5.self_attn.hook_v\n", + "decoder_blocks.5.self_attn.v.hook_out\n", + "decoder_blocks.5.self_attn.hook_z\n", + "decoder_blocks.5.self_attn.o.hook_in\n", + "decoder_blocks.5.self_attn.o.hook_out\n", + "decoder_blocks.5.self_attn.hook_out\n", + "decoder_blocks.5.self_attn.hook_result\n", + "decoder_blocks.5.self_attn.hook_pattern\n", + "decoder_blocks.5.self_attn.hook_attn_scores\n", + "decoder_blocks.5.hook_resid_mid\n", + "decoder_blocks.5.ln2.hook_in\n", + "decoder_blocks.5.ln2.hook_scale\n", + "decoder_blocks.5.ln2.hook_normalized\n", + "decoder_blocks.5.ln2.hook_out\n", + "decoder_blocks.5.cross_attn.hook_in\n", + "decoder_blocks.5.cross_attn.q.hook_in\n", + "decoder_blocks.5.cross_attn.hook_q\n", + "decoder_blocks.5.cross_attn.q.hook_out\n", + "decoder_blocks.5.cross_attn.k.hook_in\n", + "decoder_blocks.5.cross_attn.hook_k\n", + "decoder_blocks.5.cross_attn.k.hook_out\n", + "decoder_blocks.5.cross_attn.v.hook_in\n", + "decoder_blocks.5.cross_attn.hook_v\n", + "decoder_blocks.5.cross_attn.v.hook_out\n", + "decoder_blocks.5.cross_attn.hook_z\n", + "decoder_blocks.5.cross_attn.o.hook_in\n", + "decoder_blocks.5.cross_attn.o.hook_out\n", + "decoder_blocks.5.cross_attn.hook_out\n", + "decoder_blocks.5.cross_attn.hook_result\n", + "decoder_blocks.5.cross_attn.hook_pattern\n", + "decoder_blocks.5.cross_attn.hook_attn_scores\n", + "decoder_blocks.5.hook_resid_mid2\n", + "decoder_blocks.5.ln3.hook_in\n", + "decoder_blocks.5.ln3.hook_scale\n", + "decoder_blocks.5.ln3.hook_normalized\n", + "decoder_blocks.5.ln3.hook_out\n", + "decoder_blocks.5.mlp.hook_in\n", + "decoder_blocks.5.mlp.in.hook_in\n", + "decoder_blocks.5.mlp.hook_pre\n", + "decoder_blocks.5.mlp.in.hook_out\n", + "decoder_blocks.5.mlp.hook_post\n", + "decoder_blocks.5.mlp.out.hook_in\n", + "decoder_blocks.5.mlp.out.hook_out\n", + "decoder_blocks.5.mlp.hook_out\n", + "decoder_blocks.5.hook_out\n", + "decoder_blocks.5.hook_resid_post\n", + "decoder_ln_final.hook_in\n", + "decoder_ln_final.hook_scale\n", + "decoder_ln_final.hook_normalized\n", + "decoder_ln_final.hook_out\n", + "unembed.hook_in\n", + "hook_unembed\n", + "unembed.hook_out\n" ] } ], "source": [ - "# the usual way of indexing cache via cache[\"pattetn\",0,\"attn\"] does not work\n", - "# besause it uses cache[\"block.0....] indexing\n", - "# t5 is implementes as separate stack of blocks for encoder and decoder\n", - "# so indexing is cache[\"encoder.0..\"], cache[\"decoder.0..\"] \n", + "# the usual way of indexing cache via cache[\"pattern\",0,\"attn\"] does not work\n", + "# because it uses cache[\"block.0....] indexing\n", + "# t5 is implemented as separate stack of blocks for encoder and decoder\n", + "# so indexing is cache[\"encoder_blocks.0..\"], cache[\"decoder_blocks.0..\"] \n", "# lets see what is in cache and choose the right key for encoder attention pattern on layer 0\n", "print(\"\\n\".join(cache.keys()))" ] }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.916766Z", + "iopub.status.busy": "2026-03-05T18:28:00.916692Z", + "iopub.status.idle": "2026-03-05T18:28:00.934799Z", + "shell.execute_reply": "2026-03-05T18:28:00.934520Z" + } + }, "outputs": [], "source": [ - "encoder_attn_pattern = cache[\"encoder.0.attn.hook_pattern\"]\n", + "encoder_attn_pattern = cache[\"encoder_blocks.0.attn.hook_pattern\"]\n", "input_str_tokens = [w.lstrip(\"▁\") for w in tokenizer.convert_ids_to_tokens(input_ids[0])]" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:00.935894Z", + "iopub.status.busy": "2026-03-05T18:28:00.935828Z", + "iopub.status.idle": "2026-03-05T18:28:01.113204Z", + "shell.execute_reply": "2026-03-05T18:28:01.112869Z" + } + }, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 24, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -675,16 +997,33 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:01.114575Z", + "iopub.status.busy": "2026-03-05T18:28:01.114462Z", + "iopub.status.idle": "2026-03-05T18:28:01.139385Z", + "shell.execute_reply": "2026-03-05T18:28:01.139166Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "['', '▁Bonjour', ',', '▁comment', '▁', 'êtes', '-', 'vous', '?', '']" + "['',\n", + " '▁Bonjour',\n", + " ',',\n", + " '▁comment',\n", + " '▁',\n", + " 'êtes',\n", + " '-',\n", + " 'vous',\n", + " '▁',\n", + " '?',\n", + " '']" ] }, - "execution_count": 26, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -696,33 +1035,40 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:01.140493Z", + "iopub.status.busy": "2026-03-05T18:28:01.140416Z", + "iopub.status.idle": "2026-03-05T18:28:01.325077Z", + "shell.execute_reply": "2026-03-05T18:28:01.324541Z" + } + }, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 27, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "decoder_attn_pattern = cache[\"decoder.0.attn.hook_pattern\"]\n", + "decoder_attn_pattern = cache[\"decoder_blocks.0.self_attn.hook_pattern\"]\n", "cv.attention.attention_patterns(tokens=decoder_str_tokens, attention=decoder_attn_pattern)" ] }, @@ -735,27 +1081,34 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": {}, + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2026-03-05T18:28:01.327806Z", + "iopub.status.busy": "2026-03-05T18:28:01.327575Z", + "iopub.status.idle": "2026-03-05T18:28:01.597425Z", + "shell.execute_reply": "2026-03-05T18:28:01.596950Z" + } + }, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 29, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -766,7 +1119,7 @@ "# you can also pass the activations after the attention layer (hook_attn_out),\n", "# after the cross attention layer (hook_cross_attn_out) or after the mlp layer (hook_mlp_out)\n", "activations = [\n", - " torch.stack([cache[f\"decoder.{layer}.hook_mlp_out\"] for layer in range(model.cfg.n_layers)]).cpu().numpy()\n", + " torch.stack([cache[f\"decoder_blocks.{layer}.hook_resid_post\"] for layer in range(model.cfg.n_layers)]).cpu().numpy()\n", " ]\n", "\n", "# list of samples of shape (n_tokens)\n", @@ -783,7 +1136,7 @@ " first_dimension_name=\"Layer\", \n", " first_dimension_labels=layer_labels,\n", " third_dimension_name=\"Neuron\",\n", - ")\n" + ")" ] } ], @@ -808,4 +1161,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/demos/conftest.py b/demos/conftest.py index 2cd3d5a3e..755a20446 100644 --- a/demos/conftest.py +++ b/demos/conftest.py @@ -5,4 +5,5 @@ def pytest_collectstart(collector): "text/html", "application/javascript", "application/vnd.plotly.v1+json", # Plotly + "application/vnd.jupyter.widget-view+json", # Jupyter widgets (random model_id) ) diff --git a/demos/doc_sanitize.cfg b/demos/doc_sanitize.cfg index c21ea942f..729af1b20 100644 --- a/demos/doc_sanitize.cfg +++ b/demos/doc_sanitize.cfg @@ -8,4 +8,8 @@ replace: TIME-STAMP [regex3] regex: 0[xX][0-9a-fA-F]+ -replace: HEX-CODE \ No newline at end of file +replace: HEX-CODE + +[regex4] +regex: ,\s*device='[^']*' +replace: \ No newline at end of file diff --git a/tests/integration/model_bridge/test_refactor_factored_attn_matrices.py b/tests/integration/model_bridge/test_refactor_factored_attn_matrices.py new file mode 100644 index 000000000..4963ea650 --- /dev/null +++ b/tests/integration/model_bridge/test_refactor_factored_attn_matrices.py @@ -0,0 +1,99 @@ +"""Test refactor_factored_attn_matrices with TransformerBridge. + +Verifies that the refactored attention matrices produce correct results when +used via TransformerBridge, matching HookedTransformer output. +""" + +import pytest +import torch + +from transformer_lens import HookedTransformer +from transformer_lens.model_bridge import TransformerBridge + + +@pytest.fixture(scope="module") +def model_name(): + return "distilgpt2" + + +@pytest.fixture(scope="module") +def device(): + return "cpu" + + +@pytest.fixture(scope="module") +def test_text(): + return "Natural language processing" + + +@pytest.fixture(scope="module") +def reference_ht(model_name, device): + """HookedTransformer with refactor_factored_attn_matrices=True.""" + return HookedTransformer.from_pretrained( + model_name, + device=device, + refactor_factored_attn_matrices=True, + ) + + +def test_refactor_factored_attn_matrices_loss_matches(model_name, device, test_text, reference_ht): + """Bridge with refactor_factored_attn_matrices should match HookedTransformer.""" + ref_loss = reference_ht(test_text, return_type="loss") + + bridge = TransformerBridge.boot_transformers(model_name, device=device) + bridge.enable_compatibility_mode(refactor_factored_attn_matrices=True) + bridge_loss = bridge(test_text, return_type="loss") + + assert not torch.isnan(bridge_loss), "Bridge produced NaN loss" + assert not torch.isinf(bridge_loss), "Bridge produced infinite loss" + + loss_diff = abs(bridge_loss.item() - ref_loss.item()) + assert loss_diff < 1.0, ( + f"Loss difference too large: {loss_diff:.6f} " + f"(bridge={bridge_loss.item():.4f}, reference={ref_loss.item():.4f})" + ) + + +def test_refactor_factored_attn_matrices_logits_match(model_name, device, test_text, reference_ht): + """Bridge logits should closely match HookedTransformer logits after refactoring.""" + tokens = reference_ht.to_tokens(test_text) + ref_logits = reference_ht(tokens) + + bridge = TransformerBridge.boot_transformers(model_name, device=device) + bridge.enable_compatibility_mode(refactor_factored_attn_matrices=True) + bridge_logits = bridge(tokens) + + # Check shapes match + assert ( + ref_logits.shape == bridge_logits.shape + ), f"Shape mismatch: ref={ref_logits.shape}, bridge={bridge_logits.shape}" + + # Check values are close + max_diff = (ref_logits - bridge_logits).abs().max().item() + assert max_diff < 1.0, f"Max logit difference too large: {max_diff:.6f}" + + +def test_refactor_preserves_fold_ln(model_name, device, test_text): + """Refactoring should not undo fold_ln — both should be applied together.""" + # Reference: fold_ln=True + refactor=True + ref = HookedTransformer.from_pretrained( + model_name, + device=device, + fold_ln=True, + refactor_factored_attn_matrices=True, + ) + ref_loss = ref(test_text, return_type="loss") + + # Bridge: same settings + bridge = TransformerBridge.boot_transformers(model_name, device=device) + bridge.enable_compatibility_mode( + fold_ln=True, + refactor_factored_attn_matrices=True, + ) + bridge_loss = bridge(test_text, return_type="loss") + + loss_diff = abs(bridge_loss.item() - ref_loss.item()) + assert loss_diff < 1.0, ( + f"fold_ln + refactor mismatch: {loss_diff:.6f} " + f"(bridge={bridge_loss.item():.4f}, ref={ref_loss.item():.4f})" + ) diff --git a/transformer_lens/ActivationCache.py b/transformer_lens/ActivationCache.py index 55cdbf34a..aa5918e7c 100644 --- a/transformer_lens/ActivationCache.py +++ b/transformer_lens/ActivationCache.py @@ -133,12 +133,16 @@ def remove_batch_dim(self) -> ActivationCache: The ActivationCache with the batch dimension removed. """ if self.has_batch_dim: + # Some tensors lack a batch dim (e.g., T5 position biases) — skip those. + has_batch_1 = any(v.size(0) == 1 for v in self.cache_dict.values()) for key in self.cache_dict: - assert ( - self.cache_dict[key].size(0) == 1 - ), f"Cannot remove batch dimension from cache with batch size > 1, \ - for key {key} with shape {self.cache_dict[key].shape}" - self.cache_dict[key] = self.cache_dict[key][0] + if self.cache_dict[key].size(0) == 1: + self.cache_dict[key] = self.cache_dict[key][0] + else: + assert has_batch_1, ( + f"Cannot remove batch dimension from cache with batch size > 1, " + f"for key {key} with shape {self.cache_dict[key].shape}" + ) self.has_batch_dim = False else: logging.warning("Tried removing batch dimension after already having removed it.") @@ -677,9 +681,20 @@ def compute_head_results( Intended use is to enable use_attn_results when running and caching the model, but this can be useful if you forget. """ - if "blocks.0.attn.hook_result" in self.cache_dict: - logging.warning("Tried to compute head results when they were already cached") - return + # Return early if valid 4D per-head results already exist. + # TransformerBridge may place 3D combined-output tensors under + # hook_result (via alias); detect and replace those with proper 4D. + first_key = "blocks.0.attn.hook_result" + if first_key in self.cache_dict: + val = self.cache_dict[first_key] + if isinstance(val, torch.Tensor) and val.ndim >= 4: + logging.warning("Tried to compute head results when they were already cached") + return + # Stale 3D entries exist — remove them before recomputing + for layer in range(self.model.cfg.n_layers): + key = f"blocks.{layer}.attn.hook_result" + if key in self.cache_dict: + del self.cache_dict[key] for layer in range(self.model.cfg.n_layers): # Note that we haven't enabled set item on this object so we need to edit the underlying # cache_dict directly. @@ -734,11 +749,8 @@ def stack_head_results( # Default to the residual stream immediately pre unembed layer = self.model.cfg.n_layers - if "blocks.0.attn.hook_result" not in self.cache_dict: - print( - "Tried to stack head results when they weren't cached. Computing head results now" - ) - self.compute_head_results() + # Idempotent; also cleans up stale 3D hook_result entries from Bridge. + self.compute_head_results() components: Any = [] labels = [] diff --git a/transformer_lens/benchmarks/activation_cache.py b/transformer_lens/benchmarks/activation_cache.py index 4381cc3e3..925af143f 100644 --- a/transformer_lens/benchmarks/activation_cache.py +++ b/transformer_lens/benchmarks/activation_cache.py @@ -59,7 +59,15 @@ def benchmark_run_with_cache( # Verify cache contains expected keys cache_keys = list(cache.keys()) - expected_patterns = ["embed", "ln_final", "unembed"] + expected_patterns = ["embed", "unembed"] + # Not all architectures have ln_final (e.g., OPT-350m). + has_ln_final = ( + hasattr(bridge, "adapter") + and bridge.adapter.component_mapping + and "ln_final" in bridge.adapter.component_mapping + ) + if has_ln_final: + expected_patterns.append("ln_final") missing_patterns = [] for pattern in expected_patterns: diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py index b383b65d4..9546547d9 100644 --- a/transformer_lens/benchmarks/component_outputs.py +++ b/transformer_lens/benchmarks/component_outputs.py @@ -223,6 +223,28 @@ def __init__( self.adapter = adapter self.cfg = cfg + # Reconcile dtypes: upcast both models to the higher-precision dtype. + self._bridge_was_upcast = False + self._bridge_original_dtype: Optional[torch.dtype] = None + try: + hf_dtype = next(hf_model.parameters()).dtype + except StopIteration: + hf_dtype = torch.float32 + try: + bridge_dtype = next(bridge_model.parameters()).dtype + except StopIteration: + bridge_dtype = torch.float32 + if hf_dtype != bridge_dtype: + # Upcast to the higher-precision dtype + target = hf_dtype if hf_dtype.itemsize >= bridge_dtype.itemsize else bridge_dtype + if bridge_dtype != target: + self._bridge_original_dtype = bridge_dtype + bridge_model.to(target) + self._bridge_was_upcast = True + if hf_dtype != target: + hf_model.to(target) + self.test_dtype = hf_dtype if hf_dtype.itemsize >= bridge_dtype.itemsize else bridge_dtype + # Adjust tolerances based on dtype for reduced precision formats model_dtype = getattr(cfg, "dtype", torch.float32) if model_dtype == torch.bfloat16: @@ -308,7 +330,7 @@ def benchmark_all_components( passed = sum(1 for r in results if r.passed) failed = sum(1 for r in results if not r.passed) - return BenchmarkReport( + report = BenchmarkReport( model_name=getattr(self.cfg, "model_name", "unknown"), total_components=len(results), passed_components=passed, @@ -316,6 +338,12 @@ def benchmark_all_components( component_results=results, ) + # Restore bridge to its original dtype if we upcast it + if self._bridge_was_upcast and self._bridge_original_dtype is not None: + self.bridge_model.to(self._bridge_original_dtype) + + return report + def _test_component_recursive( self, component_path: str, @@ -692,12 +720,25 @@ def _run_component( except AttributeError: # Skip this component raise ValueError("Cannot test pos_embed - unclear interface") + elif component_path == "project_in": + # project_in expects word_embed_proj_dim, not d_model. + word_embed_proj_dim = getattr(self.cfg, "word_embed_proj_dim", None) + if word_embed_proj_dim is not None and word_embed_proj_dim != self.cfg.d_model: + test_input = test_input[..., :word_embed_proj_dim] + return component(test_input) elif ( component_path == "unembed" or "unembed" in component_path or "lm_head" in component_path ): - # Unembedding expects [batch, seq, d_model] input + # Unembed may expect word_embed_proj_dim (e.g., OPT-350m project_out). + word_embed_proj_dim = getattr(self.cfg, "word_embed_proj_dim", None) + if ( + word_embed_proj_dim is not None + and word_embed_proj_dim != self.cfg.d_model + and test_input.shape[-1] != word_embed_proj_dim + ): + test_input = test_input[..., :word_embed_proj_dim] return component(test_input) else: # Standard components (MLP, LayerNorm, etc.) @@ -732,9 +773,12 @@ def _generate_test_inputs(self) -> Dict[str, torch.Tensor]: seq_len = 8 d_model = self.cfg.d_model - # Use dtype from config (matches HF model's dtype) - dtype = getattr(self.cfg, "dtype", torch.float32) - device = next(self.hf_model.parameters()).device + # Use the reconciled dtype from __init__. + dtype = self.test_dtype + try: + device = next(self.hf_model.parameters()).device + except StopIteration: + device = torch.device("cpu") return { "hidden_states": torch.randn(batch_size, seq_len, d_model, dtype=dtype, device=device), diff --git a/transformer_lens/hook_points.py b/transformer_lens/hook_points.py index 743b9c2b0..347dc8b47 100644 --- a/transformer_lens/hook_points.py +++ b/transformer_lens/hook_points.py @@ -243,7 +243,31 @@ def full_hook( pt_handle = self.register_forward_hook(full_hook, prepend=prepend) visible_hooks = self.fwd_hooks elif dir == "bwd": - pt_handle = self.register_full_backward_hook(full_hook, prepend=prepend) + # Use tensor-level grad hooks instead of register_full_backward_hook + # to avoid BackwardHookFunctionBackward views that break downstream + # in-place ops (e.g. OLMo's query_states.clamp_()). + def _bwd_via_tensor_hook( + _module: torch.nn.Module, + _input: Any, + output: Any, + ) -> None: + if isinstance(output, Tensor) and output.requires_grad: + + def _grad_hook(grad: Tensor) -> Any: + result = full_hook(_module, _input, (grad,)) + # full_hook may return a tuple (register_full_backward_hook + # convention) but tensor hooks expect Tensor or None. + if isinstance(result, tuple): + return result[0] + return result + + output.register_hook(_grad_hook) + + if isinstance(hook, partial): + _bwd_via_tensor_hook.__name__ = f"partial({hook.func.__repr__()},...)" + else: + _bwd_via_tensor_hook.__name__ = hook.__repr__() + pt_handle = self.register_forward_hook(_bwd_via_tensor_hook, prepend=prepend) visible_hooks = self.bwd_hooks else: raise ValueError(f"Invalid direction {dir}") diff --git a/transformer_lens/model_bridge/architecture_adapter.py b/transformer_lens/model_bridge/architecture_adapter.py index 1928c8b76..f2ec036d4 100644 --- a/transformer_lens/model_bridge/architecture_adapter.py +++ b/transformer_lens/model_bridge/architecture_adapter.py @@ -2,7 +2,7 @@ This module contains the base class for architecture adapters that map between different model architectures. """ -from typing import Any, Dict, cast +from typing import Any, Dict, Optional, cast import einops import torch @@ -637,10 +637,22 @@ def convert_hf_key_to_tl_key(self, hf_key: str) -> str: if hf_subpath is not None and subkey.startswith(hf_subpath + "."): param = subkey[len(hf_subpath) + 1 :] return f"blocks.{layer_idx}.{tl_subname}.{param}" + # SymbolicBridge (name=None): keys use bridge names directly. + if hf_subpath is None and subkey.startswith(tl_subname + "."): + param = subkey[len(tl_subname) + 1 :] + return f"blocks.{layer_idx}.{tl_subname}.{param}" if hasattr(subcomponent, "submodules"): for tl_nested_name, nested_comp in subcomponent.submodules.items(): - hf_nested_path = f"{hf_subpath}.{nested_comp.name}" - if subkey.startswith(hf_nested_path + "."): + if hf_subpath is not None: + hf_nested_path: Optional[ + str + ] = f"{hf_subpath}.{nested_comp.name}" + else: + # SymbolicBridge: no container prefix + hf_nested_path = nested_comp.name + if hf_nested_path is not None and subkey.startswith( + hf_nested_path + "." + ): param = subkey[len(hf_nested_path) + 1 :] return f"blocks.{layer_idx}.{tl_subname}.{tl_nested_name}.{param}" return hf_key diff --git a/transformer_lens/model_bridge/bridge.py b/transformer_lens/model_bridge/bridge.py index 66203619d..46687b47e 100644 --- a/transformer_lens/model_bridge/bridge.py +++ b/transformer_lens/model_bridge/bridge.py @@ -149,6 +149,7 @@ def boot_transformers( tokenizer: Optional[Any] = None, load_weights: bool = True, trust_remote_code: bool = False, + model_class: Optional[type] = None, ) -> "TransformerBridge": """Boot a model from HuggingFace (alias for sources.transformers.boot). @@ -160,6 +161,8 @@ def boot_transformers( tokenizer: Optional pre-initialized tokenizer to use; if not provided one will be created. load_weights: If False, load model without weights (on meta device) for config inspection only. trust_remote_code: Whether to trust remote code for custom model architectures. + model_class: Optional HuggingFace model class to use instead of the default + auto-detected class (e.g., BertForNextSentencePrediction). Returns: The bridge to the loaded model. @@ -174,6 +177,7 @@ def boot_transformers( tokenizer=tokenizer, load_weights=load_weights, trust_remote_code=trust_remote_code, + model_class=model_class, ) @property @@ -206,8 +210,6 @@ def _register_aliases(self) -> None: for part in single_target.split("."): target_obj = getattr(target_obj, part) object.__setattr__(self, alias_name, target_obj) - if isinstance(target_obj, HookPoint): - target_obj.name = alias_name break except AttributeError: continue @@ -216,8 +218,6 @@ def _register_aliases(self) -> None: for part in target_path.split("."): target_obj = getattr(target_obj, part) object.__setattr__(self, alias_name, target_obj) - if isinstance(target_obj, HookPoint): - target_obj.name = alias_name except AttributeError: pass @@ -384,7 +384,6 @@ def _add_aliases_to_hooks(self, hooks: Dict[str, HookPoint]) -> None: all_aliases = {**self.hook_aliases, **component_aliases} if not all_aliases: return - aliased_hook_ids = set() for alias_name, target in all_aliases.items(): if isinstance(target, list): for single_target in target: @@ -392,11 +391,6 @@ def _add_aliases_to_hooks(self, hooks: Dict[str, HookPoint]) -> None: target_hook = resolve_alias(self, alias_name, {alias_name: single_target}) if target_hook is not None: hooks[alias_name] = target_hook - if isinstance(target_hook, HookPoint): - hook_id = id(target_hook) - if hook_id not in aliased_hook_ids: - target_hook.name = alias_name - aliased_hook_ids.add(hook_id) break except AttributeError: continue @@ -405,17 +399,14 @@ def _add_aliases_to_hooks(self, hooks: Dict[str, HookPoint]) -> None: target_hook = resolve_alias(self, alias_name, {alias_name: target}) if target_hook is not None: hooks[alias_name] = target_hook - if isinstance(target_hook, HookPoint): - hook_id = id(target_hook) - if hook_id not in aliased_hook_ids: - target_hook.name = alias_name - aliased_hook_ids.add(hook_id) except AttributeError: continue def _scan_existing_hooks(self, module: nn.Module, prefix: str = "") -> None: """Scan existing modules for hooks and add them to registry.""" visited = set() + # Prevent alias entries from overwriting canonical HookPoint names. + named_hook_ids: set = set() def scan_module(mod: nn.Module, path: str = "") -> None: obj_id = id(mod) @@ -428,7 +419,10 @@ def scan_module(mod: nn.Module, path: str = "") -> None: hooks_dict = cast(Dict[str, HookPoint], component_hooks) for hook_name, hook in hooks_dict.items(): full_name = f"{path}.{hook_name}" if path else hook_name - hook.name = full_name + hook_id = id(hook) + if hook_id not in named_hook_ids: + hook.name = full_name + named_hook_ids.add(hook_id) self._hook_registry[full_name] = hook for attr_name in dir(mod): if attr_name.startswith("_"): @@ -459,7 +453,10 @@ def scan_module(mod: nn.Module, path: str = "") -> None: continue name = f"{path}.{attr_name}" if path else attr_name if isinstance(attr, HookPoint): - attr.name = name + hook_id = id(attr) + if hook_id not in named_hook_ids: + attr.name = name + named_hook_ids.add(hook_id) self._hook_registry[name] = attr for child_name, child_module in mod.named_children(): if ( @@ -703,6 +700,17 @@ def process_weights( if verbose: print(f"Processing weights for {self.cfg.model_name}...") + # Soft capping (tanh) is not translation-invariant; centering would change output. + if center_unembed and getattr(self.cfg, "output_logits_soft_cap", -1.0) > 0.0: + import logging + + logging.warning( + "center_unembed=True is incompatible with logit softcapping " + "(output_logits_soft_cap=%.1f). Disabling center_unembed.", + self.cfg.output_logits_soft_cap, + ) + center_unembed = False + if verbose: print(" Extracting state dict from existing model...") state_dict = self.state_dict() @@ -1210,7 +1218,7 @@ def forward( Args: input: Input to the model - return_type: Type of output to return ('logits', 'loss', 'both', None) + return_type: Type of output to return ('logits', 'loss', 'both', 'predictions', None) loss_per_token: Whether to return loss per token prepend_bos: Whether to prepend BOS token padding_side: Which side to pad on @@ -1228,6 +1236,15 @@ def forward( for block in self.blocks: block._stop_at_layer_idx = stop_at_layer + # Map HookedEncoderDecoder-style kwargs to HF-compatible names + if "decoder_input" in kwargs: + kwargs["decoder_input_ids"] = kwargs.pop("decoder_input") + if "one_zero_attention_mask" in kwargs: + if attention_mask is None: + attention_mask = kwargs.pop("one_zero_attention_mask") + else: + kwargs.pop("one_zero_attention_mask") + try: if isinstance(input, (str, list)): input_ids = self.to_tokens( @@ -1288,6 +1305,10 @@ def forward( else: kwargs["decoder_input_ids"] = input_ids + # Tell PosEmbedBridge to expand batch=1 position_ids to full batch. + if hasattr(self, "pos_embed"): + self.pos_embed._current_batch_size = input_ids.shape[0] + original_tl_cache = past_kv_cache output = self.original_model(input_ids, **kwargs) if ( @@ -1339,6 +1360,26 @@ def forward( ), f"Expected logits tensor, got {type(logits)}" loss = self.loss_fn(logits, input_ids, per_token=loss_per_token) return (logits, loss) + elif return_type == "predictions": + assert ( + self.tokenizer is not None + ), "Must have a tokenizer to use return_type='predictions'" + if logits.shape[-1] == 2: + # Next Sentence Prediction — 2-class output + logprobs = logits.log_softmax(dim=-1) + predictions = [ + "The sentences are sequential", + "The sentences are NOT sequential", + ] + return predictions[logprobs.argmax(dim=-1).item()] + else: + # Masked Language Modeling — decode [MASK] tokens + logprobs = logits[input_ids == self.tokenizer.mask_token_id].log_softmax(dim=-1) + predictions = self.tokenizer.decode(logprobs.argmax(dim=-1)) + if " " in predictions: + predictions = predictions.split(" ") + predictions = [f"Prediction {i}: {p}" for i, p in enumerate(predictions)] + return predictions elif return_type is None: return None else: @@ -1465,21 +1506,24 @@ def create_names_filter_fn(filter_input): hooks: List[Tuple[HookPoint, str]] = [] visited: set[int] = set() + # None → no-op .to(None), tensors stay on their current device. + cache_device = kwargs.pop("device", None) + def make_cache_hook(name: str): def cache_hook(tensor: torch.Tensor, *, hook: Any) -> torch.Tensor: if tensor is None: cache[name] = None elif isinstance(tensor, torch.Tensor): - cache[name] = tensor.detach().cpu() + cache[name] = tensor.detach().to(cache_device) elif isinstance(tensor, tuple): if len(tensor) > 0 and isinstance(tensor[0], torch.Tensor): - cache[name] = tensor[0].detach().cpu() + cache[name] = tensor[0].detach().to(cache_device) else: pass else: try: if hasattr(tensor, "detach"): - cache[name] = tensor.detach().cpu() + cache[name] = tensor.detach().to(cache_device) except: pass return tensor @@ -1535,14 +1579,13 @@ def stop_hook(tensor: torch.Tensor, *, hook: Any) -> torch.Tensor: hook_dict[block_hook_name].add_hook(stop_hook) hooks.append((hook_dict[block_hook_name], block_hook_name)) filtered_kwargs = kwargs.copy() - target_device = filtered_kwargs.pop("device", None) - if target_device is not None: - self.original_model = self.original_model.to(target_device) + if cache_device is not None: + self.original_model = self.original_model.to(cache_device) if processed_args and isinstance(processed_args[0], torch.Tensor): - processed_args = [processed_args[0].to(target_device)] + list(processed_args[1:]) + processed_args = [processed_args[0].to(cache_device)] + list(processed_args[1:]) for key, value in filtered_kwargs.items(): if isinstance(value, torch.Tensor): - filtered_kwargs[key] = value.to(target_device) + filtered_kwargs[key] = value.to(cache_device) try: if "output_attentions" not in filtered_kwargs: filtered_kwargs["output_attentions"] = True @@ -1826,14 +1869,39 @@ def generate( # Optionally collect logits at each generation step for downstream tooling/tests logits_seq_list: list[torch.Tensor] | None = [] if output_logits else None + # Detect encoder-decoder models (T5, BART, etc.) + is_encoder_decoder = hasattr(self.original_model, "config") and getattr( + self.original_model.config, "is_encoder_decoder", False + ) + # Generate tokens current_tokens = input_tokens.clone() sampled_tokens_list = [] + # For encoder-decoder models, keep encoder input fixed and grow decoder input + if is_encoder_decoder: + encoder_input = input_tokens.clone() + decoder_start_token_id = getattr( + self.original_model.config, "decoder_start_token_id", 0 + ) + decoder_tokens = torch.full( + (batch_size, 1), + decoder_start_token_id, + dtype=input_tokens.dtype, + device=self.cfg.device, + ) + for _ in range(max_new_tokens): # Get logits for next token with torch.no_grad(): - logits = self(current_tokens, return_type="logits") + if is_encoder_decoder: + logits = self( + encoder_input, + return_type="logits", + decoder_input=decoder_tokens, + ) + else: + logits = self(current_tokens, return_type="logits") final_logits = logits[:, -1, :] # Collect logits if requested @@ -1849,14 +1917,14 @@ def generate( temperature=temperature, freq_penalty=freq_penalty, repetition_penalty=repetition_penalty, - tokens=current_tokens, + tokens=decoder_tokens if is_encoder_decoder else current_tokens, ).to(self.cfg.device) else: sampled_tokens = utils.sample_logits( final_logits, temperature=0.0, repetition_penalty=repetition_penalty, - tokens=current_tokens, + tokens=decoder_tokens if is_encoder_decoder else current_tokens, ).to(self.cfg.device) sampled_tokens_list.append(sampled_tokens.unsqueeze(1)) @@ -1872,7 +1940,10 @@ def generate( ) # Append sampled token to current sequence - current_tokens = torch.cat([current_tokens, sampled_tokens.unsqueeze(1)], dim=1) + if is_encoder_decoder: + decoder_tokens = torch.cat([decoder_tokens, sampled_tokens.unsqueeze(1)], dim=1) + else: + current_tokens = torch.cat([current_tokens, sampled_tokens.unsqueeze(1)], dim=1) # Early stopping if all sequences finished if stop_at_eos and finished_sequences.all(): @@ -1880,7 +1951,10 @@ def generate( # Concatenate all sampled tokens sampled_tokens = torch.cat(sampled_tokens_list, dim=1) - output_tokens = torch.cat([input_tokens, sampled_tokens], dim=1) + if is_encoder_decoder: + output_tokens = decoder_tokens + else: + output_tokens = torch.cat([input_tokens, sampled_tokens], dim=1) # Return ModelOutput if output_logits was requested if output_logits and logits_seq_list is not None: @@ -2171,8 +2245,36 @@ def mps(self) -> "TransformerBridge": """ return self.to(torch.device("mps")) - def add_hook(self, name: str, hook_fn, dir="fwd", is_permanent=False): - """Add a hook to a specific component.""" + def add_hook( + self, + name: Union[str, Callable[[str], bool]], + hook_fn, + dir="fwd", + is_permanent=False, + ): + """Add a hook to a specific component or to all components matching a filter. + + Args: + name: Either a string hook point name (e.g. "blocks.0.attn.hook_q") + or a callable filter ``(str) -> bool`` that is applied to every + hook point name; the hook is added to each point where the filter + returns True. + hook_fn: The hook function ``(activation, hook) -> activation | None``. + dir: Hook direction, ``"fwd"`` or ``"bwd"``. + is_permanent: If True the hook survives ``reset_hooks()`` calls. + """ + if callable(name) and not isinstance(name, str): + hook_dict = self.hook_dict + seen_hooks: set[int] = set() + for hook_name, hook_point in hook_dict.items(): + if name(hook_name): + hook_id = id(hook_point) + if hook_id in seen_hooks: + continue + seen_hooks.add(hook_id) + hook_point.add_hook(hook_fn, dir=dir, is_permanent=is_permanent) + return + component = self parts = name.split(".") for part in parts[:-1]: @@ -2389,7 +2491,9 @@ def _normalize_bridge_key_to_hf(self, key: str) -> str: # Map top-level components for tl_name, component in component_mapping.items(): if component.name and tl_name != "blocks": - attr_to_hf[tl_name] = component.name + # Skip if TL name is already a suffix of the HF path (avoids doubling). + if tl_name != component.name and not component.name.endswith("." + tl_name): + attr_to_hf[tl_name] = component.name # Map block-level components (ln1, ln2, attn, mlp) blocks_component = component_mapping.get("blocks") diff --git a/transformer_lens/model_bridge/generalized_components/joint_qkv_attention.py b/transformer_lens/model_bridge/generalized_components/joint_qkv_attention.py index ce2eff8d8..5e26431f7 100644 --- a/transformer_lens/model_bridge/generalized_components/joint_qkv_attention.py +++ b/transformer_lens/model_bridge/generalized_components/joint_qkv_attention.py @@ -105,6 +105,22 @@ def __init__( self._reference_model: Optional[Any] = None self._layer_idx: Optional[int] = None + # Exclude stale qkv combined weights from state_dict after splitting. + self._register_state_dict_hook(JointQKVAttentionBridge._filter_qkv_state_dict) + + @staticmethod + def _filter_qkv_state_dict( + module: torch.nn.Module, + state_dict: Dict[str, Any], + prefix: str, + local_metadata: Dict[str, Any], + ) -> None: + """State dict hook that removes stale combined QKV entries.""" + qkv_prefix = prefix + "qkv." + keys_to_remove = [k for k in state_dict if k.startswith(qkv_prefix)] + for k in keys_to_remove: + del state_dict[k] + def _create_qkv_conversion_rule(self) -> BaseTensorConversion: """Create the appropriate conversion rule for the individual q, k, and v matrices. diff --git a/transformer_lens/model_bridge/generalized_components/moe.py b/transformer_lens/model_bridge/generalized_components/moe.py index cc19e16ff..6a7b55cca 100644 --- a/transformer_lens/model_bridge/generalized_components/moe.py +++ b/transformer_lens/model_bridge/generalized_components/moe.py @@ -99,6 +99,15 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: ): hooked = hooked.to(dtype=target_dtype) args = (hooked,) + args[1:] + elif "hidden_states" in kwargs: + hooked = self.hook_in(kwargs["hidden_states"]) + if ( + target_dtype is not None + and isinstance(hooked, torch.Tensor) + and hooked.is_floating_point() + ): + hooked = hooked.to(dtype=target_dtype) + kwargs = {**kwargs, "hidden_states": hooked} output = self.original_component(*args, **kwargs) if isinstance(output, tuple): hidden_states = output[0] diff --git a/transformer_lens/model_bridge/generalized_components/normalization.py b/transformer_lens/model_bridge/generalized_components/normalization.py index 6fce10e2f..1dca9b0ce 100644 --- a/transformer_lens/model_bridge/generalized_components/normalization.py +++ b/transformer_lens/model_bridge/generalized_components/normalization.py @@ -73,8 +73,8 @@ def forward(self, hidden_states: torch.Tensor, **kwargs: Any) -> torch.Tensor: hidden_states.pow(2).mean(-1, keepdim=True) + getattr(self.config, "eps", 1e-05) ).sqrt() ) - dtype = getattr(self.config, "dtype", input_dtype) - hidden_states = self.hook_normalized(hidden_states / scale).to(dtype) + hidden_states = self.hook_normalized(hidden_states / scale) + # Apply weight/bias in float32 before casting back (matches HF precision). if uses_rms_norm: hidden_states = hidden_states * self.weight else: @@ -84,7 +84,7 @@ def forward(self, hidden_states: torch.Tensor, **kwargs: Any) -> torch.Tensor: and self.original_component.bias is not None ): hidden_states = hidden_states + cast(torch.Tensor, self.original_component.bias) - result = hidden_states + result = hidden_states.to(input_dtype) output = self.hook_out(result) return output diff --git a/transformer_lens/model_bridge/generalized_components/pos_embed.py b/transformer_lens/model_bridge/generalized_components/pos_embed.py index f9f8342de..caf38b0b5 100644 --- a/transformer_lens/model_bridge/generalized_components/pos_embed.py +++ b/transformer_lens/model_bridge/generalized_components/pos_embed.py @@ -69,5 +69,21 @@ def forward(self, *args: Any, **kwargs: Any) -> torch.Tensor: first_arg = self.hook_in(args[0]) args = (first_arg,) + args[1:] output = self.original_component(*args, **kwargs) + + # Expand batch=1 pos embeddings to match actual batch size for hooks. + batch_size = getattr(self, "_current_batch_size", None) + + # Read-and-clear to avoid stale values during generate() steps. + if batch_size is not None: + self._current_batch_size = None + if ( + batch_size is not None + and batch_size > 1 + and isinstance(output, torch.Tensor) + and output.ndim >= 1 + and output.shape[0] == 1 + ): + output = output.expand(batch_size, *[-1] * (output.ndim - 1)) + output = self.hook_out(output) return output diff --git a/transformer_lens/model_bridge/generalized_components/unembedding.py b/transformer_lens/model_bridge/generalized_components/unembedding.py index ca17cab50..56a5b4ad6 100644 --- a/transformer_lens/model_bridge/generalized_components/unembedding.py +++ b/transformer_lens/model_bridge/generalized_components/unembedding.py @@ -102,14 +102,6 @@ def forward(self, hidden_states: torch.Tensor, **kwargs: Any) -> torch.Tensor: hidden_states = hidden_states.to(dtype=target_dtype) output = self.original_component(hidden_states, **kwargs) - # Apply logit soft-capping if configured (e.g., for Gemma-2) - if self.config is not None and hasattr(self.config, "output_logits_soft_cap"): - output_logits_soft_cap = self.config.output_logits_soft_cap - if output_logits_soft_cap is not None and output_logits_soft_cap > 0: - output = output / output_logits_soft_cap - output = torch.tanh(output) - output = output * output_logits_soft_cap - output = self.hook_out(output) return output diff --git a/transformer_lens/model_bridge/sources/transformers.py b/transformer_lens/model_bridge/sources/transformers.py index 3449d0e1e..2c1fc5840 100644 --- a/transformer_lens/model_bridge/sources/transformers.py +++ b/transformer_lens/model_bridge/sources/transformers.py @@ -7,6 +7,7 @@ import logging import os import warnings +from typing import Any import torch from transformers import ( @@ -246,6 +247,7 @@ def boot( tokenizer: PreTrainedTokenizerBase | None = None, load_weights: bool = True, trust_remote_code: bool = False, + model_class: Any | None = None, ) -> TransformerBridge: """Boot a model from HuggingFace. @@ -256,6 +258,9 @@ def boot( dtype: The dtype to use for the model. tokenizer: Optional pre-initialized tokenizer to use; if not provided one will be created. load_weights: If False, load model without weights (on meta device) for config inspection only. + model_class: Optional HuggingFace model class to use instead of the default auto-detected + class. When the class name matches a key in SUPPORTED_ARCHITECTURES, the corresponding + adapter is selected automatically (e.g., BertForNextSentencePrediction). Returns: The bridge to the loaded model. @@ -297,11 +302,23 @@ def boot( word_embed_proj_dim = getattr(hf_config, "word_embed_proj_dim", None) if word_embed_proj_dim is not None: bridge_config.word_embed_proj_dim = word_embed_proj_dim + # OPT post-norm breaks fold_ln assumptions (pre-norm only). + do_layer_norm_before = getattr(hf_config, "do_layer_norm_before", None) + if do_layer_norm_before is not None: + bridge_config.do_layer_norm_before = do_layer_norm_before + # Propagate Gemma2 logit/attn softcapping config from HF to TL fields. + final_logit_softcapping = getattr(hf_config, "final_logit_softcapping", None) + if final_logit_softcapping is not None: + bridge_config.output_logits_soft_cap = float(final_logit_softcapping) + attn_logit_softcapping = getattr(hf_config, "attn_logit_softcapping", None) + if attn_logit_softcapping is not None: + bridge_config.attn_scores_soft_cap = float(attn_logit_softcapping) adapter = ArchitectureAdapterFactory.select_architecture_adapter(bridge_config) if device is None: device = get_device() adapter.cfg.device = str(device) - model_class = get_hf_model_class_for_architecture(architecture) + if model_class is None: + model_class = get_hf_model_class_for_architecture(architecture) # Ensure pad_token_id exists on HF config. Transformers v5 raises AttributeError # for missing config attributes (instead of returning None), which crashes models # like Phi-1 that access config.pad_token_id during __init__. diff --git a/transformer_lens/model_bridge/supported_architectures/bert.py b/transformer_lens/model_bridge/supported_architectures/bert.py index 634e2975b..bcd4a4877 100644 --- a/transformer_lens/model_bridge/supported_architectures/bert.py +++ b/transformer_lens/model_bridge/supported_architectures/bert.py @@ -82,8 +82,7 @@ def __init__(self, cfg: Any) -> None: } # Set up component mapping - # The bridge loads BertForMaskedLM, so core model paths need the 'bert.' prefix. - # The MLM head (cls.predictions) is at the top level of BertForMaskedLM. + # MLM defaults; prepare_model() adjusts for other task heads (e.g., NSP). self.component_mapping = { "embed": EmbeddingBridge(name="bert.embeddings.word_embeddings"), "pos_embed": PosEmbedBridge(name="bert.embeddings.position_embeddings"), @@ -98,8 +97,16 @@ def __init__(self, cfg: Any) -> None: "hook_mlp_in": "mlp.in.hook_in", }, submodules={ - "ln1": NormalizationBridge(name="attention.output.LayerNorm", config=self.cfg), - "ln2": NormalizationBridge(name="output.LayerNorm", config=self.cfg), + "ln1": NormalizationBridge( + name="attention.output.LayerNorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), + "ln2": NormalizationBridge( + name="output.LayerNorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "attn": AttentionBridge( name="attention", config=self.cfg, @@ -122,6 +129,21 @@ def __init__(self, cfg: Any) -> None: ), "unembed": UnembeddingBridge(name="cls.predictions.decoder"), "ln_final": NormalizationBridge( - name="cls.predictions.transform.LayerNorm", config=self.cfg + name="cls.predictions.transform.LayerNorm", + config=self.cfg, + use_native_layernorm_autograd=True, ), } + + def prepare_model(self, hf_model: Any) -> None: + """Adjust component mapping based on the actual HF model variant. + + BertForMaskedLM has cls.predictions (MLM head). + BertForNextSentencePrediction has cls.seq_relationship (NSP head) + and no MLM-specific LayerNorm. + """ + if hasattr(hf_model, "cls") and hasattr(hf_model.cls, "seq_relationship"): + # NSP model — swap head components + assert self.component_mapping is not None + self.component_mapping["unembed"] = UnembeddingBridge(name="cls.seq_relationship") + self.component_mapping.pop("ln_final", None) diff --git a/transformer_lens/model_bridge/supported_architectures/gemma2.py b/transformer_lens/model_bridge/supported_architectures/gemma2.py index 2c4876060..163a1ddac 100644 --- a/transformer_lens/model_bridge/supported_architectures/gemma2.py +++ b/transformer_lens/model_bridge/supported_architectures/gemma2.py @@ -71,13 +71,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py b/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py index ca9210896..6c9e71941 100644 --- a/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py +++ b/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py @@ -80,13 +80,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/gpt2.py b/transformer_lens/model_bridge/supported_architectures/gpt2.py index 88b629ac7..bc68d064e 100644 --- a/transformer_lens/model_bridge/supported_architectures/gpt2.py +++ b/transformer_lens/model_bridge/supported_architectures/gpt2.py @@ -27,14 +27,21 @@ class QKVSplitRearrangeConversion(BaseTensorConversion): - """Custom conversion that splits QKV tensor and then rearranges.""" + """Custom conversion that splits QKV tensor and then rearranges. + + Handles two input formats: + - Combined QKV tensor (from HuggingFace): one dimension is ~3x the other. + Splits into Q/K/V parts, then rearranges to TL format. + - Already-split tensor (from bridge state dict): nn.Linear format + [n_heads*d_head, d_model]. Rearranges directly to TL format. + """ def __init__(self, qkv_index: int, rearrange_pattern: str, **axes_lengths): """Initialize the conversion. Args: qkv_index: Index of Q (0), K (1), or V (2) in the QKV tensor - rearrange_pattern: Einops pattern for rearrangement + rearrange_pattern: Einops pattern for rearrangement (Conv1D format) **axes_lengths: Additional axes lengths for einops """ super().__init__() @@ -42,25 +49,50 @@ def __init__(self, qkv_index: int, rearrange_pattern: str, **axes_lengths): self.rearrange_pattern = rearrange_pattern self.axes_lengths = axes_lengths + def _is_combined_qkv(self, tensor: torch.Tensor) -> bool: + """Check if a tensor is a combined QKV tensor vs already-split.""" + if tensor.ndim == 2: + d0, d1 = tensor.shape + return d1 > d0 * 2 or d0 > d1 * 2 + if tensor.ndim == 1: + n = self.axes_lengths.get("n", 1) + # Combined bias has 3x the expected individual size + return tensor.shape[0] % 3 == 0 and tensor.shape[0] > n * 3 + return False + def handle_conversion(self, input_value: torch.Tensor, *full_context) -> torch.Tensor: """Split QKV tensor and rearrange the selected part.""" - # Determine the split dimension based on tensor shape + if not self._is_combined_qkv(input_value): + # Already-split nn.Linear format — transpose rearrange pattern: + return einops.rearrange( + input_value, "(n h) d_model -> n d_model h", **self.axes_lengths + ) + + # Combined QKV tensor — split then rearrange if len(input_value.shape) == 2: # Weight tensor: [d_model, 3*d_model] -> split along dim=1 - split_dim = 1 + split_dim = 1 if input_value.shape[1] > input_value.shape[0] else 0 elif len(input_value.shape) == 1: # Bias tensor: [3*n_heads*d_head] -> split along dim=0 split_dim = 0 else: raise ValueError(f"Unexpected tensor shape: {input_value.shape}") - # Split the QKV tensor qkv_parts = torch.tensor_split(input_value, 3, dim=split_dim) selected_part = qkv_parts[self.qkv_index] - - # Apply rearrangement return einops.rearrange(selected_part, self.rearrange_pattern, **self.axes_lengths) + def revert(self, input_value: torch.Tensor, *full_context) -> torch.Tensor: + """Revert from TL format [n_heads, d_model, d_head] to nn.Linear format.""" + if input_value.ndim == 3: + return einops.rearrange( + input_value, "n d_model h -> (n h) d_model", **self.axes_lengths + ) + if input_value.ndim == 2: + # Bias in TL format [n_heads, d_head] -> [n_heads*d_head] + return einops.rearrange(input_value, "n h -> (n h)", **self.axes_lengths) + return input_value + class GPT2ArchitectureAdapter(ArchitectureAdapter): """Architecture adapter for GPT2 models. diff --git a/transformer_lens/model_bridge/supported_architectures/gpt_oss.py b/transformer_lens/model_bridge/supported_architectures/gpt_oss.py index 15af4eab7..2e32d277c 100644 --- a/transformer_lens/model_bridge/supported_architectures/gpt_oss.py +++ b/transformer_lens/model_bridge/supported_architectures/gpt_oss.py @@ -28,6 +28,7 @@ def __init__(self, cfg: Any) -> None: self.cfg.gated_mlp = True + self.cfg.normalization_type = "RMS" self.cfg.uses_rms_norm = True # GPT-OSS uses 'variance_epsilon' instead of 'eps' for RMSNorm self.cfg.eps_attr = "variance_epsilon" diff --git a/transformer_lens/model_bridge/supported_architectures/llama.py b/transformer_lens/model_bridge/supported_architectures/llama.py index 7c9ae0a72..acba3bcab 100644 --- a/transformer_lens/model_bridge/supported_architectures/llama.py +++ b/transformer_lens/model_bridge/supported_architectures/llama.py @@ -77,13 +77,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/neox.py b/transformer_lens/model_bridge/supported_architectures/neox.py index 932a742bb..d4e2b078f 100644 --- a/transformer_lens/model_bridge/supported_architectures/neox.py +++ b/transformer_lens/model_bridge/supported_architectures/neox.py @@ -140,8 +140,16 @@ def __init__(self, cfg: Any) -> None: "blocks": BlockBridge( name="gpt_neox.layers", submodules={ - "ln1": NormalizationBridge(name="input_layernorm", config=self.cfg), - "ln2": NormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "ln1": NormalizationBridge( + name="input_layernorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), + "ln2": NormalizationBridge( + name="post_attention_layernorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "attn": JointQKVPositionEmbeddingsAttentionBridge( name="attention", config=self.cfg, @@ -161,7 +169,11 @@ def __init__(self, cfg: Any) -> None: ), }, ), - "ln_final": NormalizationBridge(name="gpt_neox.final_layer_norm", config=self.cfg), + "ln_final": NormalizationBridge( + name="gpt_neox.final_layer_norm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "unembed": UnembeddingBridge(name="embed_out"), } diff --git a/transformer_lens/model_bridge/supported_architectures/opt.py b/transformer_lens/model_bridge/supported_architectures/opt.py index 5f955b64e..d6ac002ac 100644 --- a/transformer_lens/model_bridge/supported_architectures/opt.py +++ b/transformer_lens/model_bridge/supported_architectures/opt.py @@ -35,6 +35,12 @@ def __init__(self, cfg: Any) -> None: # OPT models were trained with BOS tokens (inherits default_prepend_bos = True) + # Post-norm: disable fold_ln and center_writing_weights (pre-norm only). + is_post_norm = not getattr(self.cfg, "do_layer_norm_before", True) + if is_post_norm: + self.supports_fold_ln = False + self.supports_center_writing_weights = False + self.weight_processing_conversions = { "blocks.{i}.attn.q.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion("(n h) m -> n m h", n=self.cfg.n_heads), @@ -63,7 +69,11 @@ def __init__(self, cfg: Any) -> None: "blocks": BlockBridge( name="model.decoder.layers", submodules={ - "ln1": NormalizationBridge(name="self_attn_layer_norm", config=self.cfg), + "ln1": NormalizationBridge( + name="self_attn_layer_norm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "attn": AttentionBridge( name="self_attn", config=self.cfg, @@ -76,7 +86,11 @@ def __init__(self, cfg: Any) -> None: "o": LinearBridge(name="out_proj"), }, ), - "ln2": NormalizationBridge(name="final_layer_norm", config=self.cfg), + "ln2": NormalizationBridge( + name="final_layer_norm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), # OPT has fc1/fc2 directly on the block, not in an MLP container. # Use SymbolicBridge to maintain TransformerLens structure while # correctly mapping to the underlying architecture. @@ -92,5 +106,11 @@ def __init__(self, cfg: Any) -> None: } if has_final_layer_norm: self.component_mapping["ln_final"] = NormalizationBridge( - name="model.decoder.final_layer_norm", config=self.cfg + name="model.decoder.final_layer_norm", + config=self.cfg, + use_native_layernorm_autograd=True, ) + # project_in/project_out bridge word_embed_proj_dim <-> hidden_size. + if not has_final_layer_norm: + self.component_mapping["project_in"] = LinearBridge(name="model.decoder.project_in") + self.component_mapping["project_out"] = LinearBridge(name="model.decoder.project_out") diff --git a/transformer_lens/model_bridge/supported_architectures/phi.py b/transformer_lens/model_bridge/supported_architectures/phi.py index 63824f032..205a20708 100644 --- a/transformer_lens/model_bridge/supported_architectures/phi.py +++ b/transformer_lens/model_bridge/supported_architectures/phi.py @@ -73,7 +73,11 @@ def __init__(self, cfg: Any) -> None: "blocks": BlockBridge( name="model.layers", submodules={ - "ln1": NormalizationBridge(name="input_layernorm", config=self.cfg), + "ln1": NormalizationBridge( + name="input_layernorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "attn": PositionEmbeddingsAttentionBridge( name="self_attn", config=self.cfg, @@ -95,7 +99,11 @@ def __init__(self, cfg: Any) -> None: ), }, ), - "ln_final": NormalizationBridge(name="model.final_layernorm", config=self.cfg), + "ln_final": NormalizationBridge( + name="model.final_layernorm", + config=self.cfg, + use_native_layernorm_autograd=True, + ), "unembed": UnembeddingBridge(name="lm_head"), } diff --git a/transformer_lens/model_bridge/supported_architectures/qwen2.py b/transformer_lens/model_bridge/supported_architectures/qwen2.py index 8a905e7c0..4a1b37153 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen2.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen2.py @@ -62,13 +62,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/qwen3.py b/transformer_lens/model_bridge/supported_architectures/qwen3.py index 31a871b3c..55da0bc03 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen3.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen3.py @@ -63,13 +63,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/t5.py b/transformer_lens/model_bridge/supported_architectures/t5.py index ffb1af27d..6e2e51822 100644 --- a/transformer_lens/model_bridge/supported_architectures/t5.py +++ b/transformer_lens/model_bridge/supported_architectures/t5.py @@ -37,8 +37,11 @@ def __init__(self, cfg: Any) -> None: """ super().__init__(cfg) + # T5 RMSNorm: disable fold_ln to avoid corrupting weights. + self.supports_fold_ln = False + # Set config variables for weight processing - self.cfg.normalization_type = "LN" + self.cfg.normalization_type = "RMS" self.cfg.positional_embedding_type = "relative_positional_bias" self.cfg.final_rms = False self.cfg.attn_only = False diff --git a/transformer_lens/patching.py b/transformer_lens/patching.py index 90240a3ea..7115726f8 100644 --- a/transformer_lens/patching.py +++ b/transformer_lens/patching.py @@ -202,6 +202,8 @@ def generic_activation_patch( # A generic patching hook - for each index, it applies the patch_setter appropriately to patch the activation def patching_hook(corrupted_activation, hook, index, clean_activation): + # Clone before inplace patch_setter to avoid autograd view conflicts. + corrupted_activation = corrupted_activation.clone() return patch_setter(corrupted_activation, index, clean_activation) # Iterate over every list of indices, and make the appropriate patch! diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json index 8c39c5706..ca7d6ca3e 100644 --- a/transformer_lens/tools/model_registry/data/supported_models.json +++ b/transformer_lens/tools/model_registry/data/supported_models.json @@ -7,8 +7,8 @@ "scan_duration_seconds": 2.2 }, "total_architectures": 25, - "total_models": 4908, - "total_verified": 482, + "total_models": 4907, + "total_verified": 485, "models": [ { "architecture_id": "Qwen2ForCausalLM", @@ -26,37 +26,37 @@ "architecture_id": "Qwen3ForCausalLM", "model_id": "Qwen/Qwen3-0.6B", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 82.8 + "phase4_score": 91.9 }, { "architecture_id": "GPT2LMHeadModel", "model_id": "openai-community/gpt2", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 94.9 + "phase4_score": 88.5 }, { "architecture_id": "Qwen2ForCausalLM", "model_id": "Qwen/Qwen2.5-1.5B-Instruct", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 91.8 + "phase4_score": 94.7 }, { "architecture_id": "LlamaForCausalLM", @@ -86,13 +86,13 @@ "architecture_id": "Qwen2ForCausalLM", "model_id": "Qwen/Qwen2.5-0.5B-Instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 100.0 + "phase4_score": 96.6 }, { "architecture_id": "Qwen2ForCausalLM", @@ -110,13 +110,13 @@ "architecture_id": "Qwen3ForCausalLM", "model_id": "Qwen/Qwen3-4B", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 98.8 + "phase4_score": 99.4 }, { "architecture_id": "Qwen3ForCausalLM", @@ -158,13 +158,13 @@ "architecture_id": "OPTForCausalLM", "model_id": "facebook/opt-125m", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 97.0 + "phase4_score": 90.9 }, { "architecture_id": "Qwen3ForCausalLM", @@ -218,7 +218,7 @@ "architecture_id": "LlamaForCausalLM", "model_id": "meta-llama/Llama-3.2-1B-Instruct", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, @@ -266,13 +266,13 @@ "architecture_id": "LlamaForCausalLM", "model_id": "llm-jp/llm-jp-3-3.7b-instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed with issues: P3=88.9% (failed: process_bridge_weights, layer_norm_folding)", + "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, - "phase3_score": 88.9, - "phase4_score": 97.5 + "phase3_score": 100.0, + "phase4_score": 93.7 }, { "architecture_id": "LlamaForCausalLM", @@ -290,13 +290,13 @@ "architecture_id": "GPT2LMHeadModel", "model_id": "distilbert/distilgpt2", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 94.8 + "phase4_score": 81.0 }, { "architecture_id": "Qwen3ForCausalLM", @@ -398,13 +398,13 @@ "architecture_id": "PhiForCausalLM", "model_id": "microsoft/phi-2", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 95.7 + "phase4_score": 95.8 }, { "architecture_id": "Qwen2ForCausalLM", @@ -434,13 +434,13 @@ "architecture_id": "Gemma3ForCausalLM", "model_id": "google/gemma-3-1b-it", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 97.2 + "phase4_score": 99.3 }, { "architecture_id": "Qwen3ForCausalLM", @@ -470,13 +470,13 @@ "architecture_id": "GPTNeoXForCausalLM", "model_id": "EleutherAI/pythia-160m", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 94.0 + "phase4_score": 92.6 }, { "architecture_id": "LlamaForCausalLM", @@ -530,26 +530,25 @@ "architecture_id": "OpenELMForCausalLM", "model_id": "apple/OpenELM-1_1B-Instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "status_label": "UNVERIFIED", - "phase4_score": 96.3 + "phase4_score": 95.3 }, { "architecture_id": "BloomForCausalLM", "model_id": "bigscience/bloomz-560m", "status": 1, - "verified_date": "2026-02-21", + "verified_date": "2026-03-10", "metadata": null, - "note": null, + "note": "Full verification completed with issues: P2=91.7% (failed: generation)", "phase1_score": 100.0, - "phase2_score": 100.0, + "phase2_score": 91.7, "phase3_score": 100.0, - "phase4_score": 90.0 + "phase4_score": 95.9 }, { "architecture_id": "LlamaForCausalLM", @@ -639,7 +638,7 @@ "architecture_id": "Phi3ForCausalLM", "model_id": "kaitchup/Phi-3-mini-4k-instruct-gptq-4bit", "status": 2, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "TransformerLens does not support quantized models at this time", "phase1_score": null, @@ -735,13 +734,13 @@ "architecture_id": "Phi3ForCausalLM", "model_id": "microsoft/Phi-3-mini-4k-instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 59.1 + "phase4_score": 56.4 }, { "architecture_id": "Qwen2ForCausalLM", @@ -1083,25 +1082,25 @@ "architecture_id": "OPTForCausalLM", "model_id": "facebook/opt-1.3b", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 94.7 + "phase4_score": 96.4 }, { "architecture_id": "MixtralForCausalLM", "model_id": "ggml-org/stories15M_MOE", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 92.1 + "phase4_score": 91.1 }, { "architecture_id": "Qwen2ForCausalLM", @@ -1215,13 +1214,13 @@ "architecture_id": "Gemma2ForCausalLM", "model_id": "google/gemma-2-2b-it", "status": 1, - "verified_date": "2026-02-25", + "verified_date": "2026-03-10", "metadata": null, "note": "Core verification completed", "phase1_score": 100.0, - "phase2_score": 64.3, - "phase3_score": 76.2, - "phase4_score": 99.3 + "phase2_score": 100.0, + "phase3_score": 95.2, + "phase4_score": 100.0 }, { "architecture_id": "Qwen3ForCausalLM", @@ -1239,13 +1238,13 @@ "architecture_id": "GPTNeoXForCausalLM", "model_id": "EleutherAI/pythia-70m-deduped", "status": 1, - "verified_date": "2026-02-25", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed: P4=94.0%", + "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 94.0 + "phase4_score": 77.5 }, { "architecture_id": "GptOssForCausalLM", @@ -1551,13 +1550,13 @@ "architecture_id": "Gemma2ForCausalLM", "model_id": "hmellor/tiny-random-Gemma2ForCausalLM", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 70.3 + "phase4_score": 66.3 }, { "architecture_id": "LlamaForCausalLM", @@ -1611,13 +1610,13 @@ "architecture_id": "Olmo2ForCausalLM", "model_id": "allenai/OLMo-2-0425-1B", "status": 3, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, - "note": "Below threshold: P3=90.0% but required tests failed: logits_equivalence \u2014 Tensors differ: max_diff=28.810717, mean_rel=73.159515", + "note": "Below threshold: P3=90.0% but required tests failed: logits_equivalence \u2014 Tensors differ: max_diff=28.810719, mean_rel=60.400272", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 90.0, - "phase4_score": 95.7 + "phase4_score": 94.8 }, { "architecture_id": "MistralForCausalLM", @@ -1755,13 +1754,13 @@ "architecture_id": "GptOssForCausalLM", "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 74.8 + "phase4_score": 77.5 }, { "architecture_id": "Qwen3ForCausalLM", @@ -1887,13 +1886,13 @@ "architecture_id": "GPTNeoForCausalLM", "model_id": "EleutherAI/gpt-neo-125m", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 92.6 + "phase4_score": 94.7 }, { "architecture_id": "Qwen3ForCausalLM", @@ -1970,14 +1969,14 @@ { "architecture_id": "OPTForCausalLM", "model_id": "facebook/opt-350m", - "status": 3, - "verified_date": "2026-02-23", + "status": 1, + "verified_date": "2026-03-10", "metadata": null, - "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 1/147 components failed (1 critical)", - "phase1_score": 50.0, - "phase2_score": 91.7, - "phase3_score": 94.1, - "phase4_score": null + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 87.9 }, { "architecture_id": "Qwen2ForCausalLM", @@ -2043,13 +2042,13 @@ "architecture_id": "GemmaForCausalLM", "model_id": "google/gemma-2b", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed with issues: P2=92.9% (failed: hook_functionality); P3=95.2% (failed: hook_functionality)", "phase1_score": 100.0, "phase2_score": 92.9, "phase3_score": 95.2, - "phase4_score": 98.2 + "phase4_score": 91.7 }, { "architecture_id": "Gemma2ForCausalLM", @@ -2079,13 +2078,13 @@ "architecture_id": "Gemma3ForCausalLM", "model_id": "google/gemma-3-270m", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 84.4 + "phase4_score": 92.7 }, { "architecture_id": "Qwen2ForCausalLM", @@ -2247,13 +2246,13 @@ "architecture_id": "Olmo3ForCausalLM", "model_id": "allenai/Olmo-3-7B-Instruct-SFT", "status": 1, - "verified_date": "2026-02-26", + "verified_date": "2026-03-10", "metadata": null, - "note": "Core verification completed", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 91.1 + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 94.8 }, { "architecture_id": "Qwen3ForCausalLM", @@ -2271,13 +2270,13 @@ "architecture_id": "MistralForCausalLM", "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.2", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed", + "note": "Full verification completed with issues, low text quality", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 39.8 + "phase4_score": 47.5 }, { "architecture_id": "MistralForCausalLM", @@ -2319,13 +2318,13 @@ "architecture_id": "BloomForCausalLM", "model_id": "bigscience/bloom-560m", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed with issues: P2=92.9% (failed: hook_functionality)", "phase1_score": 100.0, "phase2_score": 92.9, "phase3_score": 100.0, - "phase4_score": 91.2 + "phase4_score": 89.2 }, { "architecture_id": "MistralForCausalLM", @@ -2379,13 +2378,13 @@ "architecture_id": "GPTJForCausalLM", "model_id": "EleutherAI/gpt-j-6b", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 95.3 + "phase4_score": 84.3 }, { "architecture_id": "GemmaForCausalLM", @@ -2451,13 +2450,13 @@ "architecture_id": "PhiForCausalLM", "model_id": "microsoft/phi-1_5", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 97.6 + "phase4_score": 97.9 }, { "architecture_id": "Qwen2ForCausalLM", @@ -2475,13 +2474,13 @@ "architecture_id": "T5ForConditionalGeneration", "model_id": "lmqg/flan-t5-base-squad-qag", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed", + "note": "Full verification completed with issues, low text quality", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": null, - "phase4_score": null + "phase4_score": 48.7 }, { "architecture_id": "Phi3ForCausalLM", @@ -2738,14 +2737,14 @@ { "architecture_id": "Olmo3ForCausalLM", "model_id": "allenai/Olmo-3-7B-Instruct", - "status": 1, - "verified_date": "2026-02-26", + "status": 3, + "verified_date": "2026-03-10", "metadata": null, "note": "Core verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 96.8 + "phase2_score": 64.3, + "phase3_score": 75.0, + "phase4_score": 96.1 }, { "architecture_id": "Qwen3ForCausalLM", @@ -2883,13 +2882,13 @@ "architecture_id": "T5ForConditionalGeneration", "model_id": "Vamsi/T5_Paraphrase_Paws", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": null, - "phase4_score": null + "phase4_score": 61.1 }, { "architecture_id": "LlamaForCausalLM", @@ -2979,13 +2978,13 @@ "architecture_id": "GemmaForCausalLM", "model_id": "google/gemma-2b-it", "status": 1, - "verified_date": "2026-02-25", + "verified_date": "2026-03-10", "metadata": null, - "note": "Core verification completed", + "note": "Config unavailable: Gated repo (google/gemma-2b-it)", "phase1_score": 100.0, - "phase2_score": 71.4, + "phase2_score": 92.9, "phase3_score": 90.5, - "phase4_score": 91.1 + "phase4_score": 92.6 }, { "architecture_id": "Qwen2ForCausalLM", @@ -3038,14 +3037,14 @@ { "architecture_id": "MistralForCausalLM", "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.1", - "status": 0, - "verified_date": null, + "status": 1, + "verified_date": "2026-03-10", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null + "note": "Full verification completed with issues, low text quality", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 47.5 }, { "architecture_id": "LlamaForCausalLM", @@ -3135,13 +3134,13 @@ "architecture_id": "MixtralForCausalLM", "model_id": "TitanML/tiny-mixtral", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed", + "note": "Full verification completed with issues, low text quality", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": null + "phase4_score": 44.8 }, { "architecture_id": "Qwen2ForCausalLM", @@ -3807,10 +3806,10 @@ "architecture_id": "BertForMaskedLM", "model_id": "Macropodus/macbert4mdcspell_v1", "status": 1, - "verified_date": "2026-02-21", + "verified_date": "2026-03-10", "metadata": null, - "note": null, - "phase1_score": 50.0, + "note": "Full verification completed", + "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, "phase4_score": null @@ -3855,13 +3854,13 @@ "architecture_id": "OlmoeForCausalLM", "model_id": "allenai/OLMoE-1B-7B-0125", "status": 1, - "verified_date": "2026-02-26", + "verified_date": "2026-03-10", "metadata": null, - "note": "Core verification completed", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 98.6 + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 95.1 }, { "architecture_id": "GPT2LMHeadModel", @@ -4359,13 +4358,13 @@ "architecture_id": "GPTNeoForCausalLM", "model_id": "EleutherAI/gpt-neo-1.3B", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 97.5 + "phase4_score": 97.6 }, { "architecture_id": "Qwen2ForCausalLM", @@ -5099,18 +5098,6 @@ "phase3_score": null, "phase4_score": 89.5 }, - { - "architecture_id": "GptOssForCausalLM", - "model_id": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4", - "status": 0, - "verified_date": null, - "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null - }, { "architecture_id": "LlamaForCausalLM", "model_id": "tartuNLP/Llammas-base-p1-GPT-4o-human-error-mix-paragraph-GEC", @@ -5187,13 +5174,13 @@ "architecture_id": "OlmoeForCausalLM", "model_id": "allenai/OLMoE-1B-7B-0924", "status": 1, - "verified_date": "2026-02-26", + "verified_date": "2026-03-10", "metadata": null, - "note": "Core verification completed", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 91.3 + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 97.1 }, { "architecture_id": "GPT2LMHeadModel", @@ -5415,13 +5402,13 @@ "architecture_id": "OlmoForCausalLM", "model_id": "allenai/OLMo-1B-hf", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 95.5 + "phase4_score": 85.1 }, { "architecture_id": "LlamaForCausalLM", @@ -5439,13 +5426,13 @@ "architecture_id": "Olmo2ForCausalLM", "model_id": "allenai/OLMo-2-0425-1B-Instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 99.4 + "phase4_score": 94.2 }, { "architecture_id": "Gemma3ForCausalLM", @@ -5919,13 +5906,13 @@ "architecture_id": "StableLmForCausalLM", "model_id": "stabilityai/stablelm-3b-4e1t", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 93.7 + "phase4_score": 95.9 }, { "architecture_id": "MistralForCausalLM", @@ -6075,13 +6062,13 @@ "architecture_id": "GPTJForCausalLM", "model_id": "peft-internal-testing/tiny-random-GPTJForCausalLM", "status": 1, - "verified_date": "2026-02-24", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed", + "note": "Full verification completed with issues, low text quality", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": null + "phase4_score": 37.6 }, { "architecture_id": "LlamaForCausalLM", @@ -7334,14 +7321,14 @@ { "architecture_id": "GptOssForCausalLM", "model_id": "optimum-intel-internal-testing/tiny-GptOssForCausalLM", - "status": 0, - "verified_date": null, + "status": 1, + "verified_date": "2026-03-10", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null + "note": "Full verification completed with issues: P3=94.7% (failed: attention_output_centering)", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 94.7, + "phase4_score": 77.5 }, { "architecture_id": "MistralForCausalLM", @@ -8223,13 +8210,13 @@ "architecture_id": "StableLmForCausalLM", "model_id": "stabilityai/stablelm-2-zephyr-1_6b", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 95.2 + "phase4_score": 98.1 }, { "architecture_id": "Qwen3ForCausalLM", @@ -9003,9 +8990,9 @@ "architecture_id": "BertForMaskedLM", "model_id": "shibing624/macbert4csc-base-chinese", "status": 1, - "verified_date": "2026-02-21", + "verified_date": "2026-03-10", "metadata": null, - "note": null, + "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, @@ -12399,13 +12386,13 @@ "architecture_id": "OlmoForCausalLM", "model_id": "allenai/OLMo-1B-0724-hf", "status": 1, - "verified_date": "2026-02-22", + "verified_date": "2026-03-10", "metadata": null, - "note": "Full verification completed with issues: P2=83.3% (failed: critical_backward_hooks, backward_hooks); P3=88.2% (failed: critical_backward_hooks, backward_hooks)", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": 83.3, - "phase3_score": 88.2, - "phase4_score": 94.8 + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 91.3 }, { "architecture_id": "Olmo3ForCausalLM", @@ -15747,14 +15734,14 @@ "architecture_id": "OpenELMForCausalLM", "model_id": "apple/OpenELM-3B-Instruct", "status": 1, - "verified_date": "2026-02-23", + "verified_date": "2026-03-10", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, "status_label": "UNVERIFIED", - "phase4_score": null + "phase4_score": 97.8 }, { "architecture_id": "OPTForCausalLM", @@ -23776,14 +23763,14 @@ { "architecture_id": "GPTNeoXForCausalLM", "model_id": "EleutherAI/pythia-410m-seed2", - "status": 3, - "verified_date": "2026-02-24", + "status": 1, + "verified_date": "2026-03-10", "metadata": null, - "note": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002012, mean_rel=0.000401", - "phase1_score": 50.0, + "note": "Full verification completed", + "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": null + "phase4_score": 87.6 }, { "architecture_id": "LlamaForCausalLM", diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json index a8eb3f1ac..32189c23b 100644 --- a/transformer_lens/tools/model_registry/data/verification_history.json +++ b/transformer_lens/tools/model_registry/data/verification_history.json @@ -1,5 +1,5 @@ { - "last_updated": "2026-02-25T08:05:33.770204", + "last_updated": "2026-03-10T16:26:35.208675", "records": [ { "model_id": "Macropodus/macbert4mdcspell_v1", @@ -7430,6 +7430,1876 @@ "notes": "Benchmark passed: P4=94.0%", "invalidated": false, "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002956, mean_rel=0.000962", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "shibing624/macbert4csc-base-chinese", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002956, mean_rel=0.000962", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "shibing624/macbert4csc-base-chinese", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloomz-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloom-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "openai-community/gpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-j-6b", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "peft-internal-testing/tiny-random-GPTJForCausalLM", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-125m", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-1.3B", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-160m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-70m-deduped", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2-2b-it", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=76.2% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=13.293901, mean_rel=32.253456", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "hmellor/tiny-random-Gemma2ForCausalLM", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-1b-it", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-270m", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality); P3=95.2% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b-it", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.5% but required tests failed: logits_equivalence \u2014 Found 1 significant mismatches in critical hooks", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=94.7% but required tests failed: logits_equivalence \u2014 Text quality score: 77.5/100 (avg perplexity: 372.2) \u2014 generated text may be incoherent", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components); P3=94.7% but required tests failed: log \u2014 2/10 components failed (2 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "meta-llama/Llama-3.2-1B-Instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "llm-jp/llm-jp-3-3.7b-instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.2", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.1", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=89.5% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=12.128962, mean_rel=0.271985", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-125m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-1.3b", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.0% but required tests failed: logits_equivalence \u2014 Tensors differ: max_diff=28.810719, mean_rel=60.400272", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B-Instruct", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct-SFT", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P2=64.3% < 75.0% (failed: logits_equivalence, loss_equivalence, hook_functionality, \u2014 Tensors differ: max_diff=19.425457, mean_rel=11.940315", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=58.8% < 75.0% (failed: weight_modification, hook_functionality, run_with_cache, \u2014 Critical backward hooks check failed: Output 0 of BackwardHookFunctionBackward is a view and is being modified inplace. This view was created inside a", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0125", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=89.5% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=25.509125, mean_rel=0.521523", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0924", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=76.2% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=21.740696, mean_rel=13.788611", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-1_1B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-3B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/Phi-3-mini-4k-instruct", + "architecture_id": "Phi3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-2", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002039, mean_rel=0.000401", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-1_5", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-0.6B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-4B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-3b-4e1t", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-2-zephyr-1_6b", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "lmqg/flan-t5-base-squad-qag", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Vamsi/T5_Paraphrase_Paws", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=89.5% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=12.128962, mean_rel=0.271985", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0125", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0924", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=94.7% but required tests failed: logits_equivalence \u2014 Text quality score: 77.5/100 (avg perplexity: 372.2) \u2014 generated text may be incoherent", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-2", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002039, mean_rel=0.000401", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-2", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=58.8% < 75.0% (failed: weight_modification, hook_functionality, run_with_cache, \u2014 Critical backward hooks check failed: Output 0 of BackwardHookFunctionBackward is a view and is being modified inplace. This view was created inside a", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002956, mean_rel=0.000962", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: forward_pass_logits) \u2014 Tensors differ: max_diff=0.002956, mean_rel=0.000962", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "shibing624/macbert4csc-base-chinese", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "shibing624/macbert4csc-base-chinese", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloomz-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloomz-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloom-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloom-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "openai-community/gpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "openai-community/gpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-j-6b", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "peft-internal-testing/tiny-random-GPTJForCausalLM", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-j-6b", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "peft-internal-testing/tiny-random-GPTJForCausalLM", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-125m", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-125m", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-1.3B", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-1.3B", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-160m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-70m-deduped", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2-2b-it", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=76.2% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=13.293901, mean_rel=32.253456", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "hmellor/tiny-random-Gemma2ForCausalLM", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-1b-it", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-270m", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality); P3=95.2% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b-it", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.5% but required tests failed: logits_equivalence \u2014 Found 1 significant mismatches in critical hooks", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components); P3=94.7% but required tests failed: log \u2014 2/10 components failed (2 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "meta-llama/Llama-3.2-1B-Instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "llm-jp/llm-jp-3-3.7b-instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.2", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.1", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=50.0% < 75.0% (failed: hook_functionality, critical_forward_hooks, forward_hooks \u2014 Backward hooks check failed: 'tuple' object has no attribute 'clone'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=50.0% < 75.0% (failed: hook_functionality, critical_forward_hooks, forward_hooks \u2014 Backward hooks check failed: 'tuple' object has no attribute 'clone'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-125m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-1.3b", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.0% but required tests failed: logits_equivalence \u2014 Tensors differ: max_diff=28.810719, mean_rel=60.400272", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B-Instruct", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct-SFT", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P2=64.3% < 75.0% (failed: logits_equivalence, loss_equivalence, hook_functionality, \u2014 Tensors differ: max_diff=19.425457, mean_rel=11.940315", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0125", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-09", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=50.0% < 75.0% (failed: hook_functionality, critical_forward_hooks, forward_hooks \u2014 Backward hooks check failed: 'tuple' object has no attribute 'clone'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0924", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-1_1B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-3B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/Phi-3-mini-4k-instruct", + "architecture_id": "Phi3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-2", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-1_5", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-0.6B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-4B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-3b-4e1t", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-2-zephyr-1_6b", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "lmqg/flan-t5-base-squad-qag", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Vamsi/T5_Paraphrase_Paws", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0125", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components); P3=94.7% but required tests failed: log \u2014 2/10 components failed (2 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-350m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components); P3=89.5% but required tests failed: log \u2014 1/149 components failed (1 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-410m-seed2", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-350m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=89.5% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=18.423609, mean_rel=0.259477", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-410m-seed2", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-350m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-410m-seed2", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-160m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-160m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Macropodus/macbert4mdcspell_v1", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "shibing624/macbert4csc-base-chinese", + "architecture_id": "BertForMaskedLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloomz-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=91.7% (failed: generation)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloom-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "openai-community/gpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-j-6b", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "peft-internal-testing/tiny-random-GPTJForCausalLM", + "architecture_id": "GPTJForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-125m", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/gpt-neo-1.3B", + "architecture_id": "GPTNeoForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-160m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-70m-deduped", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2-2b-it", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=76.2% but required tests failed: logits_equivalence, loss_equivalence \u2014 Tensors differ: max_diff=13.293901, mean_rel=32.253456", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "hmellor/tiny-random-Gemma2ForCausalLM", + "architecture_id": "Gemma2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-1b-it", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-3-270m", + "architecture_id": "Gemma3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=92.9% (failed: hook_functionality); P3=95.2% (failed: hook_functionality)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/gemma-2b-it", + "architecture_id": "GemmaForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.5% but required tests failed: logits_equivalence \u2014 Found 1 significant mismatches in critical hooks", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "optimum-intel-internal-testing/tiny-GptOssForCausalLM", + "architecture_id": "GptOssForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P3=94.7% (failed: attention_output_centering)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "meta-llama/Llama-3.2-1B-Instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "llm-jp/llm-jp-3-3.7b-instruct", + "architecture_id": "LlamaForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.2", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "trl-internal-testing/tiny-MistralForCausalLM-0.1", + "architecture_id": "MistralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ggml-org/stories15M_MOE", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "TitanML/tiny-mixtral", + "architecture_id": "MixtralForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-125m", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "facebook/opt-1.3b", + "architecture_id": "OPTForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=90.0% but required tests failed: logits_equivalence \u2014 Tensors differ: max_diff=28.810719, mean_rel=60.400272", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-2-0425-1B-Instruct", + "architecture_id": "Olmo2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct-SFT", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/Olmo-3-7B-Instruct", + "architecture_id": "Olmo3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P2=64.3% < 75.0% (failed: logits_equivalence, loss_equivalence, hook_functionality, \u2014 Tensors differ: max_diff=19.425457, mean_rel=11.940315", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=58.8% < 75.0% (failed: weight_modification, hook_functionality, run_with_cache, \u2014 Critical backward hooks check failed: Output 0 of BackwardHookFunctionBackward is a view and is being modified inplace. This view was created inside a", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0125", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMoE-1B-7B-0924", + "architecture_id": "OlmoeForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-1_1B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "apple/OpenELM-3B-Instruct", + "architecture_id": "OpenELMForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/Phi-3-mini-4k-instruct", + "architecture_id": "Phi3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-2", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/phi-1_5", + "architecture_id": "PhiForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "architecture_id": "Qwen2ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-0.6B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3-4B", + "architecture_id": "Qwen3ForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-3b-4e1t", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "stabilityai/stablelm-2-zephyr-1_6b", + "architecture_id": "StableLmForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "lmqg/flan-t5-base-squad-qag", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues, low text quality", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Vamsi/T5_Paraphrase_Paws", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "allenai/OLMo-1B-0724-hf", + "architecture_id": "OlmoForCausalLM", + "verified_date": "2026-03-10", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null } ] } diff --git a/transformer_lens/weight_processing.py b/transformer_lens/weight_processing.py index 24bfd56e5..e5f824cb3 100644 --- a/transformer_lens/weight_processing.py +++ b/transformer_lens/weight_processing.py @@ -643,26 +643,44 @@ def _fold_mlp_layer_norm( if has_ln and ln2_w is not None: # MoE layers: fold ln2 into router gate and each expert's W_in/W_gate if getattr(cfg, "num_experts", None) is not None and cfg.num_experts > 0: + # Track folds; skip setting ln2 to identity if expert weights + # aren't in the state dict (Bridge MoE wraps the whole module). + expert_fold_count = 0 + expected_expert_folds = cfg.num_experts * 2 # W_in + W_gate per expert + # Fold into router gate - router_key = f"blocks.{layer}.mlp.W_gate.weight" + router_key = ProcessWeights._resolve_state_dict_key( + state_dict, f"blocks.{layer}.mlp.W_gate.weight", layer + ) if router_key in state_dict: state_dict[router_key] = state_dict[router_key] * ln2_w[None, :] # Fold into each expert's W_in and W_gate (SwiGLU gate) for e in range(cfg.num_experts): for suffix in ("W_in.weight", "W_gate.weight"): - key = f"blocks.{layer}.mlp.experts.{e}.{suffix}" + key = ProcessWeights._resolve_state_dict_key( + state_dict, + f"blocks.{layer}.mlp.experts.{e}.{suffix}", + layer, + ) if key in state_dict: state_dict[key] = state_dict[key] * ln2_w[None, :] - # Set ln2.w to identity (skip for parallel override — ln1 already identity) - if ln2_w_key is not None: - state_dict[ln2_w_key] = torch.ones_like(ln2_w) - alternate_ln2_w_key = ( - ln2_w_key.replace("ln_2", "ln2") - if "ln_2" in ln2_w_key - else ln2_w_key.replace("ln2", "ln_2") - ) - if alternate_ln2_w_key != ln2_w_key and alternate_ln2_w_key in state_dict: - state_dict[alternate_ln2_w_key] = torch.ones_like(ln2_w) + expert_fold_count += 1 + + # Only set ln2 to identity if we actually folded into expert weights. + if expert_fold_count > 0: + if ln2_w_key is not None: + state_dict[ln2_w_key] = torch.ones_like(ln2_w) + alternate_ln2_w_key = ( + ln2_w_key.replace("ln_2", "ln2") + if "ln_2" in ln2_w_key + else ln2_w_key.replace("ln2", "ln_2") + ) + if alternate_ln2_w_key != ln2_w_key and alternate_ln2_w_key in state_dict: + state_dict[alternate_ln2_w_key] = torch.ones_like(ln2_w) + else: + # No expert weights found — undo router gate fold for consistency. + if router_key in state_dict: + state_dict[router_key] = state_dict[router_key] / ln2_w[None, :] return state_dict mlp_W_in = ProcessWeights.convert_tensor_to_tl_format( @@ -1195,27 +1213,24 @@ def center_writing_weights( except ValueError: mlp_W_out_key = None mlp_b_out_key = None - if attn_W_O_key not in state_dict: - raise KeyError( - f"Expected attention W_O key '{attn_W_O_key}' not found in state_dict for layer {l}. Available keys: {list(state_dict.keys())[:10]}..." - ) - attn_W_O = ProcessWeights.convert_tensor_to_tl_format( - attn_W_O_key, state_dict, state_dict.get(attn_W_O_key), cfg, adapter, l - ) - assert attn_W_O is not None, f"Attention W_O not found at key {attn_W_O_key}" - attn_W_O = attn_W_O - attn_W_O.mean(-1, keepdim=True) - state_dict[attn_W_O_key] = ProcessWeights.convert_tensor_to_hf_format( - attn_W_O_key, attn_W_O, cfg, adapter, l - ) - if attn_b_O_key in state_dict: - attn_b_O = ProcessWeights.convert_tensor_to_tl_format( - attn_b_O_key, state_dict, state_dict.get(attn_b_O_key), cfg, adapter, l + if attn_W_O_key in state_dict: + attn_W_O = ProcessWeights.convert_tensor_to_tl_format( + attn_W_O_key, state_dict, state_dict.get(attn_W_O_key), cfg, adapter, l ) - assert attn_b_O is not None, f"Attention b_O not found at key {attn_b_O_key}" - attn_b_O = attn_b_O - attn_b_O.mean() - state_dict[attn_b_O_key] = ProcessWeights.convert_tensor_to_hf_format( - attn_b_O_key, attn_b_O, cfg, adapter, l + assert attn_W_O is not None, f"Attention W_O not found at key {attn_W_O_key}" + attn_W_O = attn_W_O - attn_W_O.mean(-1, keepdim=True) + state_dict[attn_W_O_key] = ProcessWeights.convert_tensor_to_hf_format( + attn_W_O_key, attn_W_O, cfg, adapter, l ) + if attn_b_O_key in state_dict: + attn_b_O = ProcessWeights.convert_tensor_to_tl_format( + attn_b_O_key, state_dict, state_dict.get(attn_b_O_key), cfg, adapter, l + ) + assert attn_b_O is not None, f"Attention b_O not found at key {attn_b_O_key}" + attn_b_O = attn_b_O - attn_b_O.mean() + state_dict[attn_b_O_key] = ProcessWeights.convert_tensor_to_hf_format( + attn_b_O_key, attn_b_O, cfg, adapter, l + ) if not getattr(cfg, "attn_only", False): is_moe = getattr(cfg, "num_experts", None) is not None and cfg.num_experts > 0 if is_moe: @@ -1233,9 +1248,12 @@ def center_writing_weights( break if expert_W_out_key is None and adapter: try: - expert_W_out_key = ProcessWeights._get_param_key( + candidate = ProcessWeights._get_param_key( f"blocks.{l}.mlp.experts.{e}.W_out", adapter ) + expert_W_out_key = ProcessWeights._resolve_state_dict_key( + state_dict, candidate, l + ) except ValueError: pass if expert_W_out_key and expert_W_out_key in state_dict: @@ -1264,6 +1282,16 @@ def center_writing_weights( if pattern in state_dict: expert_b_out_key = pattern break + if expert_b_out_key is None and adapter: + try: + candidate = ProcessWeights._get_param_key( + f"blocks.{l}.mlp.experts.{e}.b_out", adapter + ) + expert_b_out_key = ProcessWeights._resolve_state_dict_key( + state_dict, candidate, l + ) + except ValueError: + pass if expert_b_out_key and expert_b_out_key in state_dict: expert_b_out = ProcessWeights.convert_tensor_to_tl_format( expert_b_out_key, @@ -1282,11 +1310,7 @@ def center_writing_weights( ] = ProcessWeights.convert_tensor_to_hf_format( expert_b_out_key, expert_b_out, cfg, adapter, l ) - elif mlp_W_out_key is not None: - if mlp_W_out_key not in state_dict: - raise KeyError( - f"Expected MLP W_out key '{mlp_W_out_key}' not found in state_dict for layer {l}. Available keys: {list(state_dict.keys())[:10]}..." - ) + elif mlp_W_out_key is not None and mlp_W_out_key in state_dict: mlp_W_out = ProcessWeights.convert_tensor_to_tl_format( mlp_W_out_key, state_dict, state_dict.get(mlp_W_out_key), cfg, adapter, l ) @@ -1485,6 +1509,19 @@ def fold_value_biases( state_dict[b_V_key] = ProcessWeights.convert_tensor_to_hf_format( b_V_key, new_b_V, cfg, adapter, layer ) + elif is_split_format and len(b_V.shape) == 1 and len(W_O.shape) == 3: + # Split bias [n_heads * d_head] with W_O already in TL format [n_heads, d_head, d_model] + n_heads = cfg.n_heads + d_head = cfg.d_head + b_V_reshaped = b_V.reshape(n_heads, d_head) + if getattr(cfg, "n_key_value_heads", None) is not None: + b_V_reshaped = torch.repeat_interleave( + b_V_reshaped, dim=0, repeats=cfg.n_heads // cfg.n_key_value_heads + ) + folded_b_O = b_O_original + (b_V_reshaped[:, :, None] * W_O).sum([0, 1]) + state_dict[b_V_key] = ProcessWeights.convert_tensor_to_hf_format( + b_V_key, torch.zeros_like(b_V), cfg, adapter, layer + ) elif len(b_V.shape) == 2 and len(W_O.shape) == 3: b_V_original_shape = b_V.shape if getattr(cfg, "n_key_value_heads", None) is not None: @@ -1594,6 +1631,13 @@ def process_weights( # models with combined QKV projections (e.g., OpenELM's qkv_proj) may # not be able to fold attention LN — setting ln1.w=1.0 without folding # destroys the RMS scaling. + # Some adapters (e.g., post-LN) don't support center_writing_weights. + if ( + center_writing_weights + and adapter + and not getattr(adapter, "supports_center_writing_weights", True) + ): + center_writing_weights = False if center_writing_weights: if getattr(cfg, "normalization_type", "LN") in ["LN", "LNPre"] and ( not getattr(cfg, "final_rms", False)