|
5 | 5 | "colab": { |
6 | 6 | "name": "networks_seq2seq_nmt.ipynb", |
7 | 7 | "provenance": [], |
| 8 | + "private_outputs": true, |
8 | 9 | "collapsed_sections": [], |
9 | 10 | "toc_visible": true |
10 | 11 | }, |
11 | 12 | "kernelspec": { |
12 | | - "name": "python3", |
13 | | - "display_name": "Python 3" |
| 13 | + "display_name": "Python 3", |
| 14 | + "language": "python", |
| 15 | + "name": "python3" |
| 16 | + }, |
| 17 | + "language_info": { |
| 18 | + "codemirror_mode": { |
| 19 | + "name": "ipython", |
| 20 | + "version": 3 |
| 21 | + }, |
| 22 | + "file_extension": ".py", |
| 23 | + "mimetype": "text/x-python", |
| 24 | + "name": "python", |
| 25 | + "nbconvert_exporter": "python", |
| 26 | + "pygments_lexer": "ipython3", |
| 27 | + "version": "3.7.3rc1" |
14 | 28 | } |
15 | 29 | }, |
16 | 30 | "cells": [ |
17 | 31 | { |
18 | 32 | "cell_type": "code", |
19 | 33 | "metadata": { |
20 | | - "id": "bl9GdT7h0Hxk", |
21 | 34 | "colab_type": "code", |
| 35 | + "id": "bl9GdT7h0Hxk", |
22 | 36 | "colab": {} |
23 | 37 | }, |
24 | 38 | "source": [ |
|
40 | 54 | { |
41 | 55 | "cell_type": "markdown", |
42 | 56 | "metadata": { |
43 | | - "id": "WhwgQAn50EZp", |
44 | | - "colab_type": "text" |
| 57 | + "colab_type": "text", |
| 58 | + "id": "WhwgQAn50EZp" |
45 | 59 | }, |
46 | 60 | "source": [ |
47 | 61 | "# TensorFlow Addons Networks : Sequence-to-Sequence NMT \n", |
|
65 | 79 | { |
66 | 80 | "cell_type": "markdown", |
67 | 81 | "metadata": { |
68 | | - "id": "ip0n8178Fuwm", |
69 | | - "colab_type": "text" |
| 82 | + "colab_type": "text", |
| 83 | + "id": "ip0n8178Fuwm" |
70 | 84 | }, |
71 | 85 | "source": [ |
72 | 86 | "# **Overview**\n", |
|
87 | 101 | { |
88 | 102 | "cell_type": "markdown", |
89 | 103 | "metadata": { |
90 | | - "id": "YNiadLKNLleD", |
91 | | - "colab_type": "text" |
| 104 | + "colab_type": "text", |
| 105 | + "id": "YNiadLKNLleD" |
92 | 106 | }, |
93 | 107 | "source": [ |
94 | 108 | "# **Setup**" |
|
97 | 111 | { |
98 | 112 | "cell_type": "code", |
99 | 113 | "metadata": { |
100 | | - "id": "1bUHYPhlF-Ql", |
101 | 114 | "colab_type": "code", |
| 115 | + "id": "1bUHYPhlF-Ql", |
102 | 116 | "colab": {} |
103 | 117 | }, |
104 | 118 | "source": [ |
|
110 | 124 | { |
111 | 125 | "cell_type": "code", |
112 | 126 | "metadata": { |
113 | | - "id": "jw044zGCZp-K", |
114 | 127 | "colab_type": "code", |
| 128 | + "id": "jw044zGCZp-K", |
115 | 129 | "colab": {} |
116 | 130 | }, |
117 | 131 | "source": [ |
|
120 | 134 | "except:\n", |
121 | 135 | " pass\n", |
122 | 136 | "!pip install -q --no-deps tensorflow-addons~=0.6\n", |
| 137 | + "!pip install nltk\n", |
123 | 138 | "import tensorflow as tf\n", |
124 | 139 | "import tensorflow_addons as tfa" |
125 | 140 | ], |
|
129 | 144 | { |
130 | 145 | "cell_type": "markdown", |
131 | 146 | "metadata": { |
132 | | - "id": "82GcQTsGf414", |
133 | | - "colab_type": "text" |
| 147 | + "colab_type": "text", |
| 148 | + "id": "82GcQTsGf414" |
134 | 149 | }, |
135 | 150 | "source": [ |
136 | 151 | "## **Additional Resources:**\n", |
|
150 | 165 | { |
151 | 166 | "cell_type": "code", |
152 | 167 | "metadata": { |
153 | | - "id": "5OIlpST_6ga-", |
154 | 168 | "colab_type": "code", |
| 169 | + "id": "5OIlpST_6ga-", |
155 | 170 | "colab": {} |
156 | 171 | }, |
157 | 172 | "source": [ |
|
166 | 181 | { |
167 | 182 | "cell_type": "code", |
168 | 183 | "metadata": { |
169 | | - "id": "co6-YpBwL-4d", |
170 | 184 | "colab_type": "code", |
| 185 | + "id": "co6-YpBwL-4d", |
171 | 186 | "colab": {} |
172 | 187 | }, |
173 | 188 | "source": [ |
|
181 | 196 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", |
182 | 197 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", |
183 | 198 | "from tensorflow.keras.utils import to_categorical\n", |
184 | | - "from keras.utils.vis_utils import plot_model\n", |
185 | 199 | "from tensorflow.keras.models import Sequential\n", |
186 | 200 | "from tensorflow.keras.layers import LSTM\n", |
187 | 201 | "from tensorflow.keras.layers import Dense\n", |
|
192 | 206 | "from pickle import load\n", |
193 | 207 | "from numpy import array\n", |
194 | 208 | "from numpy import argmax\n", |
195 | | - "from keras.models import load_model\n", |
196 | 209 | "from nltk.translate.bleu_score import corpus_bleu" |
197 | 210 | ], |
198 | 211 | "execution_count": 0, |
|
201 | 214 | { |
202 | 215 | "cell_type": "markdown", |
203 | 216 | "metadata": { |
204 | | - "id": "q7gjUT_9XSoj", |
205 | | - "colab_type": "text" |
| 217 | + "colab_type": "text", |
| 218 | + "id": "q7gjUT_9XSoj" |
206 | 219 | }, |
207 | 220 | "source": [ |
208 | 221 | "## **Data Cleaning**\n", |
|
213 | 226 | { |
214 | 227 | "cell_type": "code", |
215 | 228 | "metadata": { |
216 | | - "id": "6ZIu-TNqKFsd", |
217 | 229 | "colab_type": "code", |
| 230 | + "id": "6ZIu-TNqKFsd", |
218 | 231 | "colab": {} |
219 | 232 | }, |
220 | 233 | "source": [ |
|
269 | 282 | { |
270 | 283 | "cell_type": "markdown", |
271 | 284 | "metadata": { |
272 | | - "id": "eXpft1qQknO8", |
273 | | - "colab_type": "text" |
| 285 | + "colab_type": "text", |
| 286 | + "id": "eXpft1qQknO8" |
274 | 287 | }, |
275 | 288 | "source": [ |
276 | 289 | "## **Saving the Cleaned Dataset**" |
|
279 | 292 | { |
280 | 293 | "cell_type": "code", |
281 | 294 | "metadata": { |
282 | | - "id": "GMxdlVU1X8yI", |
283 | 295 | "colab_type": "code", |
| 296 | + "id": "GMxdlVU1X8yI", |
284 | 297 | "colab": {} |
285 | 298 | }, |
286 | 299 | "source": [ |
|
301 | 314 | { |
302 | 315 | "cell_type": "markdown", |
303 | 316 | "metadata": { |
304 | | - "id": "Cfb66QxWYr6A", |
305 | | - "colab_type": "text" |
| 317 | + "colab_type": "text", |
| 318 | + "id": "Cfb66QxWYr6A" |
306 | 319 | }, |
307 | 320 | "source": [ |
308 | 321 | "## **Data Preparation**" |
|
311 | 324 | { |
312 | 325 | "cell_type": "code", |
313 | 326 | "metadata": { |
314 | | - "id": "3oq60MBPSanQ", |
315 | 327 | "colab_type": "code", |
| 328 | + "id": "3oq60MBPSanQ", |
316 | 329 | "colab": {} |
317 | 330 | }, |
318 | 331 | "source": [ |
|
342 | 355 | { |
343 | 356 | "cell_type": "code", |
344 | 357 | "metadata": { |
345 | | - "id": "XH5oSRNeSc1s", |
346 | 358 | "colab_type": "code", |
| 359 | + "id": "XH5oSRNeSc1s", |
347 | 360 | "colab": {} |
348 | 361 | }, |
349 | 362 | "source": [ |
|
378 | 391 | { |
379 | 392 | "cell_type": "markdown", |
380 | 393 | "metadata": { |
381 | | - "id": "UQRgJcYgapqE", |
382 | | - "colab_type": "text" |
| 394 | + "colab_type": "text", |
| 395 | + "id": "UQRgJcYgapqE" |
383 | 396 | }, |
384 | 397 | "source": [ |
385 | 398 | "## **Defining NMT Model**" |
|
388 | 401 | { |
389 | 402 | "cell_type": "code", |
390 | 403 | "metadata": { |
391 | | - "id": "sGdakRtjaokF", |
392 | 404 | "colab_type": "code", |
| 405 | + "id": "sGdakRtjaokF", |
393 | 406 | "colab": {} |
394 | 407 | }, |
395 | 408 | "source": [ |
|
409 | 422 | { |
410 | 423 | "cell_type": "markdown", |
411 | 424 | "metadata": { |
412 | | - "id": "NPwcfddTa0oB", |
413 | | - "colab_type": "text" |
| 425 | + "colab_type": "text", |
| 426 | + "id": "NPwcfddTa0oB" |
414 | 427 | }, |
415 | 428 | "source": [ |
416 | 429 | "## **Tokenization**" |
|
419 | 432 | { |
420 | 433 | "cell_type": "code", |
421 | 434 | "metadata": { |
422 | | - "id": "x1BEqVyra2jW", |
423 | 435 | "colab_type": "code", |
| 436 | + "id": "x1BEqVyra2jW", |
424 | 437 | "colab": {} |
425 | 438 | }, |
426 | 439 | "source": [ |
|
453 | 466 | { |
454 | 467 | "cell_type": "markdown", |
455 | 468 | "metadata": { |
456 | | - "id": "v5uzLcu2bNX3", |
457 | | - "colab_type": "text" |
| 469 | + "colab_type": "text", |
| 470 | + "id": "v5uzLcu2bNX3" |
458 | 471 | }, |
459 | 472 | "source": [ |
460 | 473 | "## **Training**" |
|
463 | 476 | { |
464 | 477 | "cell_type": "code", |
465 | 478 | "metadata": { |
466 | | - "id": "PvfD2SknWrt6", |
467 | 479 | "colab_type": "code", |
| 480 | + "id": "PvfD2SknWrt6", |
468 | 481 | "colab": {} |
469 | 482 | }, |
470 | 483 | "source": [ |
|
483 | 496 | { |
484 | 497 | "cell_type": "markdown", |
485 | 498 | "metadata": { |
486 | | - "id": "nDyK-EGqbN5r", |
487 | | - "colab_type": "text" |
| 499 | + "colab_type": "text", |
| 500 | + "id": "nDyK-EGqbN5r" |
488 | 501 | }, |
489 | 502 | "source": [ |
490 | 503 | "## **Evaluation based on BLEU scores**" |
|
493 | 506 | { |
494 | 507 | "cell_type": "code", |
495 | 508 | "metadata": { |
496 | | - "id": "y98sfom7SuGy", |
497 | 509 | "colab_type": "code", |
| 510 | + "id": "y98sfom7SuGy", |
498 | 511 | "colab": {} |
499 | 512 | }, |
500 | 513 | "source": [ |
|
542 | 555 | { |
543 | 556 | "cell_type": "markdown", |
544 | 557 | "metadata": { |
545 | | - "id": "iodjSItQds1t", |
546 | | - "colab_type": "text" |
| 558 | + "colab_type": "text", |
| 559 | + "id": "iodjSItQds1t" |
547 | 560 | }, |
548 | 561 | "source": [ |
549 | 562 | "## **Final Translation**" |
|
552 | 565 | { |
553 | 566 | "cell_type": "code", |
554 | 567 | "metadata": { |
555 | | - "id": "K6aWFB5IWlH2", |
556 | 568 | "colab_type": "code", |
| 569 | + "id": "K6aWFB5IWlH2", |
557 | 570 | "colab": {} |
558 | 571 | }, |
559 | 572 | "source": [ |
|
582 | 595 | { |
583 | 596 | "cell_type": "code", |
584 | 597 | "metadata": { |
585 | | - "id": "sp3trjITtq95", |
586 | 598 | "colab_type": "code", |
| 599 | + "id": "sp3trjITtq95", |
587 | 600 | "colab": {} |
588 | 601 | }, |
589 | 602 | "source": [ |
|
595 | 608 | { |
596 | 609 | "cell_type": "markdown", |
597 | 610 | "metadata": { |
598 | | - "id": "g6Av-oPWvRc4", |
599 | | - "colab_type": "text" |
| 611 | + "colab_type": "text", |
| 612 | + "id": "g6Av-oPWvRc4" |
600 | 613 | }, |
601 | 614 | "source": [ |
602 | 615 | "### The accuracy can be improved by implementing:\n", |
|
0 commit comments