Fixing the Google Drive download bug (#275)

atif93 · gpengzhi · commit 88163619ec69 · 2019-12-19T14:56:53.000-05:00
* Fixing the Google Drive download bug

* adding docstring

* Changing some imports
diff --git a/texar/torch/data/data_utils.py b/texar/torch/data/data_utils.py
@@ -33,6 +33,7 @@
     "read_words",
     "make_vocab",
     "count_file_lines",
+    "get_filename"
 ]
 
 Py3 = sys.version_info[0] == 3
@@ -139,6 +140,9 @@ def _progress_hook(count, block_size, total_size):
 def _extract_google_drive_file_id(url: str) -> str:
     # id is between `/d/` and '/'
     url_suffix = url[url.find('/d/') + 3:]
+    if url_suffix.find('/') == -1:
+        # no '/' after the id, so the remainder of the URL is the id
+        return url_suffix
     file_id = url_suffix[:url_suffix.find('/')]
     return file_id
 
@@ -305,3 +309,12 @@ def _count_lines(fn):
         filenames = [filenames]
     num_lines = np.sum([_count_lines(fn) for fn in filenames]).item()
     return num_lines
+
+
+def get_filename(url: str) -> str:
+    r"""Extracts the filename of the downloaded checkpoint file from the URL:
+    the file id for Google Drive URLs, else the last path component."""
+    if 'drive.google.com' in url:
+        return _extract_google_drive_file_id(url)
+    url, filename = os.path.split(url)
+    return filename or os.path.basename(url)
diff --git a/texar/torch/modules/pretrained/pretrained_base.py b/texar/torch/modules/pretrained/pretrained_base.py
@@ -22,7 +22,7 @@
 
 from torch import nn
 
-from texar.torch.data.data_utils import maybe_download
+from texar.torch.data.data_utils import maybe_download, get_filename
 from texar.torch.hyperparams import HParams
 from texar.torch.module_base import ModuleBase
 from texar.torch.utils.types import MaybeList
@@ -200,7 +200,7 @@ def download_checkpoint(cls, pretrained_model_name: str,
 
         if not cache_path.exists():
             if isinstance(download_path, str):
-                filename = download_path.split('/')[-1]
+                filename = get_filename(download_path)
                 maybe_download(download_path, cache_path, extract=True)
 
                 # removing the compressed file