3434_BIOBERT_PATH = "https://github.com/naver/biobert-pretrained/releases/download/"
3535_SCIBERT_PATH = "https://s3-us-west-2.amazonaws.com/ai2-s2-research/" \
3636 "scibert/tensorflow_models/"
37+ _BERT_MSMARCO_PATH = "https://drive.google.com/file/d/"
3738
3839
3940class PretrainedBERTMixin (PretrainedMixin , ABC ):
@@ -97,6 +98,16 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
9798 * ``scibert-basevocab-cased``: Cased version of the model trained on
9899 the original BERT vocabulary.
99100
101+ * **BERT for MS-MARCO**: proposed in (Nogueira et al., 2019)
102+ `Passage Re-ranking with BERT`_. A BERT model fine-tuned on the MS-MARCO
103+ (Nguyen et al., 2016) dataset. It was the best-performing model (as of
104+ Jan 8th, 2019) on the MS-MARCO passage re-ranking task. Two models are included:
105+
106+ * ``bert-msmarco-base``: Original BERT base model fine-tuned on
107+ MS-MARCO.
108+ * ``bert-msmarco-large``: Original BERT large model fine-tuned on
109+ MS-MARCO.
110+
100111 We provide the following BERT classes:
101112
102113 * :class:`~texar.torch.modules.BERTEncoder` for text encoding.
@@ -111,6 +122,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
111122
112123 .. _`SciBERT: A Pretrained Language Model for Scientific Text`:
113124 https://arxiv.org/abs/1903.10676
125+
126+ .. _`Passage Re-ranking with BERT`:
127+ https://arxiv.org/abs/1901.04085
114128 """
115129
116130 _MODEL_NAME = "BERT"
@@ -150,6 +164,12 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
150164 _SCIBERT_PATH + 'scibert_basevocab_uncased.tar.gz' ,
151165 'scibert-basevocab-cased' :
152166 _SCIBERT_PATH + 'scibert_basevocab_cased.tar.gz' ,
167+
168+ # BERT for MS-MARCO
169+ 'bert-msmarco-base' :
170+ _BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/view' ,
171+ 'bert-msmarco-large' :
172+ _BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/view'
153173 }
154174 _MODEL2CKPT = {
155175 # Standard BERT
@@ -172,6 +192,10 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
172192 'scibert-scivocab-cased' : 'bert_model.ckpt' ,
173193 'scibert-basevocab-uncased' : 'bert_model.ckpt' ,
174194 'scibert-basevocab-cased' : 'bert_model.ckpt' ,
195+
196+ # BERT for MS-MARCO
197+ 'bert-msmarco-base' : 'model.ckpt-100000' ,
198+ 'bert-msmarco-large' : 'model.ckpt-100000' ,
175199 }
176200
177201 @classmethod
0 commit comments