Skip to content

Commit e0a2da2

Browse files
author
Atif Ahmed
committed
Adding BERT for MS-MARCO passage re-ranking pretrained model
1 parent 54f9fee commit e0a2da2

File tree

1 file changed

+24
-0
lines changed
  • texar/torch/modules/pretrained

1 file changed

+24
-0
lines changed

texar/torch/modules/pretrained/bert.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
_BIOBERT_PATH = "https://github.com/naver/biobert-pretrained/releases/download/"
3535
_SCIBERT_PATH = "https://s3-us-west-2.amazonaws.com/ai2-s2-research/" \
3636
"scibert/tensorflow_models/"
37+
_BERT_MSMARCO_PATH = "https://drive.google.com/file/d/"
3738

3839

3940
class PretrainedBERTMixin(PretrainedMixin, ABC):
@@ -97,6 +98,16 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
9798
* ``scibert-basevocab-cased``: Cased version of the model trained on
9899
the original BERT vocabulary.
99100
101+
* **BERT for MS-MARCO**: proposed in (`Nogueira et al.`, 2019)
102+
`Passage Re-ranking with BERT`_. A BERT model fine-tuned on MS-MARCO
103+
(Nguyen et al., 2016) dataset. It is the best-performing model (as of Jan 8th,
104+
2019) on MS-MARCO Passage re-ranking task. Two models are included:
105+
106+
* ``bert-msmarco-base``: Original BERT base model fine-tuned on
107+
MS-MARCO.
108+
* ``bert-msmarco-large``: Original BERT large model fine-tuned on
109+
MS-MARCO.
110+
100111
We provide the following BERT classes:
101112
102113
* :class:`~texar.torch.modules.BERTEncoder` for text encoding.
@@ -111,6 +122,9 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
111122
112123
.. _`SciBERT: A Pretrained Language Model for Scientific Text`:
113124
https://arxiv.org/abs/1903.10676
125+
126+
.. _`Passage Re-ranking with BERT`:
127+
https://arxiv.org/abs/1901.04085
114128
"""
115129

116130
_MODEL_NAME = "BERT"
@@ -150,6 +164,12 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
150164
_SCIBERT_PATH + 'scibert_basevocab_uncased.tar.gz',
151165
'scibert-basevocab-cased':
152166
_SCIBERT_PATH + 'scibert_basevocab_cased.tar.gz',
167+
168+
# BERT for MS-MARCO
169+
'bert-msmarco-base':
170+
_BERT_MSMARCO_PATH + '1cyUrhs7JaCJTTu-DjFUqP6Bs4f8a6JTX/view',
171+
'bert-msmarco-large':
172+
_BERT_MSMARCO_PATH + '1crlASTMlsihALlkabAQP6JTYIZwC1Wm8/view'
153173
}
154174
_MODEL2CKPT = {
155175
# Standard BERT
@@ -172,6 +192,10 @@ class PretrainedBERTMixin(PretrainedMixin, ABC):
172192
'scibert-scivocab-cased': 'bert_model.ckpt',
173193
'scibert-basevocab-uncased': 'bert_model.ckpt',
174194
'scibert-basevocab-cased': 'bert_model.ckpt',
195+
196+
# BERT for MS-MARCO
197+
'bert-msmarco-base': 'model.ckpt-100000',
198+
'bert-msmarco-large': 'model.ckpt-100000',
175199
}
176200

177201
@classmethod

0 commit comments

Comments
 (0)