diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4adfd09..7cd4c15 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: hooks: - id: black args: ["--config", "./pyproject.toml"] - language_version: python3.7 + language_version: python3 - repo: https://github.com/asottile/seed-isort-config rev: v2.2.0 diff --git a/configs/caml_mimic3_50.yml b/configs/caml/caml_mimic3_50.yml similarity index 98% rename from configs/caml_mimic3_50.yml rename to configs/caml/caml_mimic3_50.yml index 84f704c..47cc2e8 100644 --- a/configs/caml_mimic3_50.yml +++ b/configs/caml/caml_mimic3_50.yml @@ -121,3 +121,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/caml_mimic3_50_old.yml b/configs/caml/caml_mimic3_50_old.yml similarity index 98% rename from configs/caml_mimic3_50_old.yml rename to configs/caml/caml_mimic3_50_old.yml index bb2bd2f..6d9bf50 100644 --- a/configs/caml_mimic3_50_old.yml +++ b/configs/caml/caml_mimic3_50_old.yml @@ -121,3 +121,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/caml_mimic3_full.yml b/configs/caml/caml_mimic3_full.yml similarity index 98% rename from configs/caml_mimic3_full.yml rename to configs/caml/caml_mimic3_full.yml index 9a66d1c..7a4633f 100644 --- a/configs/caml_mimic3_full.yml +++ b/configs/caml/caml_mimic3_full.yml @@ -121,3 +121,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/caml_mimic3_full_old.yml b/configs/caml/caml_mimic3_full_old.yml similarity index 98% rename from configs/caml_mimic3_full_old.yml rename to configs/caml/caml_mimic3_full_old.yml index 39ba0ed..7d37859 100644 --- a/configs/caml_mimic3_full_old.yml +++ b/configs/caml/caml_mimic3_full_old.yml @@ -121,3 +121,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/cnn_mimic3_50.yml b/configs/caml/cnn_mimic3_50.yml similarity index 98% rename from configs/cnn_mimic3_50.yml rename to configs/caml/cnn_mimic3_50.yml index 1bd742e..074758e 100644 --- a/configs/cnn_mimic3_50.yml +++ b/configs/caml/cnn_mimic3_50.yml @@ -117,3 +117,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/drcaml_mimic3_50.yml b/configs/caml/drcaml_mimic3_50.yml similarity index 98% rename from configs/drcaml_mimic3_50.yml rename to configs/caml/drcaml_mimic3_50.yml index 979fd04..ee90972 100644 --- a/configs/drcaml_mimic3_50.yml +++ b/configs/caml/drcaml_mimic3_50.yml @@ -121,3 +121,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/dcan/mimic3_50.yml b/configs/dcan/mimic3_50.yml index 788f329..e5a0a80 100644 --- a/configs/dcan/mimic3_50.yml +++ b/configs/dcan/mimic3_50.yml @@ -125,3 +125,4 @@ trainer: - name: micro_auc seed: 1 use_gpu: true + initialise_hidden_states: false diff --git a/configs/dcan/mimic3_50_old.yml b/configs/dcan/mimic3_50_old.yml index 380aacf..12ddfea 100644 --- a/configs/dcan/mimic3_50_old.yml +++ b/configs/dcan/mimic3_50_old.yml @@ -125,3 +125,4 @@ trainer: - name: micro_auc seed: 1 use_gpu: true + initialise_hidden_states: false diff --git a/configs/dcan/mimic3_full.yml b/configs/dcan/mimic3_full.yml index 03e6e11..95f6fc8 100644 --- a/configs/dcan/mimic3_full.yml +++ b/configs/dcan/mimic3_full.yml @@ -125,3 +125,4 @@ trainer: - name: micro_auc seed: 1 use_gpu: true + 
initialise_hidden_states: false diff --git a/configs/dcan/mimic3_full_old.yml b/configs/dcan/mimic3_full_old.yml index e21f402..2e69608 100644 --- a/configs/dcan/mimic3_full_old.yml +++ b/configs/dcan/mimic3_full_old.yml @@ -125,3 +125,4 @@ trainer: - name: micro_auc seed: 1 use_gpu: true + initialise_hidden_states: false diff --git a/configs/fusion/mimic3_50.yml b/configs/fusion/mimic3_50.yml index 05236c1..69e2276 100644 --- a/configs/fusion/mimic3_50.yml +++ b/configs/fusion/mimic3_50.yml @@ -126,3 +126,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: true diff --git a/configs/fusion/mimic3_50_old.yml b/configs/fusion/mimic3_50_old.yml index 53532cd..3ff6583 100644 --- a/configs/fusion/mimic3_50_old.yml +++ b/configs/fusion/mimic3_50_old.yml @@ -126,3 +126,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/fusion/mimic3_full.yml b/configs/fusion/mimic3_full.yml index af05013..a0f43f4 100644 --- a/configs/fusion/mimic3_full.yml +++ b/configs/fusion/mimic3_full.yml @@ -126,3 +126,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/fusion/mimic3_full_old.yml b/configs/fusion/mimic3_full_old.yml index 13b4659..9a953f4 100644 --- a/configs/fusion/mimic3_full_old.yml +++ b/configs/fusion/mimic3_full_old.yml @@ -126,3 +126,4 @@ trainer: - name: micro_auc seed: 1337 use_gpu: true + initialise_hidden_states: false diff --git a/configs/gatedcnn_nci/mimic3_50.yml b/configs/gatedcnn_nci/mimic3_50.yml new file mode 100644 index 0000000..5f0b893 --- /dev/null +++ b/configs/gatedcnn_nci/mimic3_50.yml @@ -0,0 +1,128 @@ +paths: + mimic_dir: &mimic_dir datasets/mimic3/csv + static_dir: &static_dir datasets/mimic3/static + dataset_dir: &dataset_dir datasets/mimic3_50 + word2vec_dir: &word2vec_dir datasets/mimic3_50/word2vec + output_dir: &output_dir results/gatedcnn_nci_mimic3_50 + +dataset: + name: base_dataset + data_common: &data_common + column_names: + hadm_id: "HADM_ID" + clinical_note: "TEXT" + labels: "LABELS" + word2vec_dir: *word2vec_dir + pad_token: "" + unk_token: "" + dataset_dir: *dataset_dir + label_file: labels.json + max_length: 2500 + params: + train: + <<: *data_common + data_file: train.json + val: + <<: *data_common + data_file: val.json + test: + <<: *data_common + data_file: test.json + +model: + name: gatedcnn_nci + params: + version: mimic3 + dataset_dir: *dataset_dir + mimic_dir: *mimic_dir + static_dir: *static_dir + embed_dir: *word2vec_dir + max_length: 2500 + dropout: 0.2 + input_dim: 100 + hidden_dim: 100 + output_dim: 50 + bidirectional: false + use_description: true + pad_token: "" + unk_token: "" + kernel_size: 3 + init_mean: 0 + init_std: 0.01 + levels: 3 + +trainer: + name: base_trainer + params: + output_dir: *output_dir + data_loader: + batch_size: 16 + num_workers: 4 + shuffle: false + drop_last: true + loss: + name: BinaryCrossEntropyLoss + params: null + optimizer: + name: adam + params: + lr: 0.0001 + weight_decay: 0.0 + max_epochs: 200 + lr_scheduler: null + stopping_criterion: + metric: + name: prec_at_8 + desired: max + patience: 10 + checkpoint_saver: + name: base_saver + params: + checkpoint_dir: *output_dir + interval: 1 + max_to_keep: 5 + ckpt_fname_format: "ckpt-{}.pth" + best_fname_format: "best-{}.pth" + metric: + name: prec_at_8 + class: prec_at_k + params: + k: 8 + desired: max + eval_metrics: &eval_metrics + - name: prec_at_5 + class: prec_at_k + params: + k: 5 + - name: prec_at_8 + class: 
prec_at_k + params: + k: 8 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + graph: + writer: + name: tensorboard + params: + log_dir: *output_dir + train: + interval: 100 + interval_unit: step + metric: + - name: loss + val: + interval: 1 + interval_unit: epoch + metric: + - name: loss + - name: prec_at_5 + - name: prec_at_8 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + seed: 1337 + use_gpu: true + initialise_hidden_states: true diff --git a/configs/gatedcnn_nci/mimic3_50_old.yml b/configs/gatedcnn_nci/mimic3_50_old.yml new file mode 100644 index 0000000..4eef858 --- /dev/null +++ b/configs/gatedcnn_nci/mimic3_50_old.yml @@ -0,0 +1,128 @@ +paths: + mimic_dir: &mimic_dir datasets/mimic3/csv + static_dir: &static_dir datasets/mimic3/static + dataset_dir: &dataset_dir datasets/mimic3_50_old + word2vec_dir: &word2vec_dir datasets/mimic3_50_old/word2vec + output_dir: &output_dir results/gatedcnn_nci_mimic3_50_old + +dataset: + name: base_dataset + data_common: &data_common + column_names: + hadm_id: "HADM_ID" + clinical_note: "TEXT" + labels: "LABELS" + word2vec_dir: *word2vec_dir + pad_token: "" + unk_token: "" + dataset_dir: *dataset_dir + label_file: labels.json + max_length: 2500 + params: + train: + <<: *data_common + data_file: train.json + val: + <<: *data_common + data_file: val.json + test: + <<: *data_common + data_file: test.json + +model: + name: gatedcnn_nci + params: + version: mimic3 + dataset_dir: *dataset_dir + mimic_dir: *mimic_dir + static_dir: *static_dir + embed_dir: *word2vec_dir + max_length: 2500 + dropout: 0.2 + input_dim: 100 + hidden_dim: 100 + output_dim: 50 + bidirectional: false + use_description: true + pad_token: "" + unk_token: "" + kernel_size: 3 + init_mean: 0 + init_std: 0.01 + levels: 3 + +trainer: + name: base_trainer + params: + output_dir: *output_dir + data_loader: + batch_size: 16 + num_workers: 4 + shuffle: false + drop_last: true + loss: + name: BinaryCrossEntropyLoss + params: null + optimizer: + name: adam + params: + lr: 0.01 + weight_decay: 0.0 + max_epochs: 100 + lr_scheduler: null + stopping_criterion: + metric: + name: prec_at_8 + desired: max + patience: 10 + checkpoint_saver: + name: base_saver + params: + checkpoint_dir: *output_dir + interval: 1 + max_to_keep: 5 + ckpt_fname_format: "ckpt-{}.pth" + best_fname_format: "best-{}.pth" + metric: + name: prec_at_8 + class: prec_at_k + params: + k: 8 + desired: max + eval_metrics: &eval_metrics + - name: prec_at_5 + class: prec_at_k + params: + k: 5 + - name: prec_at_8 + class: prec_at_k + params: + k: 8 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + graph: + writer: + name: tensorboard + params: + log_dir: *output_dir + train: + interval: 100 + interval_unit: step + metric: + - name: loss + val: + interval: 1 + interval_unit: epoch + metric: + - name: loss + - name: prec_at_5 + - name: prec_at_8 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + seed: 1337 + use_gpu: true + initialise_hidden_states: true diff --git a/configs/gatedcnn_nci/mimic3_full.yml b/configs/gatedcnn_nci/mimic3_full.yml new file mode 100644 index 0000000..8a833ad --- /dev/null +++ b/configs/gatedcnn_nci/mimic3_full.yml @@ -0,0 +1,128 @@ +paths: + mimic_dir: &mimic_dir datasets/mimic3/csv + static_dir: &static_dir datasets/mimic3/static + dataset_dir: &dataset_dir datasets/mimic3_full + word2vec_dir: &word2vec_dir datasets/mimic3_full/word2vec + output_dir: &output_dir results/gatedcnn_nci_mimic3_full 
+ +dataset: + name: base_dataset + data_common: &data_common + column_names: + hadm_id: "HADM_ID" + clinical_note: "TEXT" + labels: "LABELS" + word2vec_dir: *word2vec_dir + pad_token: "" + unk_token: "" + dataset_dir: *dataset_dir + label_file: labels.json + max_length: 2500 + params: + train: + <<: *data_common + data_file: train.json + val: + <<: *data_common + data_file: val.json + test: + <<: *data_common + data_file: test.json + +model: + name: CAML + params: + version: mimic3 + dataset_dir: *dataset_dir + mimic_dir: *mimic_dir + static_dir: *static_dir + embed_dir: *word2vec_dir + max_length: 2500 + dropout: 0.2 + input_dim: 100 + hidden_dim: 100 + output_dim: 8922 + bidirectional: false + use_description: true + pad_token: "" + unk_token: "" + kernel_size: 3 + init_mean: 0 + init_std: 0.01 + levels: 3 + +trainer: + name: base_trainer + params: + output_dir: *output_dir + data_loader: + batch_size: 16 + num_workers: 4 + shuffle: false + drop_last: true + loss: + name: BinaryCrossEntropyLoss + params: null + optimizer: + name: adam + params: + lr: 0.0001 + weight_decay: 0.0 + max_epochs: 200 + lr_scheduler: null + stopping_criterion: + metric: + name: prec_at_8 + desired: max + patience: 10 + checkpoint_saver: + name: base_saver + params: + checkpoint_dir: *output_dir + interval: 1 + max_to_keep: 5 + ckpt_fname_format: "ckpt-{}.pth" + best_fname_format: "best-{}.pth" + metric: + name: prec_at_8 + class: prec_at_k + params: + k: 8 + desired: max + eval_metrics: &eval_metrics + - name: prec_at_8 + class: prec_at_k + params: + k: 8 + - name: prec_at_15 + class: prec_at_k + params: + k: 15 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + graph: + writer: + name: tensorboard + params: + log_dir: *output_dir + train: + interval: 100 + interval_unit: step + metric: + - name: loss + val: + interval: 1 + interval_unit: epoch + metric: + - name: loss + - name: prec_at_8 + - name: prec_at_15 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + seed: 1337 + use_gpu: true + initialise_hidden_states: true diff --git a/configs/gatedcnn_nci/mimic3_full_old.yml b/configs/gatedcnn_nci/mimic3_full_old.yml new file mode 100644 index 0000000..d76c03b --- /dev/null +++ b/configs/gatedcnn_nci/mimic3_full_old.yml @@ -0,0 +1,128 @@ +paths: + mimic_dir: &mimic_dir datasets/mimic3/csv + static_dir: &static_dir datasets/mimic3/static + dataset_dir: &dataset_dir datasets/mimic3_full_old + word2vec_dir: &word2vec_dir datasets/mimic3_full_old/word2vec + output_dir: &output_dir results/gatedcnn_nci_mimic3_full_old + +dataset: + name: base_dataset + data_common: &data_common + column_names: + hadm_id: "HADM_ID" + clinical_note: "TEXT" + labels: "LABELS" + word2vec_dir: *word2vec_dir + pad_token: "" + unk_token: "" + dataset_dir: *dataset_dir + label_file: labels.json + max_length: 2500 + params: + train: + <<: *data_common + data_file: train.json + val: + <<: *data_common + data_file: val.json + test: + <<: *data_common + data_file: test.json + +model: + name: gatedcnn_nci + params: + version: mimic3 + dataset_dir: *dataset_dir + mimic_dir: *mimic_dir + static_dir: *static_dir + embed_dir: *word2vec_dir + max_length: 2500 + dropout: 0.2 + input_dim: 100 + hidden_dim: 100 + output_dim: 8922 + bidirectional: false + use_description: true + pad_token: "" + unk_token: "" + kernel_size: 3 + init_mean: 0 + init_std: 0.01 + levels: 3 + +trainer: + name: base_trainer + params: + output_dir: *output_dir + data_loader: + batch_size: 16 + num_workers: 4 + shuffle: false 
+ drop_last: true + loss: + name: BinaryCrossEntropyLoss + params: null + optimizer: + name: adam + params: + lr: 0.0001 + weight_decay: 0.0 + max_epochs: 200 + lr_scheduler: null + stopping_criterion: + metric: + name: prec_at_8 + desired: max + patience: 10 + checkpoint_saver: + name: base_saver + params: + checkpoint_dir: *output_dir + interval: 1 + max_to_keep: 5 + ckpt_fname_format: "ckpt-{}.pth" + best_fname_format: "best-{}.pth" + metric: + name: prec_at_8 + class: prec_at_k + params: + k: 8 + desired: max + eval_metrics: &eval_metrics + - name: prec_at_8 + class: prec_at_k + params: + k: 8 + - name: prec_at_15 + class: prec_at_k + params: + k: 15 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + graph: + writer: + name: tensorboard + params: + log_dir: *output_dir + train: + interval: 100 + interval_unit: step + metric: + - name: loss + val: + interval: 1 + interval_unit: epoch + metric: + - name: loss + - name: prec_at_8 + - name: prec_at_15 + - name: macro_f1 + - name: micro_f1 + - name: macro_auc + - name: micro_auc + seed: 1337 + use_gpu: true + initialise_hidden_states: true diff --git a/src/models/__init__.py b/src/models/__init__.py index 04f7630..9cf8c04 100644 --- a/src/models/__init__.py +++ b/src/models/__init__.py @@ -2,3 +2,4 @@ from src.models.caml import VanillaConv as CNN from src.models.dcan import DCAN from src.models.fusion import Fusion +from src.models.gatedcnn_nci import GatedCNNNCI diff --git a/src/models/fusion.py b/src/models/fusion.py index 69fed90..5236047 100755 --- a/src/models/fusion.py +++ b/src/models/fusion.py @@ -415,7 +415,7 @@ class Fusion(nn.Module): def __init__(self, config): super(Fusion, self).__init__() - logger.info(f"Initialising %s", self.__class__.__name__) + logger.info("Initialising %s", self.__class__.__name__) logger.debug( "Initialising %s with config: %s", self.__class__.__name__, config ) diff --git a/src/models/gatedcnn_nci.py b/src/models/gatedcnn_nci.py new file mode 100644 index 0000000..bd1d765 --- /dev/null +++ b/src/models/gatedcnn_nci.py @@ -0,0 +1,568 @@ +import itertools + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.init import normal_, xavier_uniform_ + +from src.utils.caml_utils import load_lookups, pad_desc_vecs +from src.utils.mapper import ConfigMapper +from src.utils.text_loggers import get_logger + +logger = get_logger(__name__) + + +@ConfigMapper.map("models", "gatedcnn_nci") +class GatedCNNNCI(nn.Module): + def __init__(self, config): + super(GatedCNNNCI, self).__init__() + self.max_length = config.max_length + self.dropout = config.dropout + self.input_dim = config.input_dim + self.hidden_dim = config.hidden_dim + self.output_dim = config.output_dim + self.bidirectional = config.bidirectional + self.use_description = config.use_description + + self.word_embedding_layer = WordEmbeddingLayer( + embed_dir=config.embed_dir, + dataset_dir=config.dataset_dir, + mimic_dir=config.mimic_dir, + static_dir=config.static_dir, + version=config.version, + dropout=config.dropout, + pad_token=config.pad_token, + unk_token=config.unk_token, + ) + self.desc_vecs = self.word_embedding_layer.desc_vecs + + self.encoder = GatedCNN( + input_dim=config.input_dim, + hidden_dim=config.hidden_dim, + output_dim=config.output_dim, + kernel_size=config.kernel_size, + dropout=config.dropout, + init_mean=config.init_mean, + init_std=config.init_std, + levels=config.levels, + ) + + if self.bidirectional: + self.output_layer = OutputLayer( + embed_dir=config.embed_dir, + 
dataset_dir=config.dataset_dir, + mimic_dir=config.mimic_dir, + static_dir=config.static_dir, + version=config.version, + input_dim=2 * config.input_dim, + num_labels=config.output_dim, + dropout=config.dropout, + pad_token=config.pad_token, + unk_token=config.unk_token, + ) + else: + self.output_layer = OutputLayer( + embed_dir=config.embed_dir, + dataset_dir=config.dataset_dir, + mimic_dir=config.mimic_dir, + static_dir=config.static_dir, + version=config.version, + input_dim=config.input_dim, + num_labels=config.output_dim, + dropout=config.dropout, + pad_token=config.pad_token, + unk_token=config.unk_token, + ) + + self.variational_dropout = VariationalDropout(dropout=config.dropout) + + self.hidden = None + + def freeze_net(self): + for p in self.word_embedding_layer.embed.parameters(): + p.requires_grad = False + + def init_hidden(self, batch_size): + h_size = self.hidden_dim + self.output_dim + weight = next(self.parameters()).data + self.hidden = ( + weight.new(batch_size, h_size, 1).zero_(), + weight.new(batch_size, h_size, 1).zero_(), + ) + + def _reverse_seq(self, X, mask, seq_max_len): + """ + X -> batch, seq_len, dim + mask -> batch, seq_len + """ + device = X.get_device() + if device == -1: + device = "cpu" + + mask_sum = torch.sum(mask, 1).int() + xfs = [] + for x, c in zip(X, mask_sum): + xf = torch.flip(x[:c], [0]) + xfs.append(xf) + padded_rev = torch.zeros((len(xfs), X.size(1), X.size(2))).to(device) + for i, mat in enumerate(xfs): + padded_rev[i][: len(mat), :] = mat + return padded_rev + + def forward(self, data): + """ + :param data: The input sequence, with dimesion (N, L) + :param desc: Whether to use code description + :return: logits, loss, hidden + """ + device = data.get_device() + if device == -1: + device = "cpu" + + # If this is the first forward pass, we will initialise the hidden + # state. + if self.hidden is None: + self.init_hidden_flag = True + self.hidden = self.init_hidden(data.size(0)) + + # Look up the embeddings of all the tokens using the WordEmbeddingLayer. + # `emb` shape: (batch_size, max_length, embed_size) + emb, mask = self.word_embedding_layer(data) + + # If we want a bidirectional model, we reverse the sequence of + # tokens. + if self.bidirectional: + # `emb_reverse` shape: (batch_size, max_length, embed_size) + emb_reverse = self._reverse_seq(emb, mask, self.max_length) + # `emb_reverse` shape`: [batch_size, embed_size, max_length] + emb_reverse = emb_reverse.transpose(1, 2) + # `emb` shape: (batch_size, embed_size, max_length) + emb = emb.transpose(1, 2) + + # Pass the embeddings through the encoder. If the model is + # bidirectional, we pass the reverse embeddings as well. + raw_output, self.hidden = self.encoder(emb, self.hidden) + if self.bidirectional: + raw_out_reverse, self.hidden = self.encoder( + emb_reverse, self.hidden + ) + + output = self.variational_dropout(raw_output) + if self.bidirectional: + output_reverse = self._reverse_seq( + raw_out_reverse, mask, self.max_length + ) + output_reverse = self.variational_dropout(output_reverse) + output = torch.cat([output, output_reverse], dim=2) + + if self.use_description: + logits = self.output_layer(output, self.desc_vecs.to(device)) + else: + logits = self.output_layer(output, None) + return logits + + +class WordEmbeddingLayer(nn.Module): + """ + A Word Embedding Layer. This layer loads a pre-trained word embedding matrix + , and copies its weights to an nn.Embedding layer. 
+ + Args: + embed_dir (str): A directory containing the pre-trained word embedding + matrix, among other things. Please see + https://github.com/dalgu90/icd-coding-benchmark/blob/main/src/modules/embeddings.py#L17 + for more details. + dropout (float): The dropout probability. + """ + + def __init__( + self, + embed_dir, + dataset_dir, + mimic_dir, + static_dir, + version, + dropout, + pad_token="", + unk_token="", + return_pad_mask=True, + use_description=True, + ): + super(WordEmbeddingLayer, self).__init__() + logger.debug( + f"Initialising {self.__class__.__name__} with " + f"embed_dir = {embed_dir}, dropout = {dropout}" + ) + + self.return_pad_mask = return_pad_mask + + # Note: This should be changed, since we won't always use Word2Vec. + embedding_cls = ConfigMapper.get_object("embeddings", "word2vec") + vocab = embedding_cls.load_vocab(embed_dir) + self.pad_token_id = vocab[pad_token] + self.unk_token_id = vocab[unk_token] + + W = torch.Tensor(embedding_cls.load_emb_matrix(embed_dir)) + self.embed = nn.Embedding(W.size()[0], W.size()[1], padding_idx=0) + self.embed.weight.data = W.clone() + + self.embedding_size = self.embed.embedding_dim + + self.dropout = nn.Dropout(dropout) + + if use_description: + dicts = load_lookups( + dataset_dir=dataset_dir, + mimic_dir=mimic_dir, + static_dir=static_dir, + word2vec_dir=embed_dir, + version=version, + ) + ind2c = dicts["ind2c"] + w2ind = dicts["w2ind"] + desc_dict = dicts["desc"] + self.desc_vecs = [] + for i, c in ind2c.items(): + self.desc_vecs.append( + [ + w2ind[w] if w in w2ind else self.unk_token_id + for w in desc_dict[c] + ] + ) + + # Pad and convert to torch tensor. + self.desc_vecs = torch.Tensor( + list(zip(*itertools.zip_longest(*self.desc_vecs, fillvalue=0))) + ).long() + + def forward(self, x): + if self.return_pad_mask: + pad_mask = ~(x == self.pad_token_id) + embedding = self.embed(x) + x = self.dropout(embedding) + if self.return_pad_mask: + return x, pad_mask + return x + + +class VariationalHidDropout(nn.Module): + """ + Hidden-to-hidden (VD-based) dropout that applies the same mask at every + time step and every layer of TrellisNet. + + Args: + dropout (float): The dropout probability. + """ + + def __init__(self, dropout=0.0): + super(VariationalHidDropout, self).__init__() + self.dropout_probability = dropout + self.mask = None + + def reset_mask(self, input): + + # Dimension (N, C, L) + m = input.data.new(input.size(0), input.size(1), 1).bernoulli_( + 1 - self.dropout_probability + ) + with torch.no_grad(): + mask = m / (1 - self.dropout_probability) + self.mask = mask + return mask + + def forward(self, input): + # We don't apply dropout if the model is in eval mode. + if not self.training or self.dropout_probability == 0: + return input + + assert ( + self.mask is not None + ), "You need to reset mask before using VariationalHidDropout" + mask = self.mask.expand_as(input) # Make sure the dimension matches + return mask * input + + +class WeightShareConv1d(nn.Module): + """ + The weight-tied 1D convolution used in TrellisNet. + + Args: + input_dim (int): The dimension of the input. This is equivalent to + the number of input channels in the first + convolutional layer. + hidden_dim (int): The dimension of the hidden state. This is + equivalent to the number of input channels in the + second convolutional layer. + out_channels (int): The number of output channels in both + convolutional layers. + kernel_size (int): The size of the filter used in both + convolutional layers. 
+ dropout (float): Dropout probability for the hidden-to-hidden + dropout layer. + init_mean (float): The mean of the normal distribution with which + weights of the convolutional layers are + initialised. + init_std (float): The standard deviation of the normal distribution + with which weights of the convolutional layers are + initialised. + """ + + def __init__( + self, + input_dim, + hidden_dim, + out_channels, + kernel_size, + dropout=0.0, + init_mean=0.0, + init_std=0.01, + ): + super(WeightShareConv1d, self).__init__() + + self.input_dim = input_dim + self.kernel_size = kernel_size + + self._dict = {} + + conv_layer_1 = nn.Conv1d( + in_channels=input_dim, + out_channels=out_channels, + kernel_size=kernel_size, + ) + self.weight_1 = conv_layer_1.weight + + conv_layer_2 = nn.Conv1d( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=kernel_size, + ) + self.weight_2 = conv_layer_2.weight + self.bias_2 = conv_layer_2.bias + + self.init_conv_weights(init_mean, init_std) + + self.dropout = VariationalHidDropout(dropout=dropout) + + self.dict = {} + + def init_conv_weights(self, init_mean, init_std): + self.weight_1.data.normal_(mean=init_mean, std=init_std) + self.weight_2.data.normal_(mean=init_mean, std=init_std) + self.bias_2.data.normal_(mean=init_mean, std=init_std) + + def forward(self, input, dilation, hid): + batch_size = input.size(0) + + padding = (self.kernel_size - 1) * dilation # Padding size. + x = F.pad(input=input, pad=(padding, 0)) # Pad with zeros. + + x_1 = x[:, : self.input_dim] + z_1 = x[:, self.input_dim :] + z_1[:, :, :padding] = hid[:batch_size, :, :].repeat(1, 1, padding) + + device = x_1.get_device() + + if (dilation, device) not in self.dict or self.dict[ + (dilation, device) + ] is None: + self.dict[(dilation, device)] = F.conv1d( + input=x_1, weight=self.weight_1, dilation=dilation + ) + + z_1 = self.dropout(z_1) + injected = self.dict[(dilation, device)] + F.conv1d( + input=z_1, weight=self.weight_2, bias=self.bias_2, dilation=dilation + ) + return injected + + +class GatedCNN(nn.Module): + """ + Gated CNN module. + + Args: + input_dim (int): The dimension of the input. + hidden_dim (int): The hidden dimension. The hidden dimension for the + weight-shared Conv1D layer is + `hidden_dim + output_dim`. + output_dim (int): The output dimension. The number of output + channels of the weight-shared Conv1D layer is + `4 * (hidden_dim + output_dim)`. + kernel_size (int): The size of the filter used in + `WeightSharedConv1D`. + dropout (float): Dropout probability for the `WeightSharedConv1D`. + init_mean (float): The mean of the normal distribution with which + weights of the `WeightSharedConv1D` layer are + initialised. + init_std (float): The standard deviation of the normal distribution + with which weights of the `WeightSharedConv1D` + layer are initialised. 
+ """ + + def __init__( + self, + input_dim, + hidden_dim, + output_dim, + kernel_size, + dropout, + init_mean, + init_std, + levels, + ): + super(GatedCNN, self).__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.output_dim = output_dim + self.levels = levels + + self.hidden_dim_for_conv = hidden_dim + output_dim + + self.dilations = [i + 1 for i in range(levels)] + + self.full_conv = WeightShareConv1d( + input_dim=input_dim, + hidden_dim=self.hidden_dim_for_conv, + out_channels=4 * self.hidden_dim_for_conv, + kernel_size=kernel_size, + dropout=dropout, + init_mean=init_mean, + init_std=init_std, + ) + + self.ht = None + + def transform_input(self, X): + device = X.get_device() + if device == -1: + device = "cpu" + + batch_size = X.size(0) + seq_len = X.size(2) + + ht = torch.zeros(batch_size, self.hidden_dim_for_conv, seq_len).to( + device + ) + self.ct = torch.zeros(batch_size, self.hidden_dim_for_conv, seq_len).to( + device + ) + return torch.cat((X, ht), dim=1) + + def gating(self, Z, dilation=1, hc=None): + batch_size = Z.size(0) + (hid, cell) = hc + + out = self.full_conv(input=Z, dilation=dilation, hid=hid) + + ct_1 = F.pad(self.ct, (dilation, 0))[:, :, :-dilation] + ct_1[:, :, :dilation] = cell[:batch_size].repeat(1, 1, dilation) + + it = torch.sigmoid(out[:, : self.hidden_dim_for_conv]) + ot = torch.sigmoid( + out[:, self.hidden_dim_for_conv : 2 * self.hidden_dim_for_conv] + ) + gt = torch.tanh( + out[:, 2 * self.hidden_dim_for_conv : 3 * self.hidden_dim_for_conv] + ) + ft = torch.sigmoid( + out[:, 3 * self.hidden_dim_for_conv : 4 * self.hidden_dim_for_conv] + ) + self.ct = ft * ct_1 + it * gt + ht = ot * torch.tanh(self.ct) + + Z = torch.cat((Z[:, : self.input_dim], ht), dim=1) + return Z + + def forward(self, emb, hc): + Z = self.transform_input(emb) + for key in self.full_conv.dict: + if key[1] == emb.get_device(): + self.full_conv.dict[key] = None + self.full_conv.dropout.reset_mask(Z[:, self.input_dim :]) + + for dilation_per_level in self.dilations: + Z = self.gating(Z, dilation=dilation_per_level, hc=hc) + + out = Z[:, -self.output_dim :].transpose(1, 2) + hc = (Z[:, self.input_dim :, -1:], self.ct[:, :, -1:]) + return out, hc + + +class VariationalDropout(nn.Module): + """ + Feed-forward version of variational dropout that applies the same mask + at every time step. 
+ """ + + def __init__(self, dropout=0.5, dim=3): + super(VariationalDropout, self).__init__() + assert dim in (3, 4), "`dim` should be either 3 or 4" + self.dropout = dropout + self.dim = dim + + def forward(self, x): + if not self.training or not self.dropout: + return x + + if self.dim == 4: + # Dimension (M, N, L, C), where C stands for channels + m = x.data.new(x.size(0), x.size(1), 1, x.size(3)).bernoulli_( + 1 - self.dropout + ) + else: + # Dimension (N, L, C) + m = x.data.new(x.size(0), 1, x.size(2)).bernoulli_(1 - self.dropout) + with torch.no_grad(): + mask = m / (1 - self.dropout) + mask = mask.expand_as(x) + return mask * x + + +class OutputLayer(nn.Module): + def __init__( + self, + embed_dir, + dataset_dir, + mimic_dir, + static_dir, + version, + input_dim, + num_labels, + dropout=0.2, + pad_token="", + unk_token="", + ): + super(OutputLayer, self).__init__() + + self.word_embedding_layer = WordEmbeddingLayer( + embed_dir=embed_dir, + dataset_dir=dataset_dir, + mimic_dir=mimic_dir, + static_dir=static_dir, + version=version, + dropout=dropout, + pad_token=pad_token, + unk_token=unk_token, + ) + + self.U = nn.Linear(input_dim, num_labels) + self.final = nn.Linear(input_dim, num_labels) + self.proj_layer = nn.Linear(input_dim, 1, bias=False) + + xavier_uniform_(self.U.weight) + xavier_uniform_(self.final.weight) + + def forward(self, x, desc): + if desc is not None: + desc_vec, _ = self.word_embedding_layer(desc) + desc_vec = torch.mean(desc_vec, dim=1).unsqueeze(0) + mmt = x.matmul(desc_vec) + else: + mmt = self.U.weight.matmul(x.transpose(1, 2)) + + m = x.transpose(1, 2).matmul(mmt) + + y = self.final.weight.mul(m) + logits = self.proj_layer(y).squeeze(-1).add(self.final.bias) + + return logits diff --git a/src/modules/metrics.py b/src/modules/metrics.py index 04d2341..a5edda6 100755 --- a/src/modules/metrics.py +++ b/src/modules/metrics.py @@ -23,6 +23,7 @@ def to_np_array(array): array = np.array(array) return array + def _auc_job(x): return roc_auc_score(x[0], x[1]) diff --git a/src/trainers/base_trainer.py b/src/trainers/base_trainer.py index a1a70dc..506cd47 100755 --- a/src/trainers/base_trainer.py +++ b/src/trainers/base_trainer.py @@ -174,6 +174,10 @@ def train(self, model, train_dataset, val_dataset=None): batch_inputs = batch_inputs.cuda() batch_labels = batch_labels.cuda() + # Initialise the hidden states. + if self.config.initialise_hidden_states: + model.init_hidden(batch_inputs.size(0)) + batch_outputs = model(batch_inputs) batch_loss = self.loss_fn( input=batch_outputs, target=batch_labels @@ -393,6 +397,11 @@ def _forward_epoch(self, model, dataset=None, dataloader=None): if self.config.use_gpu: batch_inputs = batch_inputs.cuda() batch_labels = batch_labels.cuda() + + # Initialise the hidden states. 
+ if self.config.initialise_hidden_states: + model.init_hidden(batch_inputs.size(0)) + + batch_outputs = model(batch_inputs) epoch_labels.append(batch_labels.cpu()) epoch_outputs.append(batch_outputs.cpu()) diff --git a/src/utils/caml_utils.py b/src/utils/caml_utils.py index a165a4d..58fd76d 100644 --- a/src/utils/caml_utils.py +++ b/src/utils/caml_utils.py @@ -101,3 +101,18 @@ def pad_desc_vecs(desc_vecs): for vec in desc_vecs: pad_vecs.append(vec + [0] * (desc_len - len(vec))) return pad_vecs + + +def load_description_tokens(data_dir): + # load description token-index vectors for each code from file + dv_dict = {} + + with open("%s/description_vectors.vocab" % (data_dir), "r") as vfile: + r = csv.reader(vfile, delimiter=" ") + # header + next(r) + for row in r: + code = row[0] + vec = [int(x) for x in row[1:]] + dv_dict[code] = vec + return dv_dict
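
Reviewer note: a minimal sketch of how the new `initialise_hidden_states` trainer flag (set to true in the gatedcnn_nci configs and false in the others) is meant to interact with `init_hidden(batch_size)` before every forward pass, mirroring the `base_trainer.py` hunks above. `ToyModel`, its dimensions, and the random batch are hypothetical stand-ins for `GatedCNNNCI` and the configured data loader; only the flag-gated `init_hidden` call and the (h, c) shape convention come from the diff.

# Sketch only; not part of the patch. Assumes PyTorch is installed.
import torch
import torch.nn as nn


class ToyModel(nn.Module):
    def __init__(self, hidden_dim=100, output_dim=50):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.proj = nn.Linear(hidden_dim, output_dim)
        self.hidden = None

    def init_hidden(self, batch_size):
        # Same convention as GatedCNNNCI.init_hidden: zero-initialised
        # (h, c) tensors of shape (batch_size, hidden_dim + output_dim, 1).
        h_size = self.hidden_dim + self.output_dim
        weight = next(self.parameters()).data
        self.hidden = (
            weight.new(batch_size, h_size, 1).zero_(),
            weight.new(batch_size, h_size, 1).zero_(),
        )

    def forward(self, x):
        return self.proj(x)


initialise_hidden_states = True  # would come from the trainer section of the YAML config
model = ToyModel()

for _ in range(2):  # stand-in for the training / evaluation dataloader loop
    batch_inputs = torch.randn(16, 100)
    if initialise_hidden_states:
        # Reset (h, c) to zeros for every batch, as in train() and _forward_epoch().
        model.init_hidden(batch_inputs.size(0))
    batch_outputs = model(batch_inputs)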