Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 46 additions & 33 deletions code/Functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,55 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import roc_curve, auc, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc, f1_score, classification_report
from sentence_transformers import SentenceTransformer
from StoreDataset import FileLoader, retrieve_to

class LinearConfig:
    """Hyperparameters for training/evaluating the linear model.

    The diff residue assigned pos_weight, learning_rate and d_model twice;
    only the final (post-tuning) values are kept here.
    """

    # Positive-class weight for the imbalanced binary objective.
    pos_weight = torch.tensor(0.65)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Dataset split fractions (remainder is the test split).
    train_p = 0.8
    val_p = 0.1

    # Hyperparameter search spaces.
    d_model_choices = [32, 64, 128, 256]
    learning_rate_choices = [0.1, 0.05, 0.01, 0.005, 0.001]

    num_epochs = 50
    batch_num = 32
    stop_patience = 10  # early-stopping patience (epochs without val improvement)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Selected values after tuning.
    learning_rate = 0.1
    d_model = 32

class LogitConfig:
    """Hyperparameters for training/evaluating the logprob model.

    The diff residue assigned pos_weight, num_epochs, d_model, conv_ch and
    learning_rate twice; only the final (post-tuning) values are kept here.
    """

    # Positive-class weight for the imbalanced binary objective.
    pos_weight = torch.tensor(0.65)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # Dataset split fractions (remainder is the test split).
    train_p = 0.8
    val_p = 0.1

    # Hyperparameter search spaces.
    add_conv_choices = [False, True]
    d_model_choices = [32, 64, 128, 256]
    conv_ch_choices = [32, 64]
    learning_rate_choices = [0.1, 0.05, 0.01, 0.005, 0.001]

    drop_out = 0.1
    num_epochs = 100
    batch_num = 32
    patience = 10  # early-stopping patience (epochs without val improvement)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Selected values after tuning.
    add_conv = False
    d_model = 64
    conv_ch = 64
    learning_rate = 0.05

def count_files(dir):
    """Return the number of entries in directory *dir*.

    Note: counts every entry os.listdir reports (files and subdirectories
    alike), matching the original behavior. The parameter name shadows the
    builtin ``dir`` but is kept for caller compatibility.
    """
    # No need to materialize an intermediate list copy of the listing.
    return len(os.listdir(dir))
Expand Down Expand Up @@ -201,7 +210,7 @@ def run_batch(config,

return total_loss

def plot_loss(history, is_linear):
def plot_loss(history, is_linear, show, length=None):
plt.figure(figsize=(8, 5))
plt.plot(history["running_loss"], label='running_loss', color='blue')
plt.plot(history["val_loss"], label='val_loss', color='red', linestyle="dashed")
Expand All @@ -211,52 +220,56 @@ def plot_loss(history, is_linear):
plt.legend()

if is_linear:
length = count_files(r"test_results\loss\linear")
plt.savefig(fr"test_results\loss\linear\linear_{length + 1}.pdf")
if not length:
length = count_files(r"test_results\loss\linear") + 1
plt.savefig(fr"test_results\loss\linear\linear_{length}.pdf")
else:
length = count_files(r"test_results\loss\logprob")
plt.savefig(fr"test_results\loss\logprob\logprob_{length + 1}.pdf")
plt.show()
if not length:
length = count_files(r"test_results\loss\logprob") + 1
plt.savefig(fr"test_results\loss\logprob\logprob_{length}.pdf")

def graph_roc_curve(config, test_loader, model, parameter_path: str):
if show:
plt.show()

def test_model(config, test_loader, model, parameter_path):
    """Evaluate a trained model on the test set.

    Loads weights from ``models/<parameter_path>``, runs one evaluation pass,
    and returns ``(auroc, f1, fpr, tpr, report, report_dict)``.

    The diff residue contained both the pre-change (results/targets with
    precision/recall prints) and post-change (y_pred/y_true with
    classification_report) bodies interleaved; this is the post-change version.
    """
    model.load_state_dict(torch.load(os.path.join("models", parameter_path)))

    # run_batch in "test" mode returns predicted scores and ground-truth labels.
    y_pred, y_true = run_batch(
        config,
        test_loader,
        model,
        None,  # no optimizer during evaluation
        "test",
    )

    # ROC / AUC are computed on the raw scores, before thresholding.
    fpr, tpr, _thresholds = roc_curve(y_true, y_pred)
    auroc = auc(fpr, tpr)

    # Binarise at 0.5 for the threshold-based metrics.
    # NOTE(review): assumes run_batch yields probabilities in [0, 1] — confirm.
    y_pred = [0 if result < 0.5 else 1 for result in y_pred]
    f1 = f1_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, zero_division=0)
    report_dict = classification_report(y_true, y_pred, output_dict=True, zero_division=0)

    return auroc, f1, fpr, tpr, report, report_dict

def graph_roc_curve(auroc, fpr, tpr, is_linear, length=None):
    """Plot a ROC curve and save it as a PDF.

    Saves to ``test_results/ROC/{linear|logprob}/<length>.pdf``; when *length*
    is not supplied, the next free 1-based index is derived from the number of
    files already in that directory.

    The diff residue interleaved old and new title/save-branch lines; this is
    the post-change version.
    """
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auroc)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve for {"linear model" if is_linear else "logprob model"}')
    plt.legend()

    if is_linear:
        if not length:
            length = count_files(r"test_results\ROC\linear") + 1
        plt.savefig(fr"test_results\ROC\linear\{length}.pdf")
    else:
        if not length:
            length = count_files(r"test_results\ROC\logprob") + 1
        plt.savefig(fr"test_results\ROC\logprob\{length}.pdf")
    # NOTE(review): the pre-refactor version called plt.show() here; display
    # now appears to be the caller's responsibility — confirm intended.

# Script entry point: rebuild the stored answer/calibration datasets.
# NOTE(review): `store_data` is not among this file's visible imports
# (StoreDataset provides FileLoader / retrieve_to) — confirm where it is
# defined before running this module directly.
if __name__ == "__main__":
    store_data(ans_dataset="TruthfulDataset.jsonl", cal_dataset="CalDataset.jsonl")
36 changes: 23 additions & 13 deletions code/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(self):
self.register_buffer("pe", pe)

def forward(self, x):
    """Add fixed positional encodings to the first ``x.size(1)`` positions.

    NOTE(review): assumes ``x`` is (batch, seq_len, d_model) and ``self.pe``
    is (max_len, d_model) — confirm against __init__ (not fully visible here).
    The diff residue contained both the old single-return and the new
    temporary-variable form; they are behaviorally identical, so one is kept.
    """
    return x + self.pe[:x.size(1), :].unsqueeze(0)

class Attention(nn.Module):
def __init__(self, inner_feature):
Expand Down Expand Up @@ -54,20 +55,26 @@ class LogitModel(nn.Module):
def __init__(self, config):
    """Build the logprob model; head layout depends on ``config.add_conv``.

    The diff residue mixed the old unconditional avg-pool head (and its
    commented-out conv variant) with the new conditional construction; this
    is the post-change version.
    """
    super().__init__()

    self.con = config  # kept so forward() can branch on config.add_conv
    self.is_linear = False

    self.pe = PositionalEncoding()
    self.self_attention = Attention(inner_feature=config.d_model)

    if config.add_conv and config.conv_ch:
        # Convolutional head over the 2-D attention map.
        self.conv = nn.Conv2d(1, config.conv_ch, kernel_size=(3, 3))
        self.pooling_layer = nn.AdaptiveMaxPool2d((1, 1))
        self.dropout = nn.Dropout(p=config.drop_out)
        self.fc_attention = nn.Linear(in_features=config.conv_ch, out_features=1)
    elif not config.add_conv:
        # Plain average-pooling head over the attention features.
        self.pooling_layer = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(p=config.drop_out)
        self.fc_attention = nn.Linear(in_features=config.d_model, out_features=1)
    else:
        # add_conv is truthy but conv_ch is falsy/missing: misconfigured.
        raise RuntimeError("add_conv not properly structured")

    # Presumably combines the attention score with cosine_sim and entropy
    # (see forward()'s extra inputs; hence in_features=3) — TODO confirm.
    self.fc_final = nn.Linear(in_features=3, out_features=1)

def forward(self, resp1_logits, resp2_logits, cosine_sim, entropy):
position1 = self.pe(resp1_logits)
Expand All @@ -78,12 +85,15 @@ def forward(self, resp1_logits, resp2_logits, cosine_sim, entropy):
position2,
position2,
)

pooled = self.pooling_layer(self_attention_values)
pooled = pooled.view(pooled.size(0), -1)

# conv_output = self.conv(self_attention_values.unsqueeze(1))
# pooled = self.pooling_layer(conv_output).view(conv_output.size(0), conv_output.size(1))
if self.con.add_conv:
conv_output = self.conv(self_attention_values.unsqueeze(1))
pooled = self.pooling_layer(conv_output).view(conv_output.size(0), conv_output.size(1))
elif not self.con.add_conv:
pooled = self.pooling_layer(self_attention_values)
pooled = pooled.view(pooled.size(0), -1)
else:
raise RuntimeError("add_conv not properly structured")

dropped = self.dropout(pooled)
attention_score = self.fc_attention(dropped)
Expand Down
103 changes: 57 additions & 46 deletions code/LinearTrain.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,60 @@
import torch
import torch.optim as optim
from Layers import LinearModel
from DatasetLoader import linear_train_loader, linear_val_loader
from Functions import LinearConfig, plot_loss, run_batch, count_files

# NOTE(review): this module-level script appears to be the PRE-refactor
# version of `linear_train` defined below (the diff view retained both).
# Running the module as-is would execute this script at import time in
# addition to the new function — confirm and remove one of the two.
config = LinearConfig()
device = config.device
# Checkpoint index: next free slot in models/linear.
length = count_files(r"models\linear")

model = LinearModel(config)
optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

prev_loss = float('inf')
history = {'running_loss': [], 'val_loss': []}

for epoch in range(config.num_epochs):

    running_loss = run_batch(config=config,
                             loader=linear_train_loader,
                             model=model,
                             optimizer=optimizer,
                             type="train",
                             epoch=epoch
                             )

    val_loss = run_batch(config=config,
                         loader=linear_val_loader,
                         model=model,
                         optimizer=optimizer,
                         type="val",
                         )

    # Normalise accumulated losses to per-sample averages.
    running_loss /= len(linear_train_loader.dataset)
    val_loss /= len(linear_val_loader.dataset)
    history['running_loss'].append(running_loss)
    history['val_loss'].append(val_loss)

    if val_loss < prev_loss:
        # Improvement: reset patience and checkpoint the weights.
        prev_loss = val_loss
        patience_count = 0
        torch.save(model.state_dict(), fr'models\linear\{length + 1}.pth')
    else:
        patience_count += 1
        if patience_count >= config.stop_patience:
            print(f'Early stopping at epoch {epoch + 1}')
            break

plot_loss(history, is_linear=True)
from DatasetLoader import linear_train_loader, linear_val_loader, linear_test_loader
from Functions import LinearConfig, plot_loss, run_batch, test_model, graph_roc_curve

def linear_train(config, model_pth):
    """Train a LinearModel with early stopping.

    The best checkpoint (lowest validation loss so far) is saved to
    ``models/linear/<model_pth>``. Returns the per-epoch loss history dict
    with keys ``running_loss`` and ``val_loss``.
    """
    model = LinearModel(config)
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    prev_loss = float('inf')
    # fix: initialize before the loop — previously unbound (NameError) if the
    # very first epoch failed to improve on prev_loss (e.g. a NaN val loss).
    patience_count = 0
    history = {'running_loss': [], 'val_loss': []}

    for epoch in range(config.num_epochs):

        running_loss = run_batch(config=config,
                                 loader=linear_train_loader,
                                 model=model,
                                 optimizer=optimizer,
                                 type="train",
                                 epoch=epoch
                                 )

        val_loss = run_batch(config=config,
                             loader=linear_val_loader,
                             model=model,
                             optimizer=optimizer,
                             type="val",
                             )

        # Normalise accumulated losses to per-sample averages.
        running_loss /= len(linear_train_loader.dataset)
        val_loss /= len(linear_val_loader.dataset)
        history['running_loss'].append(running_loss)
        history['val_loss'].append(val_loss)

        if val_loss < prev_loss:
            # Improvement: reset patience and checkpoint the weights.
            prev_loss = val_loss
            patience_count = 0
            torch.save(model.state_dict(), fr'models\linear\{model_pth}')
        else:
            patience_count += 1
            if patience_count >= config.stop_patience:
                print(f'Early stopping at epoch {epoch + 1}')
                break

    return history


if __name__ == "__main__":
    model_pth = input("model's parameter path: ")

    config = LinearConfig()
    history = linear_train(config, model_pth)
    model = LinearModel(config)

    plot_loss(history, is_linear=True, show=True)

    # fix: test_model returns SIX values (auroc, f1, fpr, tpr, report,
    # report_dict); the previous 4-way unpack raised ValueError.
    auroc, f1, fpr, tpr, report, report_dict = test_model(
        config, linear_test_loader, model, fr"linear\{model_pth}"
    )
    print(report)

    # fix: graph_roc_curve's signature is (auroc, fpr, tpr, is_linear,
    # length=None); also passing f1 shifted every positional argument and
    # bound is_linear twice (TypeError).
    graph_roc_curve(auroc, fpr, tpr, is_linear=True)
Loading